From c725270b990b52667e791ae0598be831d3e3c58f Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 9 Jun 2026 20:27:20 -0400 Subject: [PATCH] conductor(track): workspace_path_finalize_20260609 - per-run workspace under tests/artifacts/ --- .../metadata.json | 37 +++ .../workspace_path_finalize_20260609/spec.md | 234 ++++++++++++++++++ .../state.toml | 38 +++ 3 files changed, 309 insertions(+) create mode 100644 conductor/tracks/workspace_path_finalize_20260609/metadata.json create mode 100644 conductor/tracks/workspace_path_finalize_20260609/spec.md create mode 100644 conductor/tracks/workspace_path_finalize_20260609/state.toml diff --git a/conductor/tracks/workspace_path_finalize_20260609/metadata.json b/conductor/tracks/workspace_path_finalize_20260609/metadata.json new file mode 100644 index 00000000..da45a703 --- /dev/null +++ b/conductor/tracks/workspace_path_finalize_20260609/metadata.json @@ -0,0 +1,37 @@ +{ + "track_id": "workspace_path_finalize_20260609", + "name": "Workspace Path Finalize (2026-06-09) - the LAST track on this issue", + "created_at": "2026-06-09", + "status": "spec", + "priority": "A", + "blocked_by": [], + "blocks": [], + "inherits_from": [ + "conductor/tracks/test_infrastructure_hardening_20260609/" + ], + "supersedes": [], + "domain": "Meta-Tooling (test infrastructure)", + "scope_summary": "One-line fixture change to move live_gui workspace from %TEMP%/pytest-of-... back to tests/artifacts/live_gui_workspace/ (gitignored, in project tree, where the sims expect it). The Phase 3 tmp_path_factory refactor was a regression. 
The user explicitly called this out.", + "estimated_effort": "30 minutes", + "phases": 1, + "verification_criteria": [ + "tests/conftest.py:465 (or equivalent) reads _RUN_WORKSPACE, i.e. Path('tests/artifacts/live_gui_workspace_<YYYYMMDD_HHMMSS>')", + "tests/test_workspace_path_finalize.py has 2 tests, both pass", + "Full batch: tier-1 5/5, tier-2 5/5, tier-3 0 new failures", + "The 4 sim tests in tests/test_extended_sims.py pass in batch" + ], + "out_of_scope": [ + "Refactoring simulation/sim_base.py", + "Adding new audit scripts", + "Updating docs", + "Filing follow-up tracks", + "Any 'while we're at it' refactors" + ], + "risks": [ + { + "risk": "1-line edit corrupts conftest (as happened in the previous attempt)", + "mitigation": "Use manual-slop_set_file_slice; verify syntax with ast.parse after" + } + ], + "tier_2_supervision_required_for": [] +} diff --git a/conductor/tracks/workspace_path_finalize_20260609/spec.md b/conductor/tracks/workspace_path_finalize_20260609/spec.md new file mode 100644 index 00000000..1c1b8b88 --- /dev/null +++ b/conductor/tracks/workspace_path_finalize_20260609/spec.md @@ -0,0 +1,234 @@ +# Track Specification: Workspace Path Per-Run (2026-06-09) + +## Overview + +Conftest creates `tests/artifacts/live_gui_workspace_<YYYYMMDD_HHMMSS>/` once per pytest invocation. No env vars, no CLI args, no runner changes. The conftest is the source of truth for the workspace path. + +**Per-test pollution is intentional** — it exposes fragility, which is the whole point of the test infrastructure hardening track. + +**Per-run isolation** — each `uv run pytest` invocation gets a new timestamped folder, so state doesn't leak across runs.
+ +**Why this design:** +- No env vars (anti-pattern, hidden global state) +- No CLI args (conftest is the right place for test infrastructure) +- No runner changes (`run_tests_batched.py` already works) +- Path is in the project tree under `tests/artifacts/` (gitignored, inspectable, where the sims expect it) +- `tests/artifacts/` is already gitignored — no repo pollution + +## Current State Audit (as of fe240db4) + +### Bug +`tests/conftest.py:453-465`: +```python +@pytest.fixture(scope="session") +def live_gui(request, tmp_path_factory) -> Generator["_LiveGuiHandle", None, None]: + ... + temp_workspace = tmp_path_factory.mktemp("live_gui_workspace") +``` + +This puts the workspace at `C:\Users\<user>\AppData\Local\Temp\pytest-of-<user>\pytest-N\live_gui_workspace0`. That's: +1. Not in the project tree (user can't find it) +2. Per-pytest-invocation (re-rolled each run, which is fine), but with an opaque name +3. Different location from what the sims in `simulation/sim_base.py` expect (`tests/artifacts/...`) + +### The fix +Replace `tmp_path_factory.mktemp("live_gui_workspace")` with a timestamped per-run folder under `tests/artifacts/`: +```python +from datetime import datetime +_RUN_ID = datetime.now().strftime("%Y%m%d_%H%M%S") +temp_workspace = Path(f"tests/artifacts/live_gui_workspace_{_RUN_ID}") +``` + +This: +- Creates `tests/artifacts/live_gui_workspace_20260609_201530/` relative to the CWD, which must be the project root (the path is not anchored to the conftest location) +- Each `uv run pytest` invocation gets a new folder (timestamp is per-second granularity) +- All 49 live_gui tests in that invocation share the workspace +- The folder is in `tests/artifacts/` (already gitignored, see `git check-ignore tests/artifacts`) +- The sims' `os.path.abspath("tests/artifacts/temp_*.toml")` resolves to the project tree, which matches + +### What to KEEP from Phase 3 +- `tests/test_live_gui_workspace_fixture.py` — the test file that verifies the `live_gui_workspace` fixture +- The 5 test files updated in `006bb114` to use the fixture instead of
hardcoded paths +- The `_LiveGuiHandle` class with `__iter__`/`__getitem__` backward compat +- The `_check_live_gui_health` autouse fixture +- The `clean_baseline` marker +- The 3-task fix at `fe240db4` (MMA + RAG state reset) + +### What to REVERT +- `tests/conftest.py:465`: change `tmp_path_factory.mktemp("live_gui_workspace")` back to a stable path under `tests/artifacts/` + +### What to ADD +- `_RUN_ID` and `_RUN_WORKSPACE` module-level constants in conftest.py (computed once at import time) +- The `live_gui_workspace` fixture already exists; just verify it returns the new path + +## Goals + +1. **Goal A: Workspace at `tests/artifacts/live_gui_workspace_<YYYYMMDD_HHMMSS>/`.** Conftest creates the folder, all live_gui tests share it for the duration of the run. +2. **Goal B: Sim tests pass in full batch.** `tests/test_extended_sims.py` 4 sims pass in tier-3. +3. **Goal C: Per-run isolation.** Each `uv run pytest` invocation gets a new folder. State from a prior run doesn't pollute. +4. **Goal D: Inspectable from project tree.** The user can `ls tests/artifacts/live_gui_workspace_*/` to see what the GUI subprocess is working with. + +### Non-Goals + +- ❌ Per-test isolation. The whole point is per-test pollution = exposed fragility. +- ❌ Env vars. The user explicitly rejected them. +- ❌ CLI args. Conftest is the right place. +- ❌ Runner changes. `run_tests_batched.py` is fine as-is. +- ❌ Refactoring `simulation/sim_base.py`. It already uses `tests/artifacts/` paths. +- ❌ New audit scripts. +- ❌ New tests beyond the 2 verification tests. +- ❌ Doc updates. +- ❌ Follow-up tracks. + +## Functional Requirements + +### FR1. Conftest creates per-run workspace + +**Where:** `tests/conftest.py:453-465` + +**What:** Change the fixture signature and the workspace assignment, and add two module-level constants: +```python +# BEFORE (lines 453-465) +def live_gui(request, tmp_path_factory) -> Generator["_LiveGuiHandle", None, None]: + ...
+ temp_workspace = tmp_path_factory.mktemp("live_gui_workspace") + +# AFTER +_RUN_ID = datetime.now().strftime("%Y%m%d_%H%M%S") +_RUN_WORKSPACE = Path(f"tests/artifacts/live_gui_workspace_{_RUN_ID}") + +def live_gui(request) -> Generator["_LiveGuiHandle", None, None]: + ... + temp_workspace = _RUN_WORKSPACE +``` + +Add `from datetime import datetime` to the imports at the top of conftest.py. + +### FR2. `live_gui_workspace` fixture returns the new path + +**Where:** `tests/conftest.py:673-677` (the existing `live_gui_workspace` fixture) + +**What:** The fixture already exists and returns `handle.workspace`. The `handle.workspace` is set in `_LiveGuiHandle.__init__` from `temp_workspace`. So once FR1 is applied, the fixture returns the new path automatically. + +Verify with a new test: +```python +def test_live_gui_workspace_is_under_tests_artifacts(live_gui_workspace): + assert str(live_gui_workspace).replace("\\", "/").startswith("tests/artifacts/live_gui_workspace_") +``` + +### FR3. Workspace is gitignored + +**Where:** `.gitignore` (already has `tests/artifacts/`) + +Verify with a new test: +```python +def test_live_gui_workspace_is_gitignored(live_gui_workspace): + import subprocess + result = subprocess.run( + ["git", "check-ignore", str(live_gui_workspace)], + capture_output=True, text=True, cwd="." + ) + assert result.returncode == 0, f"Workspace {live_gui_workspace} is not gitignored" +``` + +## Non-Functional Requirements + +- **NFR1: 1 import + 2 constants + 2 line changes.** Add `from datetime import datetime` and the `_RUN_ID`/`_RUN_WORKSPACE` constants. Change line 453 (signature) and line 465 (assignment). +- **NFR2: No regressions.** Tier-1 and tier-2 batch results must match the `fe240db4` baseline. +- **NFR3: 1 commit.** Atomic. Not batched. +- **NFR4: 1-space indent, CRLF, type hints.** Per project conventions. + +## Architecture Reference + +- **`tests/conftest.py:453-540`** — the `live_gui` session-scoped fixture. Only lines 465 + 453 change, plus the import and the two module-level constants. +- **`tests/conftest.py:673-677`** — the `live_gui_workspace` fixture.
No change needed; it returns `handle.workspace` which is the new path. +- **`scripts/run_tests_batched.py`** — no change. +- **`simulation/sim_base.py:80-91`** — no change. `os.path.abspath("tests/artifacts/temp_*.toml")` resolves to the project tree, which works. +- **`.gitignore`** — already has `tests/artifacts/`. No change. + +## Out of Scope + +- Per-test isolation +- Env vars +- CLI args +- Runner changes +- Sim refactoring +- New audit scripts +- Doc updates +- Follow-up tracks +- Any "while we're at it" refactors + +## Verification Criteria + +1. ✅ `tests/conftest.py:453` no longer takes `tmp_path_factory` parameter +2. ✅ `tests/conftest.py:465` (or equivalent) reads `_RUN_WORKSPACE` (the timestamped path) +3. ✅ `tests/artifacts/live_gui_workspace_<YYYYMMDD_HHMMSS>/` exists after a pytest run +4. ✅ 2 new verification tests pass +5. ✅ Full batch: tier-1 5/5, tier-2 5/5, tier-3 0 new failures (or matches `fe240db4` baseline + the 4 sim tests now pass) +6. ✅ The 4 sim tests in `tests/test_extended_sims.py` pass in batch +7. ✅ 1 atomic commit + +## Execution Plan + +This is a 6-step change that lands as 1 atomic commit (after a throwaway pre-edit checkpoint). No phases. No agent handoffs. + +### Step 1: Pre-edit checkpoint +```powershell +cd C:\projects\manual_slop; git add .; git commit -m "wip: pre-workspace-path-finalize" --allow-empty +``` + +### Step 2: Apply the changes +Use `manual-slop_set_file_slice` (the recommended surgical tool per `conductor/edit_workflow.md`): + +1. Add `from datetime import datetime` to the imports section of `tests/conftest.py` +2. Add the module-level constants near the top of conftest.py (after imports): + ```python + _RUN_ID = datetime.now().strftime("%Y%m%d_%H%M%S") + _RUN_WORKSPACE = Path(f"tests/artifacts/live_gui_workspace_{_RUN_ID}") + ``` +3. Change `tests/conftest.py:453` (pre-edit numbering; the line shifts down once items 1-2 are inserted, so re-locate it by content) from `def live_gui(request, tmp_path_factory)` to `def live_gui(request)` +4.
Change `tests/conftest.py:465` (pre-edit numbering; re-locate by content) from `temp_workspace = tmp_path_factory.mktemp("live_gui_workspace")` to `temp_workspace = _RUN_WORKSPACE` + +Verify syntax after each edit: +```powershell +cd C:\projects\manual_slop; uv run python -c "import ast; ast.parse(open('tests/conftest.py', encoding='utf-8').read()); print('OK')" +``` + +### Step 3: Add 2 verification tests +Create `tests/test_workspace_path_finalize.py` with the 2 tests in FR2 and FR3. + +### Step 4: Run the 2 new tests +```powershell +cd C:\projects\manual_slop; uv run pytest tests/test_workspace_path_finalize.py -v --timeout=30 +``` +Expect: 2/2 pass. + +### Step 5: Run the full batch +```powershell +cd C:\projects\manual_slop; uv run .\scripts\run_tests_batched.py 2>&1 | Tee-Object -FilePath "tests/artifacts/post_finalize_batch_20260609.log" | Select-Object -Last 30 +``` +Expect: tier-1 5/5, tier-2 5/5, tier-3 0 new failures (or 4 sim tests now pass + 1 RAG test now passes). + +### Step 6: Commit +```powershell +cd C:\projects\manual_slop; git add -f tests/conftest.py tests/test_workspace_path_finalize.py tests/artifacts/post_finalize_batch_20260609.log +git commit -m "fix(test): per-run workspace under tests/artifacts/ (no env vars, no tmp_path)" +$h = git log -1 --format='%H' +git notes add -m "Replaces tmp_path_factory.mktemp with a per-run timestamped folder under tests/artifacts/. Each pytest invocation gets a new folder; all live_gui tests in that invocation share it (per-test pollution is intentional and exposes fragility, per the test_infrastructure_hardening_20260609 spec). Workspace is gitignored via tests/artifacts/. Sims in simulation/sim_base.py use os.path.abspath('tests/artifacts/...') which resolves correctly from the project root."
$h +``` + +## Risk Assessment + +| Risk | Likelihood | Impact | Mitigation | +|---|---|---|---| +| 4-line edit corrupts conftest | Low | High | Use `manual-slop_set_file_slice`; verify syntax with `ast.parse` after each edit; pre-edit checkpoint | +| `_RUN_ID` collides if two pytest invocations start in the same second | Very low | Low | Acceptable — second-precision is enough for human-driven runs; for CI, add a uuid suffix if needed (out of scope) | +| Stale workspaces accumulate in `tests/artifacts/` | Medium | Low | They're gitignored; the user can `rm -rf tests/artifacts/live_gui_workspace_*` when needed; out of scope for this track | + +## See Also + +- **User feedback:** Per-test pollution is intentional. Per-run isolation is the goal. No env vars. No CLI args. Conftest is the source of truth. +- **Pre-Phase 3 baseline:** `tests/conftest.py` had the workspace at `Path("tests/artifacts/live_gui_workspace")` (no timestamp). Sims worked. +- **The phantom bug:** CWD drift was already fixed by `os.path.abspath` in `RAGEngine.index_file` (commit `eb8357ec`). +- **The 3-task fix that mattered:** `fe240db4` (MMA + RAG state reset). +- **What NOT to do:** `tmp_path_factory` (per-pytest-invocation, opaque, in %TEMP%). Env vars (hidden global state). CLI args (wrong abstraction layer). 
diff --git a/conductor/tracks/workspace_path_finalize_20260609/state.toml b/conductor/tracks/workspace_path_finalize_20260609/state.toml new file mode 100644 index 00000000..57c30c67 --- /dev/null +++ b/conductor/tracks/workspace_path_finalize_20260609/state.toml @@ -0,0 +1,38 @@ +# Track state for workspace_path_finalize_20260609 +# Updated by executing agent as tasks complete + +[meta] +track_id = "workspace_path_finalize_20260609" +name = "Workspace Path Finalize (2026-06-09) - the LAST track on this issue" +status = "active" +current_phase = 1 +last_updated = "2026-06-09" + +[blocked_by] +# No blockers; this is the final cleanup of the test_infrastructure_hardening track + +[blocks] +# This track blocks nothing. It is the last track on this issue. + +[phases] +phase_1 = { status = "in_progress", checkpointsha = "", name = "Apply 1-line fix and verify" } + +[tasks] +t1_1 = { status = "pending", commit_sha = "", description = "Pre-edit checkpoint" } +t1_2 = { status = "pending", commit_sha = "", description = "Apply 1-line conftest.py change" } +t1_3 = { status = "pending", commit_sha = "", description = "Add 2 verification tests" } +t1_4 = { status = "pending", commit_sha = "", description = "Run the 2 new tests" } +t1_5 = { status = "pending", commit_sha = "", description = "Run the full batch" } +t1_6 = { status = "pending", commit_sha = "", description = "Commit" } + +[verification] +workspace_at_tests_artifacts = false +new_tests_pass = false +full_batch_passes = false +sim_tests_pass_in_batch = false + +[baseline_capture] +# Captured from the fe240db4 commit +tier_1_status = "PASS (5/5 batches)" +tier_2_status = "PASS (5/5 batches)" +tier_3_status = "FAIL on test_extended_sims.py::test_context_sim_live (1 known flake from Phase 3 tmp_path_factory refactor)"