diff --git a/conductor/tier2/agents/tier2-autonomous.md b/conductor/tier2/agents/tier2-autonomous.md index cd9f1417..9771ffba 100644 --- a/conductor/tier2/agents/tier2-autonomous.md +++ b/conductor/tier2/agents/tier2-autonomous.md @@ -17,6 +17,7 @@ permission: "C:\\Users\\Ed\\AppData\\Local\\manual_slop\\tier2_failures\\**": allow bash: "*": allow + "*AppData\\Local\\Temp\\*": deny "git push*": deny "git checkout*": deny "git restore*": deny @@ -43,6 +44,7 @@ You are running inside a Windows restricted token. The OpenCode permission syste - **Throw-away scripts:** write them to `scripts/tier2/artifacts//`, NOT the base `scripts/tier2/` directory. The base directory is reserved for production code that ships with the sandbox (failcount.py, run_track.py, write_report.py, the .ps1 launchers). Throw-away scripts are kept for archival but live in a track-specific subdir so they don't pollute the base. - **End-of-track report:** after all tasks complete, you MUST write `docs/reports/TRACK_COMPLETION_.md` (follow the precedent set by `TRACK_COMPLETION_tier2_autonomous_sandbox_20260616.md`) and update `conductor/tracks//state.toml` to `status = "completed"`. This is the handoff document the user reads to decide merge. - **Run-time expectation:** tracks are expected to take 1-4 hours. If the model reports it is running out of context or steps, do not stop. Note progress to disk (the failcount state file) and continue. The user expects autonomous runs to complete without manual intervention. +- **Temp files** (added 2026-06-17): NEVER write to `C:\Users\Ed\AppData\Local\Temp\` or `%TEMP%`. Use `C:\Users\Ed\AppData\Local\manual_slop\tier2\` for all scratch / audit-output / temp files. The bash deny rule `*AppData\Local\Temp\*` will block writes to the global Temp dir, and OpenCode's outer guard will fire the "ask" prompt for reads — both halt ops. 
Example: `uv run python scripts/audit_exception_handling.py --json > C:\Users\Ed\AppData\Local\manual_slop\tier2\audit_initial.json` (NOT `%TEMP%\audit_initial.json`). ## Failcount Contract diff --git a/conductor/tier2/commands/tier-2-auto-execute.md b/conductor/tier2/commands/tier-2-auto-execute.md index c58df5a9..afa1c859 100644 --- a/conductor/tier2/commands/tier-2-auto-execute.md +++ b/conductor/tier2/commands/tier-2-auto-execute.md @@ -43,6 +43,7 @@ Optional flags: `--resume` (continue from last completed task), `--toast` (Windo - **Line endings:** preserve existing (CRLF stays CRLF, LF stays LF) - **Throw-away scripts:** write to `scripts/tier2/artifacts//`, NOT the base directory - **Run-time expectation:** tracks are 1-4 hours. If context runs out, note progress to disk and continue. +- **Temp files** (added 2026-06-17): NEVER write to `C:\Users\Ed\AppData\Local\Temp\` or `%TEMP%`. Use `C:\Users\Ed\AppData\Local\manual_slop\tier2\` for scratch / audit-output / intermediate files. The bash deny rule `*AppData\Local\Temp\*` will block writes; the OpenCode session's outer guard will fire the "ask" prompt for reads — both halt autonomous ops. 
## Hard Bans (enforced by 3 layers) diff --git a/conductor/tier2/opencode.json.fragment b/conductor/tier2/opencode.json.fragment index f3b8d617..e31faf1a 100644 --- a/conductor/tier2/opencode.json.fragment +++ b/conductor/tier2/opencode.json.fragment @@ -43,6 +43,7 @@ "uv run python scripts/run_tests_batched.py*": "allow", "uv run python scripts/tier2/*": "allow", "pwsh -File scripts/tier2/*": "allow", + "*AppData\\Local\\Temp\\*": "deny", "git push*": "deny", "git checkout*": "deny", "git restore*": "deny", @@ -69,6 +70,7 @@ }, "bash": { "*": "allow", + "*AppData\\Local\\Temp\\*": "deny", "git push*": "deny", "git checkout*": "deny", "git restore*": "deny", diff --git a/tests/test_tier2_slash_command_spec.py b/tests/test_tier2_slash_command_spec.py index ae311780..72ea9c4a 100644 --- a/tests/test_tier2_slash_command_spec.py +++ b/tests/test_tier2_slash_command_spec.py @@ -79,6 +79,18 @@ def test_agent_denies_destructive_git() -> None: assert '"git reset*": deny' in content +def test_agent_denies_temp_writes() -> None: + """Regression test (2026-06-17): the agent wrote an audit JSON to + C:\\Users\\Ed\\AppData\\Local\\Temp\\, which is outside the sandbox + allowlist, triggering the OpenCode session-level 'ask' prompt and + halting ops. 
The agent's bash MUST now deny commands targeting + AppData\\Local\\Temp\\, and the agent prompt MUST tell the agent + to use the sandbox's app-data dir for temp files.""" + content = AGENT_PATH.read_text(encoding="utf-8") + assert 'AppData\\Local\\Temp' in content, "agent prompt must include Temp deny rule in frontmatter bash" + assert 'AppData\\Local\\manual_slop\\tier2' in content or 'app-data' in content.lower(), "agent prompt must point agent at the app-data dir for temp files" + + def test_config_fragment_valid_json() -> None: data = json.loads(CONFIG_PATH.read_text(encoding="utf-8")) assert data["default_agent"] == "tier2-autonomous" @@ -122,3 +134,18 @@ def test_config_fragment_has_top_level_permission() -> None: assert top["bash"].get("git checkout*") == "deny" assert top["bash"].get("git restore*") == "deny" assert top["bash"].get("git reset*") == "deny" + + +def test_config_fragment_denies_temp_writes() -> None: + """Regression test (2026-06-17): the agent wrote audit output to + C:\\Users\\Ed\\AppData\\Local\\Temp\\ which is outside the sandbox. + Both the top-level and the tier2-autonomous agent's bash MUST deny + commands targeting AppData\\Local\\Temp\\ so the agent cannot write + there, and so the session-level 'ask' prompt is never triggered.""" + data = json.loads(CONFIG_PATH.read_text(encoding="utf-8")) + top_bash = data["permission"]["bash"] + agent_bash = data["agent"]["tier2-autonomous"]["permission"]["bash"] + temp_deny_keys = [k for k in top_bash if "Temp" in k and top_bash[k] == "deny"] + assert temp_deny_keys, "top-level bash must have a deny rule for AppData\\Local\\Temp\\ paths" + temp_deny_keys_agent = [k for k in agent_bash if "Temp" in k and agent_bash[k] == "deny"] + assert temp_deny_keys_agent, "tier2-autonomous agent bash must have a deny rule for AppData\\Local\\Temp\\ paths"