Private
Public Access
0
0
Files
manual_slop/tests/test_tier2_slash_command_spec.py
T
ed 387adff579 fix(tier2): expand %TEMP% deny patterns to catch env-var forms
Follow-up to the 'NEVER USE APPDATA' directive. The agent kept
trying to use $env:TEMP / $env:TMP / %TEMP% / %TMP% — the previous
deny rule (*AppData\\\\* and *AppData\\Local\\Temp\\*) only matched
the literal expanded path, not the env-var form. The agent would
self-block based on its own interpretation of the rule, but it still
TRIED before self-blocking (the 'fucking tired of it fucking with
AppData' complaint).

Fix:
1. opencode.json.fragment: add bash deny patterns matched against
   the LITERAL command string (before shell expansion):
     *$env:TEMP*     - PowerShell env var (the form the agent tried)
     *$env:TMP*      - PowerShell env var
     *%TEMP%*        - cmd env var
     *%TMP%*         - cmd env var
     *GetTempPath*   - .NET API
     *gettempdir*    - Python tempfile module
     *mkstemp*       - Python tempfile.mkstemp
   Applied to BOTH the top-level permission.bash (for default agents)
   and the tier2-autonomous agent's permission.bash.

2. conductor/tier2/agents/tier2-autonomous.md: rewrite the Temp
   files section to explicitly list ALL forbidden literals and
   reiterate 'every one of those literal command strings is denied
   at the bash level'. Updated changelog note.

3. conductor/tier2/commands/tier-2-auto-execute.md: same.

4. tests/test_tier2_slash_command_spec.py: extend
   test_config_fragment_denies_temp_writes to assert each of the 9
   patterns in both the top-level and the agent's bash.

Verified: re-ran setup against the live clone. tier2 agent's bash
has 13 deny patterns (9 AppData/temp + 4 git). 37/37 default-on
tests pass.

Note: the user's prior commit (fix(tier2): remove AppData allow
rules from OpenCode permission JSON) already removed the AppData
allow rules from read/write and added the broader *AppData\\\\*
deny rule. This commit layers on top of that with the env-var-form
deny patterns.
2026-06-19 07:41:15 -04:00

195 lines
8.4 KiB
Python

"""Contract tests for the Tier 2 slash command, agent profile, and config fragment.
These tests verify that the templates the bootstrap copies to the Tier 2
clone contain the protocol contract that Tier 2 autonomous relies on.
"""
import json
import re
from pathlib import Path
# Template files under test. The paths are relative, so they resolve against
# the current working directory of the test run.
# Slash-command prompt template.
COMMAND_PATH = Path("conductor/tier2/commands/tier-2-auto-execute.md")
# Agent profile template (tier2-autonomous).
AGENT_PATH = Path("conductor/tier2/agents/tier2-autonomous.md")
# OpenCode config fragment; the tests below parse it as JSON.
CONFIG_PATH = Path("conductor/tier2/opencode.json.fragment")
def test_command_file_exists() -> None:
    """The slash-command template must be present at COMMAND_PATH."""
    command_file = COMMAND_PATH
    assert command_file.exists()
def test_command_prompt_no_appdata() -> None:
    """Regression guard for the 'NEVER USE APPDATA' directive (2026-06-18).

    The slash command prompt has to reference the project-relative
    failcount state directory (tests/artifacts/tier2_state/) and must not
    mention AppData, either as the <app-data> placeholder or as the old
    literal tier2 directory. Failure reports live next to the state in
    tests/artifacts/tier2_failures/; both sit inside the already-gitignored
    tests/artifacts/ tree.
    """
    prompt = COMMAND_PATH.read_text(encoding="utf-8")

    # Positive check first: the prompt must name the project-relative default.
    assert "tests/artifacts/tier2_state" in prompt, "command prompt must point at tests/artifacts/tier2_state/<track>/ for failcount state (Tier 2's project-relative default)"

    # Negative checks: neither AppData spelling may appear anywhere.
    forbidden = (
        ("<app-data>", "command prompt must NOT reference <app-data> (2026-06-18 NEVER USE APPDATA)"),
        ("AppData\\Local\\manual_slop\\tier2", "command prompt must NOT reference the AppData tier2 dir"),
    )
    for needle, why in forbidden:
        assert needle not in prompt, why
def test_command_has_frontmatter() -> None:
    """The command file must open with a `---`-delimited frontmatter block."""
    text = COMMAND_PATH.read_text(encoding="utf-8")
    # DOTALL lets the lazy `.*?` span the multi-line frontmatter body.
    frontmatter = re.compile(r"^---\n.*?\n---\n", re.DOTALL)
    assert frontmatter.match(text)
def test_command_takes_track_name_argument() -> None:
    """The prompt must use `$ARGUMENTS` and mention the track-name argument."""
    text = COMMAND_PATH.read_text(encoding="utf-8")
    assert "$ARGUMENTS" in text
    # Either spelling of the argument name is accepted.
    accepted_spellings = ("track-name", "<track-name>")
    assert any(spelling in text for spelling in accepted_spellings)
def test_command_uses_git_switch_not_checkout() -> None:
    """The protocol must branch with `git switch -c` and never `git checkout`.

    Only the numbered, backtick-quoted git commands found between the
    "## Protocol" and "## Hard Bans" headings are inspected; text outside
    that section is not checked for "checkout".
    """
    content = COMMAND_PATH.read_text(encoding="utf-8")
    assert "git switch -c" in content

    start = content.find("## Protocol")
    end = content.find("## Hard Bans")
    assert start != -1, "command prompt is missing the '## Protocol' section"
    # `end > start` also rejects end == -1 (heading missing), since start >= 0.
    assert end > start, "'## Hard Bans' section must follow '## Protocol'"
    protocol_section = content[start:end]

    # Matches list items shaped like "3. `git ...`" and captures the command.
    shell_lines = re.findall(r"^\s*\d+\.\s*`(git [^`]+)`", protocol_section, re.MULTILINE)
    assert shell_lines, "expected numbered git commands in protocol"
    assert all("checkout" not in line for line in shell_lines), f"protocol uses git checkout: {shell_lines}"
def test_command_fetches_origin_master() -> None:
    """The prompt must contain the literal `git fetch origin master`."""
    prompt = COMMAND_PATH.read_text(encoding="utf-8")
    fetch_command = "git fetch origin master"
    assert fetch_command in prompt
def test_command_initializes_failcount_state() -> None:
    """The prompt must mention `load_state` or (any case) "fresh state"."""
    prompt = COMMAND_PATH.read_text(encoding="utf-8")
    mentions_loader = "load_state" in prompt
    mentions_fresh_state = "fresh state" in prompt.lower()
    assert mentions_loader or mentions_fresh_state
def test_command_calls_should_give_up() -> None:
    """The prompt must reference `should_give_up`."""
    prompt = COMMAND_PATH.read_text(encoding="utf-8")
    give_up_hook = "should_give_up"
    assert give_up_hook in prompt
def test_command_writes_report_on_give_up() -> None:
    """The prompt must reference `write_failure_report`."""
    prompt = COMMAND_PATH.read_text(encoding="utf-8")
    report_hook = "write_failure_report"
    assert report_hook in prompt
def test_command_prints_abort_banner() -> None:
    """The prompt must contain an abort banner ("TRACK ABORTED" or "ABORTED")."""
    prompt = COMMAND_PATH.read_text(encoding="utf-8")
    banners = ("TRACK ABORTED", "ABORTED")
    assert any(banner in prompt for banner in banners)
def test_agent_file_exists() -> None:
    """The agent profile template must be present at AGENT_PATH."""
    agent_file = AGENT_PATH
    assert agent_file.exists()
def test_agent_denies_destructive_git() -> None:
    """The agent profile text must carry a deny rule for each destructive git verb."""
    profile = AGENT_PATH.read_text(encoding="utf-8")
    # Exact rule lines expected in the profile, checked in this order.
    deny_rules = (
        '"git push*": deny',
        '"git checkout*": deny',
        '"git restore*": deny',
        '"git reset*": deny',
    )
    for rule in deny_rules:
        assert rule in profile
def test_agent_denies_temp_writes() -> None:
    """Regression guard for temp-dir and AppData usage by the Tier 2 agent.

    History encoded by this test:
    - 2026-06-17: the agent wrote an audit JSON under
      C:\\Users\\Ed\\AppData\\Local\\Temp\\, outside the sandbox allowlist,
      which triggered the OpenCode session-level 'ask' prompt and halted
      ops.
    - 2026-06-18: the user directed 'NEVER USE APPDATA', and Tier 2 state
      was relocated to project-relative paths.

    The agent profile must therefore:
    - mention the AppData\\Local\\Temp deny rule,
    - carry the broader *AppData\\* bash deny rule (Local, LocalLow,
      Roaming, etc., not just Temp),
    - point at tests/artifacts/tier2_state/<track>/ for failcount state,
    - point at tests/artifacts/tier2_failures/ for failure reports,
    - NOT reference the old AppData\\Local\\manual_slop\\tier2 directory.
    """
    profile = AGENT_PATH.read_text(encoding="utf-8")

    # (substring, failure message) pairs that must all be present, in order.
    must_contain = (
        ('AppData\\Local\\Temp', "agent prompt must include Temp deny rule in frontmatter bash (kept for self-documentation)"),
        ("*AppData\\\\*", "agent prompt must include the broader *AppData\\* deny rule (added 2026-06-18)"),
        ("tests/artifacts/tier2_state", "agent prompt must point agent at tests/artifacts/tier2_state/<track>/ for failcount state (Tier 2's project-relative default)"),
        ("tests/artifacts/tier2_failures", "agent prompt must point agent at tests/artifacts/tier2_failures/ for failure reports"),
    )
    for needle, why in must_contain:
        assert needle in profile, why

    legacy_dir = "AppData\\Local\\manual_slop\\tier2"
    assert legacy_dir not in profile, "agent prompt must NOT reference the AppData tier2 dir (2026-06-18 NEVER USE APPDATA)"
def test_config_fragment_valid_json() -> None:
    """The config fragment must parse as JSON and lock down destructive git.

    Checks that `default_agent` is the tier2-autonomous agent and that its
    bash permission map sets every destructive git pattern to "deny". A
    presence-only check would still pass if a rule were flipped to "allow",
    so the value is asserted, as the top-level permission test does.
    """
    data = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
    assert data["default_agent"] == "tier2-autonomous"

    agent_bash = data["agent"]["tier2-autonomous"]["permission"]["bash"]
    for pattern in ("git push*", "git checkout*", "git restore*", "git reset*"):
        assert pattern in agent_bash, f"tier2-autonomous agent bash is missing pattern: {pattern!r}"
        assert agent_bash[pattern] == "deny", f"tier2-autonomous agent bash must deny pattern: {pattern!r}"
def test_config_fragment_has_top_level_model() -> None:
    """The fragment's top-level model must be the Tier 2 model.

    Regression test (2026-06-17): the clone inherited the main repo's
    default model, so Tier 2 ran on zai/glm-5 instead of
    minimax-coding-plan/MiniMax-M3. The top-level `model` field must exist
    and be set to MiniMax-M3.
    """
    config = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
    assert "model" in config, "top-level model field is required"
    model = config["model"]
    assert model == "minimax-coding-plan/MiniMax-M3", f"top-level model must be MiniMax-M3, got: {model}"
def test_config_fragment_has_top_level_permission() -> None:
    """The top-level permission block must be deny-by-default with a sandbox allowlist.

    Added 2026-06-17 after the agent-level permission.read was not enforced
    for the default agent, which led to ACCESS DENIED on manual_slop_tier2
    paths. read and write must deny `*` and allow the sandbox clone; bash
    must deny `*`, allow read-only git, and deny the destructive git verbs.
    """
    config = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
    sandbox_glob = "C:\\projects\\manual_slop_tier2\\**"

    assert "permission" in config
    top = config["permission"]

    # read: locked down except for the sandbox clone.
    assert "read" in top, "top-level permission.read is required"
    read_rules = top["read"]
    assert read_rules.get("*") == "deny", "top-level permission.read MUST deny *"
    assert read_rules.get(sandbox_glob) == "allow", "sandbox clone path must be allowlisted"

    # write: same shape as read.
    assert "write" in top
    write_rules = top["write"]
    assert write_rules.get("*") == "deny"
    assert write_rules.get(sandbox_glob) == "allow"

    # bash: deny by default, allow read-only git, deny destructive git.
    assert "bash" in top
    bash_rules = top["bash"]
    assert bash_rules.get("*") == "deny", "top-level bash MUST deny * (default agents are locked down)"
    assert bash_rules.get("git status*") == "allow", "read-only git commands must be in the allowlist"
    for destructive in ("git push*", "git checkout*", "git restore*", "git reset*"):
        assert bash_rules.get(destructive) == "deny"
def test_config_fragment_denies_temp_writes() -> None:
    """Both bash permission maps must deny every known global-temp-dir form.

    Regression test (2026-06-17, expanded 2026-06-19 to catch the env-var
    forms): the agent wrote audit output to
    C:\\Users\\Ed\\AppData\\Local\\Temp\\, which is outside the sandbox.
    The top-level bash map and the tier2-autonomous agent's bash map must
    each set the following patterns to "deny": the literal AppData paths,
    $env:TEMP, $env:TMP, %TEMP%, %TMP%, GetTempPath, gettempdir and
    mkstemp.
    """
    data = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
    top_bash = data["permission"]["bash"]
    agent_bash = data["agent"]["tier2-autonomous"]["permission"]["bash"]

    # Required deny patterns (matched against the literal command string).
    # NOTE(review): tempfile.NamedTemporaryFile is not covered by any pattern
    # below — confirm whether the fragment should deny it as well.
    required = [
        "*AppData\\*",
        "*AppData\\Local\\Temp\\*",
        "*$env:TEMP*",
        "*$env:TMP*",
        "*%TEMP%*",
        "*%TMP%*",
        "*GetTempPath*",
        "*gettempdir*",
        "*mkstemp*",
    ]
    for pat in required:
        assert top_bash.get(pat) == "deny", f"top-level bash must deny pattern: {pat!r}"
        assert agent_bash.get(pat) == "deny", f"tier2-autonomous agent bash must deny pattern: {pat!r}"