03c9df8450
The Tier 2 agent wrote audit_exception_handling.py output to C:\\Users\\Ed\\AppData\\Local\\Temp\\audit_initial.json via shell redirection. This is OUTSIDE the sandbox allowlist (which is C:\\projects\\manual_slop_tier2 + C:\\Users\\Ed\\AppData\\Local\\manual_slop\\tier2 + C:\\Users\\Ed\\AppData\\Local\\manual_slop\\tier2_failures). The OpenCode session-level guard fires the 'ask' prompt for paths outside the project root, which has no answer in an autonomous session, so ops halted mid-track. Fix (3 layers): 1. opencode.json.fragment: add bash deny rule '*AppData\\Local\\Temp\\*': 'deny' to BOTH the top-level permission.bash (for default agents) and the tier2-autonomous agent's permission.bash. The agent physically cannot run shell commands that target the global Temp dir. 2. conductor/tier2/agents/tier2-autonomous.md: add 'Temp files' convention telling the agent to use C:\\Users\\Ed\\AppData\\Local\\manual_slop\\tier2\\ for scratch / audit-output / intermediate files, NOT %TEMP%. 3. conductor/tier2/commands/tier-2-auto-execute.md: same convention in the slash command so the agent sees it at slash-command time. Tests (default-on): - test_agent_denies_temp_writes: agent prompt has the Temp deny in frontmatter bash + the app-data dir note - test_config_fragment_denies_temp_writes: both top-level and agent bash have the deny rule All 16 tier 2 slash command tests pass. Also: cleaned up the leaked audit_initial.json + audit.json + audit_after*.json from %TEMP% (they were leftovers from a prior run). Re-ran setup against the live clone; opencode.json's agent bash and top-level bash both have the deny rule.
152 lines
6.4 KiB
Python
152 lines
6.4 KiB
Python
"""Contract tests for the Tier 2 slash command, agent profile, and config fragment.
|
|
|
|
These tests verify that the templates the bootstrap copies to the Tier 2
|
|
clone contain the protocol contract that Tier 2 autonomous relies on.
|
|
"""
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
|
|
# Templates under test. The paths are relative, so they resolve against the
# current working directory when the tests run.
# Slash-command template.
COMMAND_PATH = Path("conductor/tier2/commands/tier-2-auto-execute.md")
# Agent profile template.
AGENT_PATH = Path("conductor/tier2/agents/tier2-autonomous.md")
# Config fragment template (parsed as JSON by the config tests).
CONFIG_PATH = Path("conductor/tier2/opencode.json.fragment")
|
|
|
|
|
|
def test_command_file_exists() -> None:
    """The slash-command template must be present at ``COMMAND_PATH``."""
    template_present = COMMAND_PATH.exists()
    assert template_present
|
|
|
|
|
|
def test_command_has_frontmatter() -> None:
    """The command file must open with a ``---``-fenced frontmatter block."""
    text = COMMAND_PATH.read_text(encoding="utf-8")
    # DOTALL lets the lazy ``.*?`` span lines up to the first closing fence.
    frontmatter = re.match(r"^---\n.*?\n---\n", text, re.DOTALL)
    assert frontmatter
|
|
|
|
|
|
def test_command_takes_track_name_argument() -> None:
    """The command must consume ``$ARGUMENTS`` and name the track-name argument.

    A single substring check for ``track-name`` is sufficient: any spelling
    of the placeholder, including ``<track-name>``, contains that bare
    substring, so a separate check for the bracketed form can never change
    the outcome.
    """
    content = COMMAND_PATH.read_text(encoding="utf-8")
    assert "$ARGUMENTS" in content, "command must reference $ARGUMENTS"
    assert "track-name" in content, "command must name the track-name argument"
|
|
|
|
|
|
def test_command_uses_git_switch_not_checkout() -> None:
    """The protocol must branch with ``git switch -c`` and never ``git checkout``.

    Only the numbered, backticked ``git ...`` steps found between the
    ``## Protocol`` and ``## Hard Bans`` headings are inspected, so mentions
    of ``checkout`` elsewhere in the file do not trip the check.
    """
    content = COMMAND_PATH.read_text(encoding="utf-8")
    assert "git switch -c" in content

    protocol_marker = "## Protocol"
    next_section_marker = "## Hard Bans"

    start = content.find(protocol_marker)
    assert start != -1, f"missing {protocol_marker!r} section"
    # Look for the closing heading only after the protocol heading; an
    # earlier occurrence would otherwise yield an empty slice and a
    # misleading "expected numbered git commands" failure below.
    end = content.find(next_section_marker, start + len(protocol_marker))
    assert end != -1, f"missing {next_section_marker!r} section after {protocol_marker!r}"
    protocol_section = content[start:end]

    # Capture the command text of list items shaped like "1. `git ...`".
    shell_lines = re.findall(r"^\s*\d+\.\s*`(git [^`]+)`", protocol_section, re.MULTILINE)
    assert shell_lines, "expected numbered git commands in protocol"
    assert all("checkout" not in line for line in shell_lines), f"protocol uses git checkout: {shell_lines}"
|
|
|
|
|
|
def test_command_fetches_origin_master() -> None:
    """The command text must contain the literal ``git fetch origin master``."""
    fetch_step = "git fetch origin master"
    assert fetch_step in COMMAND_PATH.read_text(encoding="utf-8")
|
|
|
|
|
|
def test_command_initializes_failcount_state() -> None:
    """The command must mention ``load_state`` or, in any letter case, "fresh state"."""
    text = COMMAND_PATH.read_text(encoding="utf-8")
    if "load_state" in text:
        return
    # Fall back to the prose wording, compared case-insensitively.
    assert "fresh state" in text.lower()
|
|
|
|
|
|
def test_command_calls_should_give_up() -> None:
    """The command text must reference ``should_give_up``."""
    required_name = "should_give_up"
    assert required_name in COMMAND_PATH.read_text(encoding="utf-8")
|
|
|
|
|
|
def test_command_writes_report_on_give_up() -> None:
    """The command text must reference ``write_failure_report``."""
    required_name = "write_failure_report"
    assert required_name in COMMAND_PATH.read_text(encoding="utf-8")
|
|
|
|
|
|
def test_command_prints_abort_banner() -> None:
    """The command text must contain an ``ABORTED`` banner.

    A banner worded ``TRACK ABORTED`` is covered by the same check, because
    it contains ``ABORTED``; testing for the longer phrase as an alternative
    can never change the outcome.
    """
    content = COMMAND_PATH.read_text(encoding="utf-8")
    assert "ABORTED" in content, "command must print an ABORTED banner"
|
|
|
|
|
|
def test_agent_file_exists() -> None:
    """The agent profile template must be present at ``AGENT_PATH``."""
    profile_present = AGENT_PATH.exists()
    assert profile_present
|
|
|
|
|
|
def test_agent_denies_destructive_git() -> None:
    """The agent profile must contain a ``deny`` entry for each listed git command."""
    profile = AGENT_PATH.read_text(encoding="utf-8")
    required_rules = (
        '"git push*": deny',
        '"git checkout*": deny',
        '"git restore*": deny',
        '"git reset*": deny',
    )
    # Checked in order, so the first missing rule is the one that fails.
    for rule in required_rules:
        assert rule in profile
|
|
|
|
|
|
def test_agent_denies_temp_writes() -> None:
    """Regression test (2026-06-17) for scratch output leaving the sandbox.

    The agent wrote an audit JSON to C:\\Users\\Ed\\AppData\\Local\\Temp\\,
    which is outside the sandbox allowlist; that triggered the OpenCode
    session-level 'ask' prompt and halted ops. The agent profile MUST
    therefore mention AppData\\Local\\Temp (the bash deny rule) and MUST
    direct the agent to the sandbox's app-data dir for temp files.
    """
    profile = AGENT_PATH.read_text(encoding="utf-8")

    temp_fragment = 'AppData\\Local\\Temp'
    assert temp_fragment in profile, "agent prompt must include Temp deny rule in frontmatter bash"

    # Either the literal sandbox path or a case-insensitive "app-data" mention is accepted.
    names_sandbox_dir = 'AppData\\Local\\manual_slop\\tier2' in profile
    mentions_app_data = 'app-data' in profile.lower()
    assert names_sandbox_dir or mentions_app_data, "agent prompt must point agent at the app-data dir for temp files"
|
|
|
|
|
|
def test_config_fragment_valid_json() -> None:
    """The fragment must parse as JSON, default to ``tier2-autonomous``, and
    list the destructive git patterns in that agent's bash permissions."""
    config = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
    assert config["default_agent"] == "tier2-autonomous"

    agent_permissions = config["agent"]["tier2-autonomous"]["permission"]
    # Only the presence of each pattern is checked here, not its allow/deny value.
    for pattern in ("git push*", "git checkout*", "git restore*", "git reset*"):
        assert pattern in agent_permissions["bash"]
|
|
|
|
|
|
def test_config_fragment_has_top_level_model() -> None:
    """The top-level model MUST be minimax-coding-plan/MiniMax-M3.

    Regression test (2026-06-17) for the bug where the clone inherited the
    main repo's default model, so Tier 2 ran on zai/glm-5 instead of the
    Tier 2 model, MiniMax-M3.
    """
    config = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
    assert "model" in config, "top-level model field is required"

    expected_model = "minimax-coding-plan/MiniMax-M3"
    actual_model = config["model"]
    assert actual_model == expected_model, f"top-level model must be MiniMax-M3, got: {actual_model}"
|
|
|
|
|
|
def test_config_fragment_has_top_level_permission() -> None:
    """Top-level permission read/write/bash MUST be deny-by-default with the
    sandbox clone allowlisted.

    Added 2026-06-17 after the bug where the agent's permission.read was not
    enforced for the default agent, leading to ACCESS DENIED on
    manual_slop_tier2 paths.
    """
    config = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
    assert "permission" in config
    top = config["permission"]
    sandbox_glob = "C:\\projects\\manual_slop_tier2\\**"

    # read: everything denied except the sandbox clone.
    assert "read" in top, "top-level permission.read is required"
    read_rules = top["read"]
    assert read_rules.get("*") == "deny", "top-level permission.read MUST deny *"
    assert read_rules.get(sandbox_glob) == "allow", "sandbox clone path must be allowlisted"

    # write: same shape as read.
    assert "write" in top
    write_rules = top["write"]
    assert write_rules.get("*") == "deny"
    assert write_rules.get(sandbox_glob) == "allow"

    # bash: deny by default, read-only git allowed, destructive git denied.
    assert "bash" in top
    bash_rules = top["bash"]
    assert bash_rules.get("*") == "deny", "top-level bash MUST deny * (default agents are locked down)"
    assert bash_rules.get("git status*") == "allow", "read-only git commands must be in the allowlist"
    for pattern in ("git push*", "git checkout*", "git restore*", "git reset*"):
        assert bash_rules.get(pattern) == "deny"
|
|
|
|
|
|
def test_config_fragment_denies_temp_writes() -> None:
    """Regression test (2026-06-17) for audit output written outside the sandbox.

    The agent wrote audit output to C:\\Users\\Ed\\AppData\\Local\\Temp\\.
    Both the top-level bash permissions and the tier2-autonomous agent's
    bash permissions MUST carry a deny rule for that location, so the
    session-level 'ask' prompt is never triggered. A rule counts when its
    pattern contains "Temp" and its value is "deny".
    """
    config = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
    top_bash = config["permission"]["bash"]
    agent_bash = config["agent"]["tier2-autonomous"]["permission"]["bash"]

    top_has_deny = any(
        "Temp" in pattern and verdict == "deny" for pattern, verdict in top_bash.items()
    )
    assert top_has_deny, "top-level bash must have a deny rule for AppData\\Local\\Temp\\ paths"

    agent_has_deny = any(
        "Temp" in pattern and verdict == "deny" for pattern, verdict in agent_bash.items()
    )
    assert agent_has_deny, "tier2-autonomous agent bash must have a deny rule for AppData\\Local\\Temp\\ paths"
|