#!/usr/bin/env bash
#
# Tier 2 autonomous mode: prevent sandbox-only file leaks.
#
# setup_tier2_clone.ps1 modifies opencode.json and mcp_paths.toml in the
# clone (C:\projects\manual_slop_tier2\), and copies the tier-2 agent
# prompt + slash command from conductor/tier2/ into .opencode/. If a
# tier-2 commit captures any of these via `git add .`, the main repo
# would absorb the sandbox's local config drift.
#
# This hook runs on `git commit` in the tier-2 clone. It reads the
# denylist from conductor/tier2/githooks/forbidden-files.txt and
# auto-unstages any staged file whose path contains a forbidden
# substring. The commit then proceeds with only the legitimate work.
#
# Layer 1 (OpenCode permission system) blocks the tier-2 agent from
# editing these files directly. This hook is the backup layer at the
# commit boundary. Layer 3 is the audit script
# scripts/audit_tier2_leaks.py in the main repo.
#
# Why auto-unstage instead of exit 1: tier-2 cannot run `git restore
# --staged` (banned by the sandbox permission rules), so a hard reject
# would leave the agent stuck mid-flow. Auto-unstage + warn is the
# recoverable behavior.
#
# Why exit 0 always: the hook must never block the agent. Its job is to
# remove the leak, not to gate the commit. The failcount machinery in
# scripts/tier2/failcount.py tracks repeated red-phase failures and
# gives up the run; adding a hook-induced exit 1 would pollute that
# signal.
#
# Why bash and not /bin/sh: iterating NUL-delimited `git diff -z` output
# needs `read -d ''`, which is not POSIX (dash rejects it). The script
# also uses an array and process substitution, so the interpreter is
# named explicitly.

CONFIG="conductor/tier2/githooks/forbidden-files.txt"

#######################################
# Test whether a path contains any forbidden substring.
#
# Each non-blank, non-comment line of the denylist is a literal
# substring to look for in the path. `case` with a quoted pattern does
# the matching in-process, so nothing is spawned per file.
#
# The denylist may have CRLF line endings (it can be written through
# Python's text mode on Windows). One trailing \r is stripped from each
# line before matching; otherwise `*pattern*` would never match a clean
# path because the pattern would carry a stray carriage return.
#
# Arguments: $1 - staged path to test
#            $2 - denylist file (defaults to $CONFIG)
# Returns:   0 if the path matches a pattern, 1 otherwise (including
#            when the denylist file does not exist)
#######################################
tier2_path_is_forbidden() {
  local path=$1
  local config=${2:-$CONFIG}
  local pattern

  [ -f "$config" ] || return 1

  # `|| [ -n "$pattern" ]` keeps a final line that lacks a newline.
  while IFS= read -r pattern || [ -n "$pattern" ]; do
    pattern=${pattern%$'\r'}
    case "$pattern" in
      '' | '#'*) continue ;;
    esac
    case "$path" in
      *"$pattern"*) return 0 ;;
    esac
  done < "$config"

  return 1
}

#######################################
# Remove one path from the pending commit, leaving the working tree
# file untouched.
#
# A path that exists in HEAD has its index entry reset to HEAD.
# `git rm --cached` must NOT be used for such a path: it drops the
# entry from the index, which stages a *deletion*, and the commit
# would then delete the file from the repository.
#
# A path that is not in HEAD (newly added, or no commits yet) is
# dropped from the index with `git rm --cached`, so it becomes
# untracked again. `--force` is needed when the staged content differs
# from the working tree file (staged, then modified again).
#
# NOTE(review): `git restore` is banned for the agent in the sandbox;
# this assumes `git reset`, run by the hook itself, is not restricted.
#
# `--literal-pathspecs` stops glob or magic characters in a file name
# from being interpreted as a pathspec. Failures are ignored on
# purpose: the hook must never block the commit.
#
# Arguments: $1 - path relative to the repository root
# Returns:   0 always
#######################################
tier2_unstage() {
  local path=$1

  if git cat-file -e "HEAD:${path}" 2>/dev/null; then
    git --literal-pathspecs reset --quiet -- "$path" >/dev/null 2>&1 || true
  else
    git --literal-pathspecs rm --cached --quiet --force -- "$path" \
      >/dev/null 2>&1 || true
  fi
  return 0
}

#######################################
# Hook entry point: find staged paths that match the denylist, report
# them on stderr and unstage them.
#
# Globals:   CONFIG (read)
# Outputs:   warning text on stderr when something was unstaged
# Returns:   0 always
#######################################
tier2_main() {
  local -a leaked=()
  local path

  # No denylist means there is nothing to enforce.
  [ -f "$CONFIG" ] || return 0

  # Paths are read NUL-delimited so that spaces or newlines in a file
  # name cannot split or merge entries. The loop is fed by process
  # substitution (not a pipe) so that `leaked` is filled in this shell.
  # `--no-renames` lists both sides of a rename, so a forbidden path
  # cannot hide behind rename detection.
  while IFS= read -r -d '' path; do
    [ -n "$path" ] || continue
    if tier2_path_is_forbidden "$path" "$CONFIG"; then
      leaked+=("$path")
    fi
  done < <(git diff --cached --name-only --no-renames -z 2>/dev/null)

  [ "${#leaked[@]}" -gt 0 ] || return 0

  echo "Tier 2: removing sandbox-only files from staging" >&2
  echo "(these files belong in the main repo, not in tier-2 commits):" >&2
  for path in "${leaked[@]}"; do
    printf '  - %s\n' "$path" >&2
    tier2_unstage "$path"
  done
  printf '\n' >&2
  echo "Commit will proceed without these files. To inspect what was" >&2
  echo "removed, run: git status" >&2

  return 0
}

# Last command of the hook: `|| true` guarantees exit status 0.
tier2_main "$@" || true