feat(audit): add no-temp-writes audit + regression test
Tier 2 sandbox invariant: no production script under ./scripts/ may
write to the global %TEMP% directory (C:\\Users\\Ed\\AppData\\Local\\
Temp\\). All scratch / intermediate files must live in:
- ./tests/artifacts/ (for test artifacts)
- C:\\Users\\Ed\\AppData\\Local\\manual_slop\\tier2\\ (for app data)
Writing to %TEMP% breaks the sandbox boundary: the OpenCode session
fires the 'ask' prompt for paths outside the project root, halting
autonomous ops (the 2026-06-17 bug with audit_exception_handling.py
output being written to %TEMP% by the agent's shell redirection).
Convention enforcement (per conductor/workflow.md Audit Script Policy):
- scripts/audit_no_temp_writes.py: the canonical audit. Same shape
as scripts/audit_exception_handling.py: --json for machine output,
--strict for the CI gate (exits 1 on any violation). Patterns
cover tempfile module, os.environ['TEMP'], C:\Users\Ed\AppData\Local\Temp, %TEMP%,
/tmp/, etc. Excludes the throw-away archive at scripts/tier2/
artifacts/ and itself (so it can find its own pattern defs).
- tests/test_no_temp_writes.py: default-on regression test. Calls
the audit with --strict and asserts exit 0. If a new script
under ./scripts/ ever uses %TEMP%, the test fails and CI breaks.
Current state: CLEAN. All 36 tier2 tests pass (1 new + 16 slash
command spec + 13 failcount + 6 opt-in). Sanity-checked: dropping
a fake 'import tempfile' script into ./scripts/ triggered exit 1
with 'FOUND 1 matches: scripts/_test_temp_check/test_uses_temp.py:1:
import tempfile'.
Future: also add a corresponding deny rule to the sandbox bash
permission in a follow-up if needed (already added in 03c9df84 for
the agent's own bash). The audit + test is the structural guard.
This commit is contained in:
@@ -0,0 +1,108 @@
|
||||
"""Scan ./scripts/** for any usage of the global %TEMP% directory.
|
||||
|
||||
Used to verify the Tier 2 sandbox invariant: no production script
|
||||
under ./scripts/ may write to C:\\Users\\Ed\\AppData\\Local\\Temp\\
|
||||
(or any other platform temp dir). All scratch / intermediate files
|
||||
must live in:
|
||||
- ./tests/artifacts/ (for test artifacts)
|
||||
- C:\\Users\\Ed\\AppData\\Local\\manual_slop\\tier2\\ (for app data)
|
||||
|
||||
This script is the canonical audit. The persistent enforcement is
|
||||
tests/test_no_temp_writes.py (a default-on pytest test that calls
|
||||
this script with --strict in a subprocess and asserts exit code 0).
|
||||
|
||||
Exit codes:
|
||||
0 CLEAN: no script emits to %TEMP%
|
||||
1 FOUND: at least one script uses %TEMP% (printed to stdout). Only returned with --strict; without it the exit code is 0 even when matches are found.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Patterns that indicate a script is using the global temp directory.
# Matching is case-insensitive (see COMPILED below). The patterns cover:
# - Python: tempfile module, os.environ['TEMP'], os.getenv('TEMP'), etc.
# - PowerShell: $env:TEMP, $env:TMP
# - cmd: %TEMP%, %TMP%
# - Unix-style: /tmp/ (sometimes used in cross-platform code)
# - Hard-coded Windows temp paths: ...\AppData\Local\Temp
PATTERNS = [
    r"tempfile\.",
    r"gettempdir",
    r"mkstemp",
    r"NamedTemporaryFile",
    r"TemporaryFile",
    # The single "." after "[" / the ".." after "get" stand in for the quote
    # (and opening parenthesis) so both ' and " spellings are matched.
    r"os\.environ\[.TEMP",
    r"os\.environ\[.TMP",
    r"os\.environ\.get..TEMP",
    r"os\.environ\.get..TMP",
    r"\$env:TEMP",
    r"\$env:TMP",
    r"%TEMP%",
    r"%TMP%",
    r"/tmp/",
    r"\bTempDir\b",
    r"\btempfile\b",
    # os.getenv() is the other common way to read the temp env vars.
    r"os\.getenv..TEMP",
    r"os\.getenv..TMP",
    # A literal Windows temp path. "[\\/]+" accepts forward slashes, single
    # backslashes, and the doubled backslashes of an escaped string literal.
    # The trailing \b keeps sibling names that merely start with "Temp"
    # from matching.
    r"AppData[\\/]+Local[\\/]+Temp\b",
]
COMPILED = re.compile("|".join(PATTERNS), re.IGNORECASE)
|
||||
|
||||
# Throw-away scripts from prior Tier 2 tracks live here. They are
# archived for reference but are not part of the production code.
# The audit excludes them.
EXCLUDE_DIRS = {"scripts/tier2/artifacts"}

# This audit script itself contains the patterns it searches for.
# Exclude it so the audit does not flag its own pattern definitions.
EXCLUDE_FILES = {"scripts/audit_no_temp_writes.py"}

# Script file types that are scanned; compared against the lower-cased suffix.
_SCANNED_SUFFIXES = frozenset({".py", ".ps1", ".sh", ".bat", ".cmd", ".psm1"})


def _is_excluded(rel: str) -> bool:
    """Return True when the '/'-separated path *rel* is exempt from the audit."""
    if rel in EXCLUDE_FILES:
        return True
    # Require a path-separator boundary so that a sibling directory sharing
    # the prefix (e.g. "scripts/tier2/artifacts_v2") is still audited.
    return any(rel.startswith(d.rstrip("/") + "/") for d in EXCLUDE_DIRS)


def find_violations(root: str = "scripts") -> list[dict[str, object]]:
    """Return a list of violations: each is {path, line, content}.

    Args:
        root: Directory to scan recursively. Exclusions in EXCLUDE_DIRS and
            EXCLUDE_FILES are matched against the '/'-separated path as
            produced from *root*, so they apply when *root* is the default
            relative "scripts".

    Returns:
        One dict per matching line, with "path" ('/'-separated), "line"
        (1-based line number) and "content" (the stripped line). Files are
        visited in sorted path order so the report is stable across runs.
    """
    results: list[dict[str, object]] = []
    for f in sorted(Path(root).rglob("*")):
        if not f.is_file():
            continue
        # Lower-case the suffix: Windows scripts are often named *.PS1 / *.BAT.
        if f.suffix.lower() not in _SCANNED_SUFFIXES:
            continue
        rel = f.as_posix()
        if _is_excluded(rel):
            continue
        try:
            # errors="ignore" drops undecodable bytes, so only I/O errors
            # can be raised here.
            content = f.read_text(encoding="utf-8", errors="ignore")
        except OSError as exc:
            # Best effort: keep scanning, but say so instead of silently
            # treating an unreadable file as clean. stderr keeps --json
            # output on stdout parseable.
            print(f"warning: could not read {rel}: {exc}", file=sys.stderr)
            continue
        results.extend(
            {"path": rel, "line": i, "content": line.strip()}
            for i, line in enumerate(content.splitlines(), 1)
            if COMPILED.search(line)
        )
    return results
|
||||
|
||||
|
||||
def main() -> int:
    """Run the audit from the command line and return the process exit code.

    Returns:
        0 when no violations are found, or when violations are found but
        --strict was not given (report-only mode).
        1 when --strict is given and at least one violation is found.
        2 when ./scripts does not exist relative to the current working
        directory, so nothing could be audited.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--json", action="store_true", help="Output JSON instead of human-readable report")
    parser.add_argument("--strict", action="store_true", help="Exit 1 if any violations are found (for CI use; the convention's CI gate)")
    args = parser.parse_args()

    # find_violations() scans the relative "scripts" directory. If the audit
    # is launched from the wrong working directory there are no files to
    # scan, and the run would otherwise be reported as CLEAN. Fail loudly.
    scripts_root = Path("scripts")
    if not scripts_root.is_dir():
        print(
            f"ERROR: {scripts_root.as_posix()}/ not found under {Path.cwd()}; "
            "run the audit from the repository root",
            file=sys.stderr,
        )
        return 2

    violations = find_violations()

    if args.json:
        print(json.dumps({"violations": violations, "count": len(violations)}, indent=2))
    elif not violations:
        print("CLEAN: no script under ./scripts/ emits to %TEMP%")
    else:
        print(f"FOUND {len(violations)} matches:")
        for v in violations:
            print(f" {v['path']}:{v['line']}: {v['content']}")

    # Only --strict turns findings into a failing exit code.
    return 1 if (args.strict and violations) else 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -0,0 +1,35 @@
|
||||
"""Default-on regression test: no script under ./scripts/ may write to
|
||||
the global %TEMP% directory (C:\\Users\\Ed\\AppData\\Local\\Temp\\).
|
||||
|
||||
The Tier 2 sandbox is supposed to keep all scratch / intermediate
|
||||
files inside its allowlist (C:\\projects\\manual_slop_tier2 +
|
||||
C:\\Users\\Ed\\AppData\\Local\\manual_slop\\tier2 +
|
||||
C:\\Users\\Ed\\AppData\\Local\\manual_slop\\tier2_failures). Writing
|
||||
to the global Temp dir breaks that boundary: the OpenCode session
|
||||
fires the 'ask' prompt for paths outside the project root, halting
|
||||
autonomous ops.
|
||||
|
||||
The test delegates to scripts/audit_no_temp_writes.py --strict
|
||||
which exits 1 on any violation. If this test fails, a new script
|
||||
under ./scripts/ is using %TEMP% and the Tier 2 sandbox boundary
|
||||
has been violated.
|
||||
"""
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_no_script_emits_to_temp() -> None:
    """Run the audit script with --strict and require a clean result."""
    # Anchor on this file (tests/test_no_temp_writes.py) instead of the
    # current working directory: the audit scans the relative "scripts"
    # directory, so both the script path and the subprocess cwd must point
    # at the repository root no matter where pytest was launched from.
    repo_root = Path(__file__).resolve().parent.parent
    audit = repo_root / "scripts" / "audit_no_temp_writes.py"
    assert audit.exists(), f"audit script missing: {audit}"
    result = subprocess.run(
        ["uv", "run", "python", str(audit), "--strict"],
        capture_output=True, text=True, timeout=60,
        cwd=str(repo_root),
    )
    assert result.returncode == 0, (
        f"audit found %TEMP% usage in scripts:\n{result.stdout}\n{result.stderr}\n\n"
        f"Fix: move scratch files to tests/artifacts/ or "
        f"C:\\Users\\Ed\\AppData\\Local\\manual_slop\\tier2\\ instead of %TEMP%."
    )
    # Exit 0 alone is not enough: also require the explicit CLEAN report line.
    assert "CLEAN" in result.stdout, f"unexpected audit output: {result.stdout}"
|
||||
Reference in New Issue
Block a user