# manual_slop/scripts/audit_no_temp_writes.py

"""Scan ./scripts/** for any usage of the global %TEMP% directory.

Used to verify the Tier 2 sandbox invariant: no production script
under ./scripts/ may write to C:\\Users\\Ed\\AppData\\Local\\Temp\\
(or any other platform temp dir). All scratch / intermediate files
must live in:
- ./tests/artifacts/  (for test artifacts)
- C:\\Users\\Ed\\AppData\\Local\\manual_slop\\tier2\\  (for app data)

This script is the canonical audit. The persistent enforcement is
tests/test_no_temp_writes.py (a default-on pytest test that calls
this audit's main() and asserts the return code is 0).

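Exit codes:
  0  CLEAN: no script emits to %TEMP% (also returned when matches are
     found but --strict was not passed)
  1  FOUND: --strict was passed and at least one script uses %TEMP%
     (matches are printed to stdout)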
"""
import argparse
import json
import re
import sys
from pathlib import Path

# Patterns that indicate a script is using the global temp directory.
# The patterns cover:
#   - Python: tempfile module, os.environ['TEMP'], os.getenv('TEMP'), etc.
#   - PowerShell: $env:TEMP, $env:TMP, [System.IO.Path]::GetTempPath()
#   - cmd: %TEMP%, %TMP%
#   - Unix-style: /tmp/, $TMPDIR, mktemp (sometimes used in cross-platform code)
#
# The environment-variable patterns end in \b so that unrelated names which
# merely start with TEMP/TMP (e.g. TEMPLATE_DIR) are not reported.
# In the os.environ / os.getenv patterns the "." wildcards stand for the
# opening bracket/paren and the quote character, so either quote style matches.
PATTERNS = [
    r"tempfile\.",
    r"gettempdir",
    r"mkstemp",
    r"mkdtemp",
    r"NamedTemporaryFile",
    r"TemporaryFile",
    r"TemporaryDirectory",
    r"os\.environ\[.(?:TEMP|TMP|TMPDIR)\b",
    r"os\.environ\.get..(?:TEMP|TMP|TMPDIR)\b",
    r"os\.getenv..(?:TEMP|TMP|TMPDIR)\b",
    r"\$env:(?:TEMP|TMP|TMPDIR)\b",
    r"GetTempPath",
    r"GetTempFileName",
    r"%TEMP%",
    r"%TMP%",
    r"/tmp/",
    r"\$\{?TMPDIR\b",
    r"\bmktemp\b",
    r"\bTempDir\b",
    r"\btempfile\b",
]
# All alternatives are joined into one case-insensitive regex so each line of
# a scanned file needs only a single search.
COMPILED = re.compile("|".join(PATTERNS), re.IGNORECASE)

# Throw-away scripts from prior Tier 2 tracks live here. They are
# archived for reference but are not part of the production code.
# The audit excludes them.
# Entries are forward-slash paths that include the leading "scripts/"
# component (the form produced for the default scan root), because
# find_violations() compares them against the slash-normalized path of
# each scanned file.
EXCLUDE_DIRS = {"scripts/tier2/artifacts"}

# This audit script itself contains the patterns it searches for.
# Exclude it so the audit does not report its own pattern definitions.
# Other audit scripts (e.g. audit_test_sandbox_violations.py) also
# legitimately reference tempfile in their docstring/pattern definitions.
# find_violations() skips a file only when its slash-normalized path
# equals one of these entries exactly.
EXCLUDE_FILES = {
 "scripts/audit_no_temp_writes.py",
 "scripts/audit_test_sandbox_violations.py",
}


def find_violations(
 root: str = "scripts",
 *,
 pattern: "re.Pattern[str] | None" = None,
 exclude_dirs: "set[str] | None" = None,
 exclude_files: "set[str] | None" = None,
) -> list[dict[str, object]]:
 """Scan script files under *root* and return every line matching the temp-dir pattern.

 Args:
  root: Directory to walk recursively. Reported paths (and the paths the
   exclusion lists are compared against) are built from this value, so
   they are relative to the current working directory when *root* is.
  pattern: Compiled regex applied to each line. Defaults to the
   module-level COMPILED.
  exclude_dirs: Forward-slash directory paths whose contents are skipped.
   Defaults to the module-level EXCLUDE_DIRS.
  exclude_files: Forward-slash file paths skipped on exact match.
   Defaults to the module-level EXCLUDE_FILES.

 Returns:
  A list of ``{"path", "line", "content"}`` dicts, one per matching line,
  ordered by file path and then by line number. ``line`` is 1-based and
  ``content`` is the matching line with surrounding whitespace stripped.

 Files that cannot be read are skipped with a warning on stderr rather than
 silently, so a permissions problem cannot masquerade as a clean audit.
 """
 matcher = COMPILED if pattern is None else pattern
 skip_dirs = EXCLUDE_DIRS if exclude_dirs is None else exclude_dirs
 skip_files = EXCLUDE_FILES if exclude_files is None else exclude_files

 # Match on "dir/" so that excluding "scripts/tier2/artifacts" does not also
 # exclude a sibling such as "scripts/tier2/artifacts_old".
 dir_prefixes = tuple(d.rstrip("/") + "/" for d in skip_dirs)
 script_suffixes = {".py", ".ps1", ".sh", ".bat", ".cmd", ".psm1"}

 results: list[dict[str, object]] = []
 # Sorted so the report order does not depend on filesystem enumeration order.
 for f in sorted(Path(root).rglob("*")):
  if not f.is_file():
   continue
  # Lower-cased so RUN.BAT / Deploy.PS1 are audited like their lowercase forms.
  if f.suffix.lower() not in script_suffixes:
   continue
  rel = f.as_posix()
  # str.startswith accepts a tuple; an empty tuple never matches.
  if rel.startswith(dir_prefixes):
   continue
  if rel in skip_files:
   continue
  try:
   # errors="ignore" drops undecodable bytes, so only OS-level failures remain.
   content = f.read_text(encoding="utf-8", errors="ignore")
  except OSError as exc:
   print(f"WARNING: could not read {rel}: {exc}", file=sys.stderr)
   continue
  for i, line in enumerate(content.splitlines(), 1):
   if matcher.search(line):
    results.append({"path": rel, "line": i, "content": line.strip()})
 return results


def main(argv: "list[str] | None" = None) -> int:
 """Run the audit from the command line and return the process exit code.

 Args:
  argv: Argument list to parse, without the program name. When None
   (the default) argparse reads sys.argv[1:], which is the behaviour of
   a plain ``main()`` call. Passing an explicit list lets other code
   (e.g. a test running under pytest) invoke the audit without the
   host process's own command-line arguments being parsed.

 Returns:
  1 when --strict was given and at least one match was found, otherwise 0.

 The report is printed to stdout: a JSON object with "violations" and
 "count" keys when --json is given, otherwise a human-readable listing.
 """
 parser = argparse.ArgumentParser(
  description=__doc__,
  formatter_class=argparse.RawDescriptionHelpFormatter,
 )
 parser.add_argument("--json", action="store_true", help="Output JSON instead of human-readable report")
 parser.add_argument("--strict", action="store_true", help="Exit 1 if any violations are found (for CI use; the convention's CI gate)")
 args = parser.parse_args(argv)

 violations = find_violations()

 if args.json:
  print(json.dumps({"violations": violations, "count": len(violations)}, indent=2))
 elif not violations:
  print("CLEAN: no script under ./scripts/ emits to %TEMP%")
 else:
  print(f"FOUND {len(violations)} matches:")
  for v in violations:
   print(f"  {v['path']}:{v['line']}: {v['content']}")

 # Without --strict the run is informational only and always succeeds.
 return 1 if (args.strict and violations) else 0


# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
 raise SystemExit(main())