#!/usr/bin/env python3
"""Detect tests that attempt writes outside ./tests/ via hardcoded paths.

Run from repo root: python scripts/audit_test_sandbox_violations.py

Exit codes:
  0  CLEAN (or informational mode with violations listed)
  1  STRICT mode with at least one violation, or the tests directory was not found

Patterns flagged:
  - Path("manual_slop.toml") / Path("config.toml") / etc. (top-level TOML/INI)
  - open("manual_slop.toml", "w") and similar write-mode calls
  - Path("C:/projects/...") and Path("C:\\projects\\...") (project root literals)
  - Path("tests/artifacts/...") literal (violates workspace_paths.md)
  - tempfile.mkdtemp() / tempfile.mkstemp() calls (every call is flagged; the dir= argument is not inspected)

Reference: conductor/tracks/test_sandbox_hardening_20260619/spec.md (FR4)
"""
from __future__ import annotations
import argparse
import json
import re
import sys
from pathlib import Path

# Basenames (without extension) of the top-level TOML files that test code
# must not reference through a bare relative path.
TOML_BASENAMES = (
 "manual_slop", "config", "credentials",
 "presets", "personas", "tool_presets",
 "workspace_profiles", "project",
 "manualslop_layout", "manualslop_history",
)
# Basenames (without extension) of the top-level INI files covered by the same rule.
INI_BASENAMES = (
 "manualslop_layout", "manualslop_history",
)
# re.escape keeps each alternative literal even if a future basename contains
# a regex metacharacter such as "." or "+".
_BASENAME_GROUP = "|".join(re.escape(name) for name in TOML_BASENAMES)
_INI_GROUP = "|".join(re.escape(name) for name in INI_BASENAMES)

# Either quote style of a Python string literal.
_QUOTE = r'["\']'
# open("<basename>.toml", "<mode>"  -- everything up to the opening quote of
# the mode argument. Whitespace around the comma is optional and the mode may
# be passed by keyword (mode="w").
_OPEN_TOML_PREFIX = (
 rf'open\(\s*{_QUOTE}(?:{_BASENAME_GROUP})\.toml{_QUOTE}'
 rf'\s*,\s*(?:mode\s*=\s*)?{_QUOTE}'
)

# Each pattern is searched against a single source line. The number and order
# of entries is unchanged from the original list.
PATTERNS = [
 # Path("<basename>.toml") / Path("<basename>.ini") with a bare relative name.
 re.compile(rf'Path\({_QUOTE}(?:{_BASENAME_GROUP})\.toml{_QUOTE}'),
 re.compile(rf'Path\({_QUOTE}(?:{_INI_GROUP})\.ini{_QUOTE}'),
 # Truncating / exclusive-create modes: "w", "x" plus any b/t/+ modifiers.
 re.compile(_OPEN_TOML_PREFIX + r'[wx][bt+]*' + _QUOTE),
 # Append modes ("a", "ab", "a+", ...) and read-update modes ("r+", "rb+", "r+b").
 re.compile(_OPEN_TOML_PREFIX + r'(?:a[bt+]*|r[bt]*\+[bt]*)' + _QUOTE),
 # Absolute Windows project-root literals, with forward or back slashes.
 re.compile(r'Path\(["\']C:[/\\]+projects'),
 # Literal tests/artifacts/ paths.
 re.compile(r'Path\(["\']tests/artifacts/'),
 # Any tempfile.mkdtemp( / tempfile.mkstemp( call; dir= is not inspected.
 re.compile(r"tempfile\.mk(?:dt|st)emp\("),
]

# Directory names whose contents are skipped when scanning the default tests dir.
EXCLUDE_DIRS = {"artifacts", "logs", "__pycache__", "snapshots"}


def find_violations(tests_dir: Path, apply_excludes: bool = True) -> list[tuple[Path, int, str]]:
 """Scan ``test_*.py`` files under *tests_dir* for lines matching ``PATTERNS``.

 Args:
  tests_dir: Directory searched recursively for ``test_*.py`` files.
  apply_excludes: When true, skip files located inside a directory whose
   name is in ``EXCLUDE_DIRS`` anywhere below *tests_dir*.

 Returns:
  ``(file, line_number, stripped_line)`` tuples, one per offending line,
  ordered by file path and then by line number. Line numbers are 1-based.
 """
 violations: list[tuple[Path, int, str]] = []
 # Sorting makes the report independent of filesystem enumeration order.
 for test_file in sorted(tests_dir.rglob("test_*.py")):
  if apply_excludes:
   # Only directories below tests_dir count. Matching against the absolute
   # path would silently skip every file whenever the checkout itself lives
   # under a directory named e.g. "logs" or "artifacts".
   sub_dirs = test_file.relative_to(tests_dir).parts[:-1]
   if any(part in EXCLUDE_DIRS for part in sub_dirs):
    continue
  try:
   content = test_file.read_text(encoding="utf-8")
  except (OSError, UnicodeDecodeError):
   # Best effort: unreadable or non-UTF-8 files are skipped, not fatal.
   continue
  for lineno, line in enumerate(content.splitlines(), start=1):
   # A line is reported once even when several patterns match it.
   if any(pattern.search(line) for pattern in PATTERNS):
    violations.append((test_file, lineno, line.strip()))
 return violations


def _display_path(path: Path, root: Path) -> str:
 """Return *path* relative to *root* when it lies under it, else *path* itself."""
 try:
  return str(path.relative_to(root))
 except ValueError:
  # --tests-dir may point outside the repository; show the full path then.
  return str(path)


def main(argv: list[str] | None = None) -> int:
 """Run the audit and print a human-readable or JSON report.

 Args:
  argv: Command-line arguments without the program name. ``None`` (the
   default) makes argparse read ``sys.argv[1:]``.

 Returns:
  Process exit status: 1 when the tests directory is missing, or when
  ``--strict`` is given and at least one violation was found; 0 otherwise.
 """
 parser = argparse.ArgumentParser(
  description=__doc__,
  formatter_class=argparse.RawDescriptionHelpFormatter,
 )
 parser.add_argument("--json", action="store_true", help="Output JSON instead of human-readable report")
 parser.add_argument("--strict", action="store_true", help="Exit 1 if any violations are found (CI gate)")
 parser.add_argument("--tests-dir", default="tests", help="Tests directory to scan (default: tests)")
 args = parser.parse_args(argv)

 # The script lives one directory below the repository root.
 repo_root = Path(__file__).resolve().parent.parent
 # Joining with "/" keeps an absolute --tests-dir as-is and anchors a
 # relative one at the repository root (not at the current directory).
 tests_dir = (repo_root / args.tests_dir).resolve()
 if not tests_dir.is_dir():
  print(f"Tests dir not found: {tests_dir}", file=sys.stderr)
  return 1

 # Excludes apply only to the repository's own tests directory. Compare the
 # already-resolved scan target so the result does not depend on the CWD.
 apply_excludes = tests_dir == (repo_root / "tests").resolve()
 violations = find_violations(tests_dir, apply_excludes=apply_excludes)

 if args.json:
  payload = {
   "tests_dir": str(tests_dir),
   "count": len(violations),
   "violations": [
    {"path": _display_path(p, repo_root), "line": ln, "content": c}
    for p, ln, c in violations
   ],
  }
  print(json.dumps(payload, indent=2))
 elif not violations:
  print("OK: No test source code references hardcoded paths outside ./tests/.")
 else:
  print(f"FAIL: {len(violations)} test source line(s) reference hardcoded paths:")
  for path, lineno, line in violations:
   print(f"  {_display_path(path, repo_root)}:{lineno}: {line}")

 # Violations fail the run only in strict (CI gate) mode.
 return 1 if (args.strict and violations) else 0


if __name__ == "__main__":
 # Hand main()'s return value to the interpreter as the process exit status.
 raise SystemExit(main())