3260c141c6
audit_tier2_leaks bug: when test fixtures (tmp_path) live inside the parent git repo, `git diff` and `git ls-files` search upward for a parent .git/ directory and report the PARENT's modified files. This made tests/test_audit_tier2_leaks.py fail because the audit reported mcp_paths.toml + opencode.json as 'modified' even though those files are in the parent repo, not in the clean tmp_path fixture.
Fix: set GIT_DIR to repo_root/.git in the env passed to the git subprocesses. When repo_root is not itself a repository (as with a tmp_path fixture), that path does not exist, so git fails, which the audit treats as 'no modifications' / 'no tracked files'.
test_palette_starts_hidden hardening: live_gui is session-scoped, so other tests may leave the palette open. Pre-toggle the palette before asserting it is hidden; this converts a 'depends on test ordering' test into a 'palette is closable' test.
Verification:
- tier-1-unit-core: ALL 5 batches PASS (was 5 failures)
- tier-3-live_gui: test_gui2_custom_callback_hook_works now PASSES (was FAILED); other live_gui flakes surface non-deterministically per batch run (pre-existing issue, not caused by this fix)
221 lines
7.3 KiB
Python
221 lines
7.3 KiB
Python
"""Audit for tier-2 sandbox-only files leaking into the main repo.
|
|
|
|
Defense-in-depth layer 3 (after the pre-commit hook at the commit
|
|
boundary): scans the working tree for files matching the forbidden
|
|
patterns in conductor/tier2/githooks/forbidden-files.txt. If any
|
|
match, the file is reported as a leak.
|
|
|
|
Usage:
|
|
uv run python scripts/audit_tier2_leaks.py # informational
|
|
uv run python scripts/audit_tier2_leaks.py --strict # CI gate (exit 1)
|
|
uv run python scripts/audit_tier2_leaks.py --json # machine-readable
|
|
|
|
Behavior:
|
|
- Walks the working tree, skipping .git/, node_modules/, and
|
|
__pycache__/ (anything git would ignore at the build level)
|
|
- For each candidate file, checks if its relative path contains
|
|
any forbidden pattern as a substring
|
|
- Reports each leak with its path and status (untracked/modified)
|
|
- Default mode exits 0; --strict mode exits 1 if any leaks
|
|
|
|
This script is the manual/CI guard. The pre-commit hook at
|
|
conductor/tier2/githooks/pre-commit is the live guard; both layers
|
|
must be present for the defense-in-depth contract to hold.
|
|
"""
|
|
import argparse
import json
import os
import subprocess
import sys
from pathlib import Path
|
|
|
|
# Location of the forbidden-pattern denylist, relative to the repo root.
CONFIG_REL = Path("conductor") / "tier2" / "githooks" / "forbidden-files.txt"

# A file is excluded from the working-tree walk when any component of its
# relative path is one of these names.
SKIP_DIRS = {
    ".git",
    ".venv",
    "__pycache__",
    "node_modules",
    "venv",
}

# A file is also excluded when the FIRST component of its relative path is
# one of these names. They cover the test infrastructure and the canonical
# source directory for tier-2 files, which are project-controlled rather
# than tier-2-sandbox-controlled. Note that the whole top-level conductor/
# directory is skipped, not only conductor/tier2/.
SKIP_TOP_DIRS = {"conductor", "tests"}
|
|
|
|
|
|
def load_patterns(config_path: Path) -> list[str]:
    """Load substring patterns from the denylist config.

    The file is read as UTF-8. A leading byte-order mark (as written by
    some Windows tools) is discarded so it cannot end up glued to the
    first line, where it would turn a comment into a bogus pattern or
    make a real pattern unmatchable. Surrounding whitespace, including
    the CR of Windows line endings, is stripped from every line. Blank
    lines and lines whose first non-blank character is '#' are skipped.

    Args:
        config_path: Path to the denylist file.

    Returns:
        The remaining lines in file order; each one is a substring to
        look for in file paths. An empty list when ``config_path`` is
        not an existing regular file.
    """
    if not config_path.is_file():
        return []
    # "utf-8-sig" decodes plain UTF-8 as well; it only differs from
    # "utf-8" by dropping a BOM at the very start of the file.
    text = config_path.read_text(encoding="utf-8-sig")
    stripped_lines = (raw.strip() for raw in text.splitlines())
    return [line for line in stripped_lines if line and not line.startswith("#")]
|
|
|
|
|
|
def _git_path_set(args: list[str], repo_root: Path, env: dict[str, str]) -> set[str]:
    """Run ``git <args>`` in ``repo_root`` and return its NUL-separated paths.

    Any failure to obtain the output (non-zero exit status, or git not
    being launchable at all) yields an empty set.
    """
    try:
        proc = subprocess.run(
            ["git", *args],
            cwd=str(repo_root),
            capture_output=True,
            check=True,
            env=env,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing or non-executable git binary.
        return set()
    # os.fsdecode decodes path bytes the same way the os/pathlib layer
    # does, so these strings compare equal to paths found by the walk.
    return {os.fsdecode(raw) for raw in proc.stdout.split(b"\0") if raw}


def collect_leaks(repo_root: Path, patterns: list[str]) -> list[dict]:
    """Return forbidden-pattern files that are locally modified or untracked.

    A path matches when any entry of ``patterns`` occurs in it as a
    substring. Two sources are combined:

    * "modified": paths printed by ``git diff --name-only``.
    * "untracked": regular files found by walking ``repo_root`` that are
      not listed by ``git ls-files``. The walk skips every file or
      directory named in SKIP_DIRS, top-level entries named in
      SKIP_TOP_DIRS, and relative paths starting with ".tier2_leaked_"
      (the pre-commit hook's temp file).

    Tracked-but-clean files are NOT reported. The main repo's
    opencode.json, mcp_paths.toml, and other tracked forbidden patterns
    are legitimate; they are not leaks. Only files that have been
    MODIFIED locally (or are NEW) indicate sandbox drift.

    If a git command cannot be run or exits non-zero, its result is
    treated as empty ('no modifications' / 'no tracked files'); every
    matching file found by the walk is then reported as "untracked".

    NOTE(review): plain ``git diff`` compares the working tree with the
    index, and ``git ls-files`` lists index entries, so a leak that has
    already been staged with ``git add`` appears in neither category
    here - confirm that the pre-commit hook is the intended guard for
    that case.

    Args:
        repo_root: Directory to audit; also used as git's working directory.
        patterns: Forbidden substrings. An empty list short-circuits to [].

    Returns:
        A list of {"path": <posix-style relative path>, "status":
        "modified" | "untracked"} dicts, each path at most once. Modified
        entries come first (sorted), followed by untracked entries in
        sorted walk order.
    """
    if not patterns:
        return []

    # Pin git to repo_root's own .git. By default git searches upward for
    # a parent .git/ directory; if repo_root is merely a subdirectory of
    # another repo (e.g. a tmp_path fixture inside the project tree), git
    # would report the PARENT's files as if they belonged to repo_root.
    # When repo_root is not itself a repository the pinned path does not
    # exist, git fails, and both sets below come back empty.
    git_env = {**os.environ, "GIT_DIR": str(repo_root.resolve() / ".git")}
    modified = _git_path_set(
        ["diff", "--name-only", "-z", "--no-renames"], repo_root, git_env
    )
    tracked = _git_path_set(["ls-files", "-z"], repo_root, git_env)

    def is_forbidden(rel_path: str) -> bool:
        return any(pat in rel_path for pat in patterns)

    leaks: list[dict] = [
        {"path": rel_path, "status": "modified"}
        for rel_path in sorted(modified)
        if is_forbidden(rel_path)
    ]
    reported = {leak["path"] for leak in leaks}

    # Walk the working tree to catch untracked leaks. This is done
    # manually (rather than via git ls-files --others --exclude-standard)
    # to keep the SKIP_DIRS rules visible in this script. Skipped
    # directories are pruned up front so .git/, node_modules/ and virtual
    # environments are never traversed.
    top_level_skips = SKIP_DIRS | SKIP_TOP_DIRS
    for dirpath, dirnames, filenames in os.walk(repo_root):
        rel_dir = Path(dirpath).relative_to(repo_root)
        skip_names = SKIP_DIRS if rel_dir.parts else top_level_skips
        # In-place assignment is what tells os.walk not to descend;
        # sorting makes the report order deterministic.
        dirnames[:] = sorted(name for name in dirnames if name not in skip_names)
        for name in sorted(filenames):
            if name in skip_names:
                continue
            rel = (rel_dir / name).as_posix()
            # Skip the pre-commit hook's temp file.
            if rel.startswith(".tier2_leaked_"):
                continue
            if rel in tracked or rel in reported:
                continue
            if not is_forbidden(rel):
                continue
            # os.walk also lists broken symlinks and special files here;
            # only regular files (or links to them) count as leaks.
            if not (Path(dirpath) / name).is_file():
                continue
            leaks.append({"path": rel, "status": "untracked"})
            reported.add(rel)
    return leaks
|
|
|
|
|
|
def render_human(leaks: list[dict]) -> str:
    """Build the plain-text leak report shown on the terminal.

    With no leaks the result is a single "[OK]" line. Otherwise it is a
    "[LEAK]" header carrying the count, a blank line, one row per leak
    (status left-aligned in a 9-character column, then the path), a blank
    line, and a fixed explanatory footer. The text always ends with a
    newline.
    """
    if leaks:
        header = [f"[LEAK] Found {len(leaks)} tier-2 sandbox-only file(s):", ""]
        rows = [f" {entry['status']:9s} {entry['path']}" for entry in leaks]
        footer = [
            "",
            "These files belong in the main repo only; they are modified by",
            "scripts/tier2/setup_tier2_clone.ps1 in the tier-2 clone.",
            "If committed, they would absorb the sandbox's local config drift.",
            "To remove from the working tree: git rm --cached <path>",
        ]
        return "\n".join([*header, *rows, *footer]) + "\n"
    return "[OK] No tier-2 sandbox-only files detected in the working tree.\n"
|
|
|
|
|
|
def render_json(leaks: list[dict]) -> str:
    """Serialize the leak report as a 2-space-indented JSON document.

    The document has a "files" array (the leak entries exactly as given)
    and a "summary" object holding the total number of entries plus how
    many of them have the status "untracked" and "modified".
    """
    statuses = [entry["status"] for entry in leaks]
    summary = {
        "total": len(leaks),
        "untracked": statuses.count("untracked"),
        "modified": statuses.count("modified"),
    }
    report = {"files": leaks, "summary": summary}
    return json.dumps(report, indent=2)
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Run the audit against the current working directory.

    Loads the denylist from CONFIG_REL under the current directory,
    collects leaks, and prints either the human-readable report or (with
    --json) the JSON report to stdout. When no patterns could be loaded,
    a warning goes to stderr and the report is produced for an empty
    leak list.

    Args:
        argv: Command-line arguments without the program name; None lets
            argparse read sys.argv.

    Returns:
        1 when --strict was given and at least one leak was found,
        otherwise 0.
    """
    # __doc__ is None when docstrings are stripped (python -OO); fall back
    # to an empty description instead of crashing on None.split().
    parser = argparse.ArgumentParser(description=(__doc__ or "").split("\n")[0])
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Exit 1 if any leak is detected. Default: exit 0 (informational).",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Emit machine-readable JSON instead of the human-readable report.",
    )
    args = parser.parse_args(argv)

    repo_root = Path.cwd()
    config_path = repo_root / CONFIG_REL
    patterns = load_patterns(config_path)
    leaks: list[dict] = []
    if patterns:
        leaks = collect_leaks(repo_root, patterns)
    else:
        print(
            f"warning: no forbidden patterns loaded from {config_path}; audit is a no-op.",
            file=sys.stderr,
        )

    if args.json:
        print(render_json(leaks))
    else:
        # render_human already terminates its output with a newline.
        print(render_human(leaks), end="")

    return 1 if (args.strict and leaks) else 0
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())