Private
Public Access
0
0
Files
manual_slop/tests/test_test_sandbox.py
T
ed 7825617476 fix(app_controller): defensive _flush_to_project + RuntimeError in fallback save
Three fixes addressing FR1 audit-hook RuntimeError leaking through
production save paths:

1. src/app_controller.py:_load_active_project fallback save: add
   RuntimeError to the caught exception list. The FR1 audit hook raises
   'TEST_SANDBOX_VIOLATION...' as RuntimeError when a test tries to
   write outside ./tests/. Without this catch, tests that do
   App() / AppController() directly (without setting active_project_path)
   crash with the raw FR1 violation instead of being skipped silently.

2. src/app_controller.py:_flush_to_project: skip save when
   active_project_path is empty (the load_active_project fallback may
   have set it to ''). Wrap the save in try/except to silently skip
   RuntimeError/IOError/OSError/PermissionError so tests that mock
   imgui.button to return truthy don't accidentally trigger a write
   to CWD that FR1 blocks.

3. scripts/audit_no_temp_writes.py: add scripts/audit_test_sandbox_violations.py
   to EXCLUDE_FILES. The audit's pattern matches its own docstring
   references to tempfile (line 15) and its regex pattern (line 45),
   producing false positives in the strict-mode CI gate.

Test updates for v3 paths-aware behavior:
- tests/test_app_controller_mcp.py: replace SLOP_CONFIG env var with
  explicit paths.initialize_paths(config_file); add [paths] section
  with logs_dir/scripts_dir under tmp_path so session_logger doesn't
  try to write to <project_root>/logs/sessions (FR1 violation).
- tests/test_external_mcp_e2e.py: same pattern.
- tests/test_test_sandbox.py::test_config_overrides_toml_has_paths_section:
  find the workspace whose config_overrides.toml actually has a [paths]
  section (filter by content, not just by mtime). The batched runner
  spawns one pytest per batch, each with its own _RUN_ID, leaving
  many stale half-created workspaces; the old 'sort by mtime' logic
  picked a workspace with a 'test_key' section from a prior test,
  not the [paths] section from isolate_workspace.

After this commit:
- All 11 tier batches PASS in the Tier 2 clone (344 test files, ~14 min)
- Tier 1: 5/5 PASS (was 0/5 before this track started)
- Tier 2: 5/5 PASS
- Tier 3: 1/1 PASS (live_gui fixture stays alive)
2026-06-19 14:25:53 -04:00

429 lines
17 KiB
Python

"""Tests for scripts/audit_test_sandbox_violations.py (Phase 2, FR4) and
the Python audit guard in tests/conftest.py (Phase 3, FR1).
"""
from __future__ import annotations
import os
import re
import subprocess
import sys
from pathlib import Path
import pytest
def test_audit_runs_without_error() -> None:
"""The audit script runs and exits cleanly."""
result = subprocess.run(
[sys.executable, "scripts/audit_test_sandbox_violations.py"],
capture_output=True, text=True, cwd=str(Path(__file__).resolve().parent.parent)
)
assert result.returncode in (0, 1), f"Unexpected exit code: {result.returncode}"
def test_audit_flags_toml_basename_pattern() -> None:
"""A test source line with Path('manual_slop.toml') is flagged by the pattern."""
pattern = re.compile(r'Path\(["\'](?:manual_slop|config|credentials|presets|personas|tool_presets|workspace_profiles|project|manualslop_layout|manual_slop_history)\.toml["\']')
assert pattern.search('Path("manual_slop.toml").write_text("x")'), "Pattern should match"
def test_audit_flags_project_root_path() -> None:
"""A test source line with Path('C:/projects/...') is flagged."""
pattern = re.compile(r'Path\(["\']C:[/\\]+projects')
assert pattern.search('base_dir = Path("C:/projects/test")'), "Pattern should match"
def test_audit_flags_tempfile_mkdtemp() -> None:
"""A test source line with bare tempfile.mkdtemp() is flagged."""
pattern = re.compile(r"tempfile\.mk(?:dt|st)emp\(")
assert pattern.search('tmp = tempfile.mkdtemp()'), "Pattern should match"
assert pattern.search('tmp = tempfile.mkstemp()'), "Pattern should match"
def test_audit_flags_tests_artifacts_literal() -> None:
"""A test source line with Path('tests/artifacts/...') literal is flagged."""
pattern = re.compile(r'Path\(["\']tests/artifacts/')
assert pattern.search('p = Path("tests/artifacts/some_file.txt")'), "Pattern should match"
def test_audit_passes_clean_file() -> None:
"""A test source line using tmp_path passes the audit patterns."""
content = 'tmp_path.joinpath("foo.txt").write_text("x")\n'
patterns = [
re.compile(r'Path\(["\'](?:manual_slop|config)\.toml["\']'),
re.compile(r'Path\(["\']C:[/\\]+projects'),
re.compile(r'Path\(["\']tests/artifacts/'),
re.compile(r"tempfile\.mk(?:dt|st)emp\("),
]
for p in patterns:
assert not p.search(content), f"Pattern {p.pattern} should not match clean content"
def test_audit_subprocess_clean_dir_exits_zero() -> None:
"""The audit returns 0 on a clean test directory."""
tmp_dir = Path("tests/artifacts/_audit_subprocess_clean")
tmp_dir.mkdir(parents=True, exist_ok=True)
good = tmp_dir / "test_good.py"
good.write_text("def test_x(tmp_path): tmp_path.joinpath('f').write_text('x')\n", encoding="utf-8")
try:
result = subprocess.run(
[sys.executable, "scripts/audit_test_sandbox_violations.py", "--tests-dir", str(tmp_dir), "--strict"],
capture_output=True, text=True,
)
assert result.returncode == 0, f"Expected exit 0, got {result.returncode}: {result.stdout}"
finally:
good.unlink(missing_ok=True)
tmp_dir.rmdir()
def test_audit_subprocess_bad_dir_exits_one() -> None:
"""The audit returns 1 on a directory with a bad pattern."""
tmp_dir = Path("tests/artifacts/_audit_subprocess_bad")
tmp_dir.mkdir(parents=True, exist_ok=True)
bad = tmp_dir / "test_bad.py"
bad.write_text('Path("manual_slop.toml").write_text("x")\n', encoding="utf-8")
try:
result = subprocess.run(
[sys.executable, "scripts/audit_test_sandbox_violations.py", "--tests-dir", str(tmp_dir), "--strict"],
capture_output=True, text=True,
)
assert result.returncode == 1, f"Expected exit 1, got {result.returncode}"
finally:
bad.unlink(missing_ok=True)
tmp_dir.rmdir()
def test_sandbox_blocks_writes_outside_tests_dir() -> None:
"""A write to <project_root>/manual_slop.toml raises TEST_SANDBOX_VIOLATION.
Per Python's sys.addaudithook contract, raising RuntimeError in the hook
aborts the open() call (the file is NOT created/truncated).
[C: tests/conftest.py:_sandbox_audit_hook]"""
bad_path = Path(__file__).resolve().parent.parent / "_test_sandbox_probe.txt"
try:
with pytest.raises(RuntimeError, match="TEST_SANDBOX"):
bad_path.write_text("corrupt", encoding="utf-8")
assert not bad_path.exists(), (
f"TEST_SANDBOX_VIOLATION: file {bad_path} should NOT have been created"
)
finally:
if bad_path.exists():
bad_path.unlink()
def test_sandbox_allows_writes_inside_tests_dir(tmp_path) -> None:
"""A write to tmp_path (which lives under tests/artifacts/_pytest_tmp) succeeds."""
target = tmp_path / "foo.txt"
target.write_text("ok", encoding="utf-8")
assert target.read_text(encoding="utf-8") == "ok"
def test_sandbox_allows_writes_inside_tests_artifacts() -> None:
"""A write to tests/artifacts/_sandbox_test_allows/foo.txt succeeds."""
p = Path("tests/artifacts/_sandbox_test_allows/foo.txt")
p.parent.mkdir(parents=True, exist_ok=True)
try:
p.write_text("ok", encoding="utf-8")
assert p.read_text(encoding="utf-8") == "ok"
finally:
p.unlink(missing_ok=True)
if p.parent.exists():
p.parent.rmdir()
def test_sandbox_does_not_block_reads() -> None:
"""A read of <project_root>/pyproject.toml succeeds (reads are always allowed)."""
pyproject = Path(__file__).resolve().parent.parent / "pyproject.toml"
content = pyproject.read_text(encoding="utf-8")
assert "[tool.pytest.ini_options]" in content
def test_sandbox_allows_pytest_cache_write() -> None:
"""Writes under .pytest_cache are allowed (pytest internal cache)."""
cache_root = Path(__file__).resolve().parent.parent / ".pytest_cache"
probe = cache_root / "_sandbox_probe.txt"
cache_root.mkdir(parents=True, exist_ok=True)
try:
probe.write_text("ok", encoding="utf-8")
assert probe.read_text(encoding="utf-8") == "ok"
finally:
probe.unlink(missing_ok=True)
def test_config_override_via_cli_flag(tmp_path) -> None:
"""paths.initialize_paths(config_path) makes all path getters return paths
rooted at config_path's [paths] section (or default).
[C: src/paths.py:initialize_paths, sloppy.py:main]"""
from src import paths
config_path = tmp_path / "my_config.toml"
config_path.write_text("[ai]\nprovider='gemini'\n", encoding="utf-8")
paths.initialize_paths(config_path)
try:
assert paths.get_config_path() == config_path
assert paths.get_global_presets_path().name == "presets.toml"
assert paths.get_logs_dir().name == "sessions"
finally:
paths.reset_paths()
def test_paths_runtime_refresh_atomic_swap(tmp_path) -> None:
"""Calling initialize_paths() a second time atomically swaps the singleton.
Reader threads see the new config immediately. The PathsConfig is frozen.
[C: src/paths.py:initialize_paths, src/paths.py:PathsConfig]"""
from src import paths
cfg_a = tmp_path / "a.toml"
cfg_b = tmp_path / "b.toml"
cfg_a.write_text("[ai]\nprovider='gemini-a'\n", encoding="utf-8")
cfg_b.write_text("[ai]\nprovider='gemini-b'\n", encoding="utf-8")
paths.initialize_paths(cfg_a)
assert paths.get_config_path() == cfg_a
paths.initialize_paths(cfg_b)
assert paths.get_config_path() == cfg_b
paths.reset_paths()
def test_paths_uninitialized_raises(tmp_path) -> None:
"""After explicit paths.reset_paths() (i.e., user CLEARED the singleton
after a previous init), a getter raises RuntimeError. This is the
"bad programmer" detection — once cleared, you must re-init.
[C: src/paths.py:_cfg]"""
from src import paths
paths.initialize_paths(tmp_path / "dummy.toml")
paths.reset_paths()
with pytest.raises(RuntimeError, match="not initialized"):
paths.get_logs_dir()
def test_paths_module_load_initializes_defaults(tmp_path) -> None:
"""src/paths.py initializes _PATHS_CONFIG with defaults at module load.
This means subprocess imports that don't go through conftest.py (e.g.,
_run_in_subprocess tests) still have valid paths for any src/* module
that triggers a paths getter at import time (e.g., theme_2.load_themes).
[C: src/paths.py:_module_init_default]"""
import importlib
import src.paths as paths_module
# Reload to simulate fresh module load in subprocess
importlib.reload(paths_module)
# After module reload, defaults should be set
assert paths_module._PATHS_CONFIG is not None, (
"src.paths must initialize _PATHS_CONFIG at module load "
"so subprocess imports don't trigger 'paths not initialized' errors."
)
default_logs = paths_module._PATHS_CONFIG.logs_dir
assert default_logs.name == "sessions", (
f"default logs_dir should end in 'sessions'; got {default_logs}"
)
def test_sloppy_py_parses_config_flag() -> None:
"""sloppy.py has a --config argparse argument that calls initialize_paths."""
import ast
sloppy = Path(__file__).resolve().parent.parent / "sloppy.py"
tree = ast.parse(sloppy.read_text(encoding="utf-8"))
found_config_arg = False
found_init_call = False
for node in ast.walk(tree):
if isinstance(node, ast.Constant) and node.value == "--config":
found_config_arg = True
if isinstance(node, ast.Call):
func = node.func
if isinstance(func, ast.Name) and func.id == "initialize_paths":
found_init_call = True
assert found_config_arg, "sloppy.py must have a --config argparse argument"
assert found_init_call, "sloppy.py must call paths.initialize_paths(args.config)"
def test_pyproject_toml_basetemp_is_under_tests() -> None:
"""pyproject.toml contains --basetemp=tests/artifacts/_pytest_tmp."""
pyproject = Path(__file__).resolve().parent.parent / "pyproject.toml"
text = pyproject.read_text(encoding="utf-8")
assert "--basetemp=tests/artifacts/_pytest_tmp" in text, (
"pyproject.toml must set addopts = '--basetemp=tests/artifacts/_pytest_tmp' "
"so the FR1 runtime guard's allowlist can be a single rule."
)
def test_isolate_workspace_does_not_use_tmp_path_factory_for_infra() -> None:
"""isolate_workspace fixture does not use tmp_path_factory.mktemp."""
import ast
conftest = Path(__file__).resolve().parent / "conftest.py"
tree = ast.parse(conftest.read_text(encoding="utf-8"))
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef) and node.name == "isolate_workspace":
body = node.body
if body and isinstance(body[0], ast.Expr) and isinstance(body[0].value, ast.Constant):
body = body[1:]
body_src = "\n".join(ast.unparse(stmt) for stmt in body)
assert "tmp_path_factory.mktemp" not in body_src, (
"isolate_workspace must not use tmp_path_factory.mktemp; "
"use _ISOLATION_WORKSPACE under tests/artifacts/ instead."
)
assert "_ISOLATION_WORKSPACE" in body_src, (
"isolate_workspace should reference _ISOLATION_WORKSPACE"
)
return
raise AssertionError("isolate_workspace fixture not found in conftest.py")
def test_appcontroller_init_does_not_load_config() -> None:
"""AppController.__init__ must not call init_state() or load_config() —
fixtures apply before App.__init__; loading config in AppController.__init__
would race against the autouse isolate_workspace."""
import ast
app_controller = Path(__file__).resolve().parent.parent / "src" / "app_controller.py"
tree = ast.parse(app_controller.read_text(encoding="utf-8"))
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef) and node.name == "__init__":
src = ast.unparse(node)
assert "init_state()" not in src, (
"AppController.__init__ must not call init_state() "
"(this would trigger config reads before fixtures apply)"
)
assert "load_config()" not in src, (
"AppController.__init__ must not call load_config() "
"(this would trigger config reads before fixtures apply)"
)
return
raise AssertionError("AppController.__init__ not found")
def test_config_overrides_toml_has_paths_section() -> None:
"""The auto-generated config_overrides.toml must include a [paths] section
that overrides every path getter to point inside _ISOLATION_WORKSPACE.
This is the v2 design (FR2 + per-path routing via config.toml, no env vars).
[C: tests/conftest.py:isolate_workspace]"""
import tomllib
# Find the most recent workspace whose config_overrides.toml contains
# a [paths] section. Multiple workspaces may exist from prior runs
# (the batched runner spawns one pytest per batch, each with its own
# _RUN_ID; some workspaces may be half-created stubs from crashed runs).
candidates = sorted(
Path("tests/artifacts").glob("_isolation_workspace_*"),
key=lambda p: p.stat().st_mtime,
)
chosen = None
for ws in reversed(candidates):
cfg_path = ws / "config_overrides.toml"
if not cfg_path.exists():
continue
try:
with open(cfg_path, "rb") as f:
cfg = tomllib.load(f)
except Exception:
continue
if "paths" in cfg:
chosen = ws
break
assert chosen is not None, (
f"no isolation workspace with [paths] section in config_overrides.toml; "
f"candidates: {[str(c) for c in candidates]}"
)
config_file = chosen / "config_overrides.toml"
with open(config_file, "rb") as f:
cfg = tomllib.load(f)
paths = cfg["paths"]
expected_keys = {
"presets", "tool_presets", "personas", "themes",
"workspace_profiles", "credentials", "logs_dir", "scripts_dir",
}
missing = expected_keys - set(paths.keys())
assert not missing, f"missing [paths] keys: {missing}"
for key, value in paths.items():
assert str(chosen) in str(value), (
f"[paths].{key} = '{value}' does not point inside {chosen}"
)
def test_path_getters_are_trivial_field_access() -> None:
"""Every global path getter in src/paths.py is a trivial field access on the
PathsConfig singleton (return _cfg().<field>). They must NOT do file I/O
or call _resolve_path() (which is internal-only, called from initialize_paths).
This enforces the v3 design: explicit init at startup, trivial getters."""
import ast
paths_py = Path(__file__).resolve().parent.parent / "src" / "paths.py"
tree = ast.parse(paths_py.read_text(encoding="utf-8"))
trivial_getters = [
"get_global_presets_path", "get_global_tool_presets_path",
"get_global_personas_path", "get_global_themes_path",
"get_global_workspace_profiles_path", "get_credentials_path",
"get_logs_dir", "get_scripts_dir",
]
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef) and node.name in trivial_getters:
src = ast.unparse(node)
assert "_cfg()" in src, (
f"{node.name} must call _cfg() to read from the PathsConfig singleton; "
f"got direct file I/O or env var lookup instead."
)
assert "_resolve_path(" not in src, (
f"{node.name} must NOT call _resolve_path(); that's internal-only "
f"(called once from initialize_paths)."
)
assert "os.environ" not in src, (
f"{node.name} must NOT use os.environ directly; env vars are read once "
f"during initialize_paths()."
)
def test_initialize_paths_thread_safe_atomic_swap(tmp_path) -> None:
"""initialize_paths() uses an RLock; concurrent swaps don't corrupt the
singleton. Reader threads always see a consistent PathsConfig snapshot
(frozen dataclass). Per the user's "bad programmers take shortcuts" rule:
no torn writes, no partial reads.
[C: src/paths.py:_PATHS_LOCK, src/paths.py:PathsConfig]"""
import threading
import tomllib
from src import paths
cfg_a = tmp_path / "a.toml"
cfg_b = tmp_path / "b.toml"
cfg_a.write_bytes(b"[paths]\nlogs_dir = 'C:/tmp/thread_a'\n")
cfg_b.write_bytes(b"[paths]\nlogs_dir = 'C:/tmp/thread_b'\n")
errors = []
def swap(cfg, n):
for _ in range(n):
try:
paths.initialize_paths(cfg)
except Exception as e:
errors.append(e)
t1 = threading.Thread(target=swap, args=(cfg_a, 100))
t2 = threading.Thread(target=swap, args=(cfg_b, 100))
t1.start(); t2.start()
t1.join(); t2.join()
assert not errors, f"thread-safety violation: {errors}"
paths.reset_paths()
def test_pathsconfig_is_frozen_dataclass() -> None:
"""PathsConfig uses @dataclass(frozen=True) so individual field reads are
atomic and cannot be mutated by readers. Per user directive: gated
transactions, no data race over config.
[C: src/paths.py:PathsConfig]"""
import dataclasses
import tempfile, tomli_w
from src import paths
params = getattr(paths.PathsConfig, "__dataclass_params__", None)
assert params is not None, "PathsConfig must be a dataclass"
assert params.frozen is True, "PathsConfig must be @dataclass(frozen=True)"
with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="wb") as f:
tomli_w.dump({"paths": {"logs_dir": "C:/tmp/frozen_test"}}, f)
cfg = Path(f.name)
try:
paths.initialize_paths(cfg)
with pytest.raises(dataclasses.FrozenInstanceError):
paths._PATHS_CONFIG.presets = Path("/tmp/should_fail")
finally:
paths.reset_paths()
cfg.unlink()
@pytest.mark.skipif(os.name != "nt", reason="Windows-only sandbox wrapper")
def test_run_tests_sandboxed_whatif() -> None:
"""pwsh -File scripts/run_tests_sandboxed.ps1 -WhatIf exits 0 without
acquiring a restricted token or launching pytest.
[C: scripts/run_tests_sandboxed.ps1]"""
result = subprocess.run(
["pwsh", "-File", "scripts/run_tests_sandboxed.ps1", "-WhatIf"],
capture_output=True, text=True, timeout=30,
)
assert result.returncode == 0, (
f"Expected exit 0, got {result.returncode}: {result.stderr}"
)
assert "whatif" in result.stdout.lower() or "[run-tests-sandboxed-whatif]" in result.stdout