manual_slop/scripts/audit_no_models_config_io.py

"""Audit script: ensure no production code in src/ calls the models I/O primitives directly.

Architecture rule: AppController owns the config I/O. The
models.load_config_from_disk and models.save_config_to_disk
functions (formerly _load_config_from_disk and _save_config_to_disk)
are private file I/O primitives. Direct callers in src/ are an
architectural smell (bypassing the controller state owner). After
module_taxonomy_refactor_20260627 Phase 3b, they live in src/project.py
and are re-exported by src/models.py for backward compat. The same
audit rule still applies: only AppController should call them.

The only allowed call sites are inside AppController itself.

Usage:
    python scripts/audit_no_models_config_io.py            # human-readable report
    python scripts/audit_no_models_config_io.py --json     # JSON output for CI
    python scripts/audit_no_models_config_io.py --strict   # exit 1 on violations
"""
from __future__ import annotations
import argparse
import json
import os
import re
import sys
from pathlib import Path

# Patterns that are architectural smells in production code.
# These are the I/O primitives; only AppController should call them.
# Post-Phase 3b the names are public (load_config_from_disk /
# save_config_to_disk) but the architectural rule is unchanged.
FORBIDDEN_PATTERNS = [
    (re.compile(r"\bmodels\.load_config_from_disk\s*\("), "models.load_config_from_disk"),
    (re.compile(r"\bmodels\.save_config_to_disk\s*\("), "models.save_config_to_disk"),
    (re.compile(r"\bsrc\.project\.load_config_from_disk\s*\("), "src.project.load_config_from_disk"),
    (re.compile(r"\bsrc\.project\.save_config_to_disk\s*\("), "src.project.save_config_to_disk"),
]

# The OLD private names. After Phase 3b the private names are GONE;
# these patterns are kept to detect any stale call site.
LEGACY_PRIVATE_NAMES = [
    (re.compile(r"\bmodels\._load_config_from_disk\s*\("), "models._load_config_from_disk"),
    (re.compile(r"\bmodels\._save_config_to_disk\s*\("), "models._save_config_to_disk"),
]

# The OLD public names. After the rename these should not exist anywhere.
LEGACY_PUBLIC_NAMES = [
    (re.compile(r"\bmodels\.load_config\s*\("), "models.load_config"),
    (re.compile(r"\bmodels\.save_config\s*\("), "models.save_config"),
]

# Files where these calls are LEGITIMATE.
ALLOWED_FILES = {
    "src/app_controller.py",   # the only public owner of the I/O
    "src/models.py",           # the module that defines them
    "tests/test_models_no_top_level_tomli_w.py",  # tests lazy-load behavior
}

# Source roots to scan
SOURCE_ROOTS = ["src"]


def find_violations() -> list[dict[str, object]]:
    """Scan src/ for direct calls to the forbidden config I/O primitives."""
    violations: list[dict[str, object]] = []
    for root in SOURCE_ROOTS:
        if not os.path.isdir(root):
            continue
        for dirpath, _dirs, files in os.walk(root):
            for fname in files:
                if not fname.endswith(".py"):
                    continue
                path = os.path.join(dirpath, fname)
                # Normalize to forward slashes for matching
                norm = path.replace(os.sep, "/")
                if norm in ALLOWED_FILES:
                    continue
                with open(path, encoding="utf-8", errors="replace") as f:
                    src = f.read()
                docstring_lines = _docstring_lines(src)
                for lineno, line in enumerate(src.splitlines(), start=1):
                    if lineno in docstring_lines:
                        continue
                    stripped = line.lstrip()
                    if stripped.startswith("#"):
                        continue
                    for pattern, name in FORBIDDEN_PATTERNS:
                        if pattern.search(line):
                            violations.append({
                                "file": path,
                                "line": lineno,
                                "pattern": name,
                                "text": line.rstrip(),
                                "severity": "error",
                            })
                    for pattern, name in LEGACY_NAMES:
                        if pattern.search(line):
                            violations.append({
                                "file": path,
                                "line": lineno,
                                "pattern": name,
                                "text": line.rstrip(),
                                "severity": "error",
                            })
    return violations


def _docstring_lines(src: str) -> set[int]:
    """Return a set of 1-based line numbers that are inside a docstring.

    Uses the AST to find module/class/function docstrings, then expands
    the string node's line range. Multi-line strings are included in
    full so any code-looking text inside them is ignored.
    """
    import ast
    lines: set[int] = set()
    try:
        tree = ast.parse(src)
    except SyntaxError:
        return lines
    for node in ast.walk(tree):
        if not isinstance(node, (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef,
                                 ast.ClassDef)):
            continue
        doc = ast.get_docstring(node, clean=False)
        if not doc or not node.body:
            continue
        first = node.body[0]
        if not isinstance(first, ast.Expr) or not isinstance(first.value, ast.Constant):
            continue
        if not isinstance(first.value.value, str):
            continue
        start = first.lineno
        end = getattr(first, "end_lineno", None) or start
        for ln in range(start, end + 1):
            lines.add(ln)
    return lines


def main() -> int:
    parser = argparse.ArgumentParser(
        description="Audit for direct calls to models config I/O primitives in src/"
    )
    parser.add_argument("--json", action="store_true", help="JSON output for CI")
    parser.add_argument("--strict", action="store_true", help="Exit 1 on any violation")
    args = parser.parse_args()

    violations = find_violations()

    if args.json:
        print(json.dumps({"violations": violations, "count": len(violations)}, indent=2))
    else:
        print("=" * 70)
        print("Architectural audit: models config I/O usage in src/")
        print("=" * 70)
        print()
        print("Rule: AppController owns config I/O. Direct calls to")
        print("  - models._load_config_from_disk(...)")
        print("  - models._save_config_to_disk(...)")
        print("  - models.load_config(...)  (legacy)")
        print("  - models.save_config(...)  (legacy)")
        print("from outside AppController are architectural smells.")
        print()
        print(f"Allowed call sites: {sorted(ALLOWED_FILES)}")
        print()
        if not violations:
            print("OK - no violations found.")
        else:
            print(f"Found {len(violations)} violation(s):")
            print()
            for v in violations:
                print(f"  {v['file']}:{v['line']}: {v['pattern']}")
                print(f"    {v['text']}")
                print()

    if args.strict and violations:
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())