Private
Public Access
0
0
Files
manual_slop/scripts/audit_no_models_config_io.py
T
ed dcc82ed781 fix(audit): use LEGACY_PRIVATE_NAMES + LEGACY_PUBLIC_NAMES in audit_no_models_config_io
Per post_module_taxonomy_de_cruft_20260627 Phase 0a (FR1). The audit
script's find_violations() function iterated over 'LEGACY_NAMES' but
only LEGACY_PRIVATE_NAMES + LEGACY_PUBLIC_NAMES were defined (the
single LEGACY_NAMES was split into two in module_taxonomy_refactor
Phase 3b but the function reference wasn't updated). This caused a
NameError that crashed the audit with --strict mode.

The spec claimed the bug was in scripts/generate_type_registry.py but
that was a misdiagnosis. generate_type_registry.py works correctly
(verified: 'Registry in sync (29 files checked)'). The actual bug was
in audit_no_models_config_io.py.

This commit:
 - Updates line 95: 'for pattern, name in LEGACY_NAMES:' ->
   'for pattern, name in LEGACY_PRIVATE_NAMES + LEGACY_PUBLIC_NAMES:'
 - The function now iterates over both legacy name lists (private +
   public), matching the actual variables defined in the file.

Verification: VC3 (audit_no_models_config_io passes --strict)
 uv run python scripts/audit_no_models_config_io.py --strict
 # Output: 'OK - no violations found.'
2026-06-26 14:18:34 -04:00

182 lines
7.1 KiB
Python

"""Audit script: ensure no production code in src/ calls the models I/O primitives directly.
Architecture rule: AppController owns the config I/O. The
models.load_config_from_disk and models.save_config_to_disk
functions (formerly _load_config_from_disk and _save_config_to_disk)
are private file I/O primitives. Direct callers in src/ are an
architectural smell (bypassing the controller state owner). After
module_taxonomy_refactor_20260627 Phase 3b, they live in src/project.py
and are re-exported by src/models.py for backward compat. The same
audit rule still applies: only AppController should call them.
The only allowed call sites are inside AppController itself.
Usage:
python scripts/audit_no_models_config_io.py # human-readable report
python scripts/audit_no_models_config_io.py --json # JSON output for CI
python scripts/audit_no_models_config_io.py --strict # exit 1 on violations
"""
from __future__ import annotations
import argparse
import json
import os
import re
import sys
from pathlib import Path
# Call patterns that count as architectural smells in production code.
# They name the config I/O primitives, which only AppController may call.
# Since Phase 3b the primitives carry public names (load_config_from_disk /
# save_config_to_disk); the ownership rule itself did not change.
# Each entry is a (compiled regex, human-readable label) pair.
FORBIDDEN_PATTERNS = [
    (re.compile(regex), label)
    for regex, label in (
        (r"\bmodels\.load_config_from_disk\s*\(", "models.load_config_from_disk"),
        (r"\bmodels\.save_config_to_disk\s*\(", "models.save_config_to_disk"),
        (r"\bsrc\.project\.load_config_from_disk\s*\(", "src.project.load_config_from_disk"),
        (r"\bsrc\.project\.save_config_to_disk\s*\(", "src.project.save_config_to_disk"),
    )
]

# The former private spellings. They no longer exist after Phase 3b, so a
# match here means a stale call site survived the rename.
LEGACY_PRIVATE_NAMES = [
    (re.compile(regex), label)
    for regex, label in (
        (r"\bmodels\._load_config_from_disk\s*\(", "models._load_config_from_disk"),
        (r"\bmodels\._save_config_to_disk\s*\(", "models._save_config_to_disk"),
    )
]

# The former public spellings. After the rename nothing should call these.
LEGACY_PUBLIC_NAMES = [
    (re.compile(regex), label)
    for regex, label in (
        (r"\bmodels\.load_config\s*\(", "models.load_config"),
        (r"\bmodels\.save_config\s*\(", "models.save_config"),
    )
]

# Files in which these calls are legitimate (forward-slash relative paths).
ALLOWED_FILES = {
    # The only public owner of the config I/O.
    "src/app_controller.py",
    # Re-exports the primitives for backward compatibility.
    "src/models.py",
    # Exercises lazy-load behavior.
    "tests/test_models_no_top_level_tomli_w.py",
}

# Directory roots that get scanned.
SOURCE_ROOTS = ["src"]
def find_violations() -> list[dict[str, object]]:
    """Scan the source roots for direct calls to the config I/O primitives.

    Every ``.py`` file below each directory in ``SOURCE_ROOTS`` is read,
    except files listed in ``ALLOWED_FILES``. Lines that belong to a
    docstring and lines whose first non-blank character is ``#`` are
    ignored. Each remaining line is checked against the current forbidden
    names first and the legacy names second.

    Paths are relative to the current working directory, so the script is
    expected to be run from the directory that contains the source roots.

    Returns:
        One dict per match, in sorted traversal order, with the keys
        ``file`` (path as produced by ``os.path.join``), ``line`` (1-based
        line number), ``pattern`` (label of the matched pattern), ``text``
        (the offending line without trailing whitespace) and ``severity``
        (always ``"error"``).
    """
    # Build the combined table once instead of re-concatenating the legacy
    # lists for every scanned line. Order is preserved: current names first,
    # then legacy private, then legacy public.
    patterns = FORBIDDEN_PATTERNS + LEGACY_PRIVATE_NAMES + LEGACY_PUBLIC_NAMES
    violations: list[dict[str, object]] = []
    for root in SOURCE_ROOTS:
        if not os.path.isdir(root):
            continue
        for dirpath, dirnames, files in os.walk(root):
            # Sorting in place steers os.walk's descent, which together with
            # sorted(files) makes the report order reproducible across
            # platforms and filesystems.
            dirnames.sort()
            for fname in sorted(files):
                if not fname.endswith(".py"):
                    continue
                path = os.path.join(dirpath, fname)
                # Normalize to forward slashes so the allow-list matches on
                # every platform.
                norm = path.replace(os.sep, "/")
                if norm in ALLOWED_FILES:
                    continue
                with open(path, encoding="utf-8", errors="replace") as f:
                    src = f.read()
                docstring_lines = _docstring_lines(src)
                # Split on "\n" only: text-mode reading already translated
                # "\r\n" and "\r", and str.splitlines() would additionally
                # break on form feeds and other separators that the Python
                # parser does not count as new lines, shifting line numbers
                # away from the ones the AST reports for docstrings.
                for lineno, line in enumerate(src.split("\n"), start=1):
                    if lineno in docstring_lines:
                        continue
                    if line.lstrip().startswith("#"):
                        continue
                    for pattern, name in patterns:
                        if pattern.search(line):
                            violations.append({
                                "file": path,
                                "line": lineno,
                                "pattern": name,
                                "text": line.rstrip(),
                                "severity": "error",
                            })
    return violations
def _docstring_lines(src: str) -> set[int]:
"""Return a set of 1-based line numbers that are inside a docstring.
Uses the AST to find module/class/function docstrings, then expands
the string node's line range. Multi-line strings are included in
full so any code-looking text inside them is ignored.
"""
import ast
lines: set[int] = set()
try:
tree = ast.parse(src)
except SyntaxError:
return lines
for node in ast.walk(tree):
if not isinstance(node, (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef,
ast.ClassDef)):
continue
doc = ast.get_docstring(node, clean=False)
if not doc or not node.body:
continue
first = node.body[0]
if not isinstance(first, ast.Expr) or not isinstance(first.value, ast.Constant):
continue
if not isinstance(first.value.value, str):
continue
start = first.lineno
end = getattr(first, "end_lineno", None) or start
for ln in range(start, end + 1):
lines.add(ln)
return lines
def main() -> int:
    """Run the audit from the command line and print the result.

    Command-line flags:
        --json: print the violations and their count as a JSON object
            instead of the human-readable report.
        --strict: make the exit status 1 when any violation is found.

    Returns:
        1 when ``--strict`` was given and at least one violation exists,
        otherwise 0.
    """
    parser = argparse.ArgumentParser(
        description="Audit for direct calls to models config I/O primitives in src/"
    )
    parser.add_argument("--json", action="store_true", help="JSON output for CI")
    parser.add_argument("--strict", action="store_true", help="Exit 1 on any violation")
    args = parser.parse_args()

    violations = find_violations()

    if args.json:
        print(json.dumps({"violations": violations, "count": len(violations)}, indent=2))
    else:
        print("=" * 70)
        print("Architectural audit: models config I/O usage in src/")
        print("=" * 70)
        print()
        print("Rule: AppController owns config I/O. Direct calls to")
        # List the names straight from the pattern tables so the printed
        # rule always matches what is actually enforced (the previous
        # hard-coded text named only legacy spellings and omitted the
        # current public primitives).
        for _pattern, name in FORBIDDEN_PATTERNS:
            print(f" - {name}(...)")
        for _pattern, name in LEGACY_PRIVATE_NAMES + LEGACY_PUBLIC_NAMES:
            print(f" - {name}(...) (legacy)")
        print("from outside AppController are architectural smells.")
        print()
        print(f"Allowed call sites: {sorted(ALLOWED_FILES)}")
        print()
        if not violations:
            print("OK - no violations found.")
        else:
            print(f"Found {len(violations)} violation(s):")
            print()
            for v in violations:
                print(f" {v['file']}:{v['line']}: {v['pattern']}")
                print(f" {v['text']}")
                print()

    # Violations only fail the run when the caller opted in with --strict.
    return 1 if args.strict and violations else 0


if __name__ == "__main__":
    sys.exit(main())