"""Rebuild tests/artifacts/PHASE1_AUDIT_BASELINE.json from the inventory docs.

Background: the original PHASE1_AUDIT_BASELINE.json was gitignored and was
lost when the working tree was rebuilt. The per-file inventory documents
(PHASE1_INVENTORY_*.md) are treated as the source of truth for the BASELINE
state (88 sites across 3 files), so this script re-derives the JSON from
their markdown tables.

Output schema (the shape read by the test's _load_audit):
  {"files": [{"filename": <key of FILES>,
              "findings": [{"line": <int>, "category": <str>}, ...]}, ...]}
"""
import json
import re
from pathlib import Path

# Directory holding the inventory docs; the regenerated baseline lands here too.
INV_DIR = Path("tests/artifacts")
OUT = Path("tests/artifacts/PHASE1_AUDIT_BASELINE.json")

# Emitted "filename" value -> inventory document describing that source file.
# NOTE(review): the keys use backslash separators (src\mcp_client.py) while the
# original schema example showed forward slashes -- confirm which form the
# tests compare against before changing either.
FILES = {
    "src\\mcp_client.py": INV_DIR / "PHASE1_INVENTORY_mcp_client.md",
    "src\\ai_client.py": INV_DIR / "PHASE1_INVENTORY_ai_client.md",
    "src\\rag_engine.py": INV_DIR / "PHASE1_INVENTORY_rag_engine.md",
}

# Matches a table row shaped like `| <number> | L<line> | <CATEGORY> |`.
# Group 1 is the line number (without the "L"), group 2 the category token.
ROW_RE = re.compile(r"^\|\s*\d+\s*\|\s*L(\d+)\s*\|\s*([A-Z_]+)\s*\|")

# Inventory category -> category written to the JSON. Every entry maps to
# itself, so in practice this is the allow-list of categories that are kept;
# rows carrying any other category are dropped by parse_inventory().
CATEGORY_MAP = {
    name: name
    for name in (
        "INTERNAL_BROAD_CATCH",
        "INTERNAL_SILENT_SWALLOW",
        "INTERNAL_OPTIONAL_RETURN",
        "INTERNAL_RETHROW",
        "UNCLEAR",
        "INTERNAL_COMPLIANT",
    )
}


def parse_inventory(path: Path, filename: str) -> dict:
    """Extract the findings listed in one inventory markdown document.

    Args:
        path: Inventory document to read (decoded as UTF-8).
        filename: Value stored under the "filename" key of the result.

    Returns:
        {"filename": filename, "findings": [...]}, where each finding is
        {"line": int, "category": str}, in document order. Lines that do not
        match ROW_RE, and rows whose category is not a key of CATEGORY_MAP,
        contribute nothing.
    """
    rows = path.read_text(encoding="utf-8").splitlines()
    candidates = (ROW_RE.match(row) for row in rows)
    kept = [
        {"line": int(hit.group(1)), "category": CATEGORY_MAP[hit.group(2)]}
        for hit in candidates
        if hit is not None and hit.group(2) in CATEGORY_MAP
    ]
    return {"filename": filename, "findings": kept}


def main():
    """Parse every inventory in FILES, write OUT, and print a one-line summary."""
    per_file = []
    for source_name, inventory_path in FILES.items():
        per_file.append(parse_inventory(inventory_path, source_name))

    OUT.write_text(json.dumps({"files": per_file}, indent=2), encoding="utf-8")

    # Every kept category except INTERNAL_COMPLIANT counts as a migration target.
    migration_targets = {
        "INTERNAL_BROAD_CATCH", "INTERNAL_SILENT_SWALLOW",
        "INTERNAL_OPTIONAL_RETURN", "INTERNAL_RETHROW", "UNCLEAR",
    }
    categories = [
        finding["category"]
        for entry in per_file
        for finding in entry["findings"]
    ]
    total = len(categories)
    mig = sum(category in migration_targets for category in categories)
    print(f"Wrote {OUT}: {total} sites total, {mig} migration-target")


if __name__ == "__main__":
    main()