From 216c433793f4bf2622ed6850f09414dd7413b7c7 Mon Sep 17 00:00:00 2001
From: Ed_
Date: Sat, 20 Jun 2026 19:39:09 -0400
Subject: [PATCH] fix(baseline): synthesize PHASE1_AUDIT_BASELINE.json from inventory docs

Phase 1 deviation from spec: the original PHASE1_AUDIT_BASELINE.json was
gitignored (tests/artifacts/ is in .gitignore) and lost when the working
tree was rebuilt. Per spec FR1-1 we needed to re-run the audit and save
the JSON, but a live re-run produces the CURRENT (post-migration) state,
not the BASELINE state. That broke 5 of 7 tests that asserted
pre-migration counts (88 sites across 3 files).

The actual fix is to reconstruct the baseline JSON from the per-file
inventory docs (PHASE1_INVENTORY_*.md), which ARE committed (they live
under tests/artifacts/, but are exempt from the directory's gitignore
because they are present and needed).

The new script
scripts/tier2/artifacts/result_migration_cruft_removal_20260620/synth_baseline_json.py
parses the 3 per-file inventory docs and emits
tests/artifacts/PHASE1_AUDIT_BASELINE.json with the exact shape the tests
expect (backslash-separated Windows paths, with no forward slashes, to
match the EXPECTED dict in test_baseline_result.py).

Result: 31/31 baseline tests pass (was 26/31); 16/16 heuristic tests
still pass; no source code changed.

Test plan note: any future regeneration must use the inventory docs as
the source of truth, NOT a live audit. The audit is a moving target once
migration begins.
# scripts/tier2/artifacts/result_migration_cruft_removal_20260620/synth_baseline_json.py
r"""Synthesize tests/artifacts/PHASE1_AUDIT_BASELINE.json from the per-file inventory docs.

The original PHASE1_AUDIT_BASELINE.json was gitignored and lost when the
working tree was rebuilt. The per-file inventory docs (PHASE1_INVENTORY_*.md)
are the source of truth for the BASELINE state (88 sites across 3 files).
This script parses them and produces a JSON in the same shape the tests expect.

Output schema (per the test's _load_audit), shown as it appears in the JSON
text. Filenames use a single Windows-style backslash, which JSON escapes:
    {"files": [{"filename": "src\\mcp_client.py",
                "findings": [{"line": 123, "category": "UNCLEAR"}, ...]}, ...]}

All input and output paths are relative, so run this script from the
repository root.
"""
import json
import re
import sys
from pathlib import Path

INV_DIR = Path("tests/artifacts")
OUT = Path("tests/artifacts/PHASE1_AUDIT_BASELINE.json")

# Emitted "filename" value -> inventory doc that the findings are read from.
FILES = {
    "src\\mcp_client.py": INV_DIR / "PHASE1_INVENTORY_mcp_client.md",
    "src\\ai_client.py": INV_DIR / "PHASE1_INVENTORY_ai_client.md",
    "src\\rag_engine.py": INV_DIR / "PHASE1_INVENTORY_rag_engine.md",
}

# Matches a table row of the form "| <n> | L<line> | <CATEGORY> |" and
# captures the line number and the category token.
ROW_RE = re.compile(r"^\|\s*\d+\s*\|\s*L(\d+)\s*\|\s*([A-Z_]+)\s*\|")

# Inventory category token -> category string written to the JSON.
CATEGORY_MAP = {
    "INTERNAL_BROAD_CATCH": "INTERNAL_BROAD_CATCH",
    "INTERNAL_SILENT_SWALLOW": "INTERNAL_SILENT_SWALLOW",
    "INTERNAL_OPTIONAL_RETURN": "INTERNAL_OPTIONAL_RETURN",
    "INTERNAL_RETHROW": "INTERNAL_RETHROW",
    "UNCLEAR": "UNCLEAR",
    "INTERNAL_COMPLIANT": "INTERNAL_COMPLIANT",
}

# Categories counted as "migration-target" in the summary line printed by
# main(). Kept next to CATEGORY_MAP so the two cannot drift apart unnoticed.
MIGRATION_TARGET_CATEGORIES = frozenset({
    "INTERNAL_BROAD_CATCH",
    "INTERNAL_SILENT_SWALLOW",
    "INTERNAL_OPTIONAL_RETURN",
    "INTERNAL_RETHROW",
    "UNCLEAR",
})


def parse_inventory(path: Path, filename: str) -> dict:
    """Parse one inventory doc into a ``{"filename", "findings"}`` record.

    Args:
        path: Inventory markdown file to read (decoded as UTF-8).
        filename: Value stored verbatim under the ``"filename"`` key.

    Returns:
        ``{"filename": filename, "findings": [...]}`` where each finding is
        ``{"line": int, "category": str}``, in the order the rows appear.

    Raises:
        OSError: If ``path`` cannot be read (e.g. ``FileNotFoundError``).

    Lines that do not match ``ROW_RE`` are ignored. Rows that match but carry
    a category missing from ``CATEGORY_MAP`` are still excluded from the
    result, but are reported on stderr so a dropped row is never silent.
    """
    findings = []
    skipped = []
    for line in path.read_text(encoding="utf-8").splitlines():
        m = ROW_RE.match(line)
        if not m:
            continue
        lineno, cat = m.group(1), m.group(2)
        if cat not in CATEGORY_MAP:
            skipped.append(f"L{lineno}:{cat}")
            continue
        findings.append({
            "line": int(lineno),
            "category": CATEGORY_MAP[cat],
        })
    if skipped:
        # Baseline counts are asserted by tests, so surface anything dropped.
        print(
            f"warning: {path}: skipped {len(skipped)} row(s) with unknown "
            f"category: {', '.join(skipped)}",
            file=sys.stderr,
        )
    return {"filename": filename, "findings": findings}


def main() -> None:
    """Parse every doc in ``FILES``, write ``OUT``, and print a summary line."""
    files = [parse_inventory(p, fname) for fname, p in FILES.items()]
    data = {"files": files}
    OUT.write_text(json.dumps(data, indent=2), encoding="utf-8")
    total = sum(len(f["findings"]) for f in files)
    mig = sum(
        1 for f in files for s in f["findings"]
        if s["category"] in MIGRATION_TARGET_CATEGORIES
    )
    print(f"Wrote {OUT}: {total} sites total, {mig} migration-target")


if __name__ == "__main__":
    main()