Private
Public Access
0
0

conductor(chronology): Phase 8 bulk verification + cross-check helpers (FR6)

This commit is contained in:
2026-06-20 17:57:05 -04:00
parent d24e5120fa
commit 271e689528
4 changed files with 178 additions and 3 deletions
+69
View File
@@ -0,0 +1,69 @@
"""Bulk cross-check of chronology.md rows.
Run from repo root: uv run python scripts/audit/check_chronology_rows.py
"""
from __future__ import annotations
import subprocess
import sys
from pathlib import Path
# Add repo root to path so we can import the helper
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
from scripts.audit.generate_chronology import walk_track_folders # noqa: E402
def _git_short_shas(path: str, *log_args: str) -> list[str]:
    """Return the abbreviated hashes printed by ``git log`` for *path*.

    Runs ``git log <log_args> --format=%h -- <path>`` in the current working
    directory and returns one entry per output line (empty list when git
    prints nothing).

    Raises:
        OSError: git could not be started.
        subprocess.TimeoutExpired: git ran longer than 30 seconds.
        RuntimeError: git exited with a non-zero status.
    """
    result = subprocess.run(
        ["git", "log", *log_args, "--format=%h", "--", path],
        capture_output=True, text=True, timeout=30, check=False,
    )
    # check=False means a failing git would otherwise look like "no commits";
    # surface it explicitly so it is not reported as a SHA mismatch.
    if result.returncode != 0:
        raise RuntimeError(
            f"git log exited {result.returncode}: {result.stderr.strip()}"
        )
    return result.stdout.strip().splitlines()


def _has_iso_date_shape(value: str) -> bool:
    """Return True when *value* is shaped like ``YYYY-MM-DD``.

    Only the shape is checked (ten characters, dashes at positions 4 and 7,
    digits everywhere else); month and day ranges are not validated.
    """
    return (
        len(value) == 10
        and value[4] == "-"
        and value[7] == "-"
        and value[:4].isdigit()
        and value[5:7].isdigit()
        and value[8:].isdigit()
    )


rows = walk_track_folders(Path("conductor"))
errors: list[str] = []
checked = 0

# Which git-log flags yield the SHA each row field is compared against:
# --reverse puts the oldest commit first, -1 limits output to the newest one.
_SHA_CHECKS = (("init_sha", ("--reverse",)), ("end_sha", ("-1",)))

# Row labels start at 2; presumably this lines them up with the rendered
# chronology.md table -- TODO confirm against the generator.
for row_no, row in enumerate(rows, start=2):
    checked += 1
    folder_relpath = row["folder_link"]
    track_id = row["track_id"]
    label = f"Row {row_no} [{track_id}]"

    # Without the folder none of the git checks are meaningful; skip the row.
    if not Path(folder_relpath).is_dir():
        errors.append(f"{label}: folder does not exist: {folder_relpath}")
        continue

    for field, log_args in _SHA_CHECKS:
        try:
            shas = _git_short_shas(folder_relpath, *log_args)
        except (OSError, subprocess.SubprocessError, RuntimeError) as exc:
            errors.append(f"{label}: {field} check failed: {exc}")
            continue
        # No history for the path compares as the empty string.
        actual = shas[0] if shas else ""
        if row[field] != actual:
            errors.append(
                f"{label}: {field} mismatch: row={row[field]!r} actual={actual!r}"
            )

    # An empty date is tolerated; a non-empty one must look like YYYY-MM-DD.
    date = row["date"]
    if date and not _has_iso_date_shape(date):
        errors.append(f"{label}: bad date format: {date!r}")
    if not row["status"]:
        errors.append(f"{label}: empty status")
    if not row["summary"]:
        errors.append(f"{label}: empty summary")

print(f"Checked: {checked} rows")
print(f"Errors: {len(errors)}")
if errors:
    print("All errors:")
    for e in errors:
        print(f"  {e}")
+53
View File
@@ -0,0 +1,53 @@
"""Verify commit_count field in chronology rows."""
from __future__ import annotations
import subprocess
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
from scripts.audit.generate_chronology import walk_track_folders # noqa: E402
rows = walk_track_folders(Path("conductor"))
issues: list[str] = []

# Maximum number of issues echoed to the terminal.
_MAX_SHOWN = 30

# Row labels start at 2; presumably this lines them up with the rendered
# chronology table -- TODO confirm against the generator.
for row_no, row in enumerate(rows, start=2):
    folder = row["folder_link"]
    track_id = row["track_id"]
    init_sha = row["init_sha"]
    end_sha = row["end_sha"]
    expected_count = row["commit_count"]
    label = f"Row {row_no} [{track_id}]"

    # Count every commit git knows about that touches this folder.
    try:
        result = subprocess.run(
            ["git", "log", "--oneline", "--", folder],
            capture_output=True, text=True, timeout=30, check=False,
        )
    except (OSError, subprocess.SubprocessError) as exc:
        # Record the failure instead of silently dropping the row.
        issues.append(f"{label}: git log failed: {exc}")
        continue
    if result.returncode != 0:
        # A failed git call must not be mistaken for "zero commits".
        issues.append(
            f"{label}: git log exited {result.returncode}: {result.stderr.strip()}"
        )
        continue
    actual_count = len(result.stdout.strip().splitlines())

    # Rows without both SHAs are expected to carry a zero count.
    if not (init_sha and end_sha):
        if expected_count != 0:
            issues.append(
                f"Row {row_no} [{track_id}]: no SHAs but count={expected_count}"
            )
        continue

    if init_sha == end_sha:
        if expected_count not in (0, 1):
            issues.append(
                f"Row {row_no} [{track_id}]: init==end but count={expected_count} (expected 0 or 1)"
            )
    elif expected_count < 1:
        issues.append(
            f"Row {row_no} [{track_id}]: init!=end but count={expected_count} (expected >=1)"
        )

    # Cross-check against git's own total whenever both SHAs are present;
    # a difference of one is tolerated.
    if abs(expected_count - actual_count) > 1:
        issues.append(
            f"Row {row_no} [{track_id}]: count={expected_count} actual_total={actual_count} (off by >1)"
        )

print(f"Total rows: {len(rows)}")
print(f"Issues: {len(issues)}")
for issue in issues[:_MAX_SHOWN]:
    print(f"  {issue}")
if len(issues) > _MAX_SHOWN:
    # Make the truncation visible rather than implying the list is complete.
    print(f"  ... {len(issues) - _MAX_SHOWN} more not shown")
+23
View File
@@ -0,0 +1,23 @@
"""Spot-check summary quality across random rows."""
from __future__ import annotations
import json
import random
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
from scripts.audit.generate_chronology import walk_track_folders # noqa: E402
rows = walk_track_folders(Path("conductor"))

# Fixed seed so repeated runs print the same selection of rows.
random.seed(42)
# random.sample raises ValueError when asked for more items than exist,
# so cap the sample size at the number of rows available.
sample = random.sample(rows, min(15, len(rows)))

for row in sample:
    track_id = row["track_id"]
    date = row["date"]
    status = row["status"]
    # Show at most the first 300 characters; a missing summary prints as empty.
    summary = (row["summary"] or "")[:300]
    print(f"=== {track_id} ({date}) ===")
    print(f"Status: {status}")
    print(f"Summary: {summary}")
    print()