Private
Public Access
0
0

feat(chronology): rewrite classifier to use git-history evidence + 7-status enum + Needs Review section

This commit is contained in:
2026-07-01 23:29:45 -04:00
parent 9d8fc90415
commit 945751b99a
2 changed files with 140 additions and 96 deletions
+133 -89
View File
@@ -1,12 +1,13 @@
#!/usr/bin/env python3
"""Generate chronology draft for Manual Slop conductor tracks.
"""Generate chronology for Manual Slop conductor tracks.
Walks conductor/tracks/ and conductor/archive/, extracts per-track data
(date, ID, status, summary, commit range), and emits a draft to stdout.
(date, ID, status, summary, commit range), and emits a markdown chronology
to stdout.
The script is READ-ONLY on the source folders. It writes to stdout only.
The human cross-check (FR6 of the chronology_20260619 track) is the authority;
this script is a starting point, not the canonical source.
The v2 classifier uses git-history evidence (work-commit count + report
overrides) instead of stale metadata.json.status. Returns
(status, confidence, reason) per row.
Usage:
uv run python scripts/audit/generate_chronology.py --draft
@@ -28,6 +29,20 @@ _SENTENCE_END_RE = re.compile(r"\.\s")
_GIT_TIMEOUT = 30
_DEFAULT_ROOT = "conductor/"
# Bold markdown field labels. extract_summary rejects a metadata.json
# description, and skips spec.md/plan.md lines, that start with any of these,
# so a metadata line is never used as a track summary.
_METADATA_FIELD_PREFIXES = (
"**Priority:**",
"**Date:**",
"**Initialized:**",
"**Track:**",
"**Track ID:**",
"**Parent umbrella:**",
"**Status:**",
"**Confidence:**",
)
# Commit-subject prefixes that _count_work_commits counts as work commits.
_WORK_COMMIT_PREFIXES = ("feat:", "fix:", "refactor:", "perf:", "test:", "docs(report):")
# Commit-subject prefixes that _count_work_commits explicitly excludes from
# the work-commit count.
_METADATA_COMMIT_PREFIXES = ("conductor(plan):", "conductor(state):", "conductor(track):", "docs(spec):", "docs(plan):")
def extract_slug_date(folder_name: str) -> Optional[str]:
m = _SLUG_DATE_RE.search(folder_name)
@@ -65,8 +80,8 @@ def extract_summary(folder_path: Path) -> str:
try:
data = json.loads(md_path.read_text(encoding="utf-8"))
desc = str(data.get("description", "")).strip()
if desc:
return desc
if desc and not desc.startswith(_METADATA_FIELD_PREFIXES):
return _truncate_to_25_words(_first_sentence(desc))
except (json.JSONDecodeError, OSError):
pass
for fname in ("spec.md", "plan.md"):
@@ -80,14 +95,14 @@ def extract_summary(folder_path: Path) -> str:
for line in text.splitlines():
stripped = line.strip()
if not stripped:
continue
continue
if stripped.startswith("#"):
continue
continue
if stripped.startswith(">"):
continue
continue
bare = stripped.lstrip(">").strip()
if bare.startswith("**Status:**") or bare.startswith("**Track ID:**") or bare.startswith("**Track:**"):
continue
if bare.startswith(_METADATA_FIELD_PREFIXES):
continue
return _truncate_to_25_words(_first_sentence(bare))
return "Imported from archive (no spec)"
@@ -133,47 +148,6 @@ def _repo_root(start: Path) -> Path:
return start.parent
def _git_log(folder_relpath: str, *args: str) -> str:
    """Return raw ``git log`` stdout limited to *folder_relpath*.

    Extra *args* are inserted before the ``--`` pathspec separator. A
    non-zero exit status, a timeout, or a failure to launch git all yield
    an empty string instead of raising.
    """
    command = ["git", "log"]
    command.extend(args)
    command += ["--", folder_relpath]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=_GIT_TIMEOUT,
            check=False,
        )
    except (subprocess.SubprocessError, OSError):
        return ""
    return completed.stdout if completed.returncode == 0 else ""
def _git_first_line(folder_relpath: str, *args: str) -> str:
    """Return the first line of the trimmed ``_git_log`` output.

    Returns an empty string when the log output is empty or whitespace-only.
    """
    trimmed = _git_log(folder_relpath, *args).strip()
    return trimmed.splitlines()[0] if trimmed else ""
def _repo_root(start: Path) -> Path:
try:
result = subprocess.run(
["git", "rev-parse", "--show-toplevel"],
capture_output=True,
text=True,
timeout=10,
check=False,
cwd=str(start),
)
if result.returncode == 0 and result.stdout.strip():
return Path(result.stdout.strip())
except (subprocess.SubprocessError, OSError):
pass
return start.parent
def _parse_state_phase(state_path: Path) -> str:
if not state_path.is_file():
return "no-state-toml"
@@ -182,11 +156,27 @@ def _parse_state_phase(state_path: Path) -> str:
if line.startswith("current_phase"):
v = line.split("=", 1)[1].strip().split("#")[0].strip().strip('"')
return v
except (subprocess.SubprocessError, OSError, Exception):
except (OSError, Exception):
pass
return "?"
def _parse_state_status(state_path: Path) -> str:
if not state_path.is_file():
return ""
try:
text = state_path.read_text(encoding="utf-8")
except OSError:
return ""
for line in text.splitlines():
stripped = line.strip()
if stripped.startswith("status") and "=" in stripped:
parts = stripped.split("=", 1)
if len(parts) == 2:
return parts[1].strip().strip('"').strip("'").split("#")[0].strip()
return ""
def _last_commit_date(folder_relpath: str) -> str:
try:
result = subprocess.run(
@@ -198,38 +188,75 @@ def _last_commit_date(folder_relpath: str) -> str:
return "never"
def _classify_status(folder_link: str, current: str, track_id: str) -> str:
"""Per-row manual review classification (FR6 hard gate).
def _count_work_commits(folder_relpath: str) -> int:
    """Count commits touching *folder_relpath* whose subject marks real work.

    Each ``git log --oneline`` line is split at the first space into hash and
    subject. A commit counts when its subject starts with one of
    ``_WORK_COMMIT_PREFIXES`` and with none of ``_METADATA_COMMIT_PREFIXES``.
    Lines without a space have an empty subject and never count.
    """
    # NOTE(review): matching is a plain prefix test, so a scoped subject such
    # as "feat(scope): ..." does not match "feat:" and is not counted.
    # Confirm that this is the intended convention.
    subjects = (
        entry.partition(" ")[2]
        for entry in _git_log(folder_relpath, "--oneline").splitlines()
    )
    return sum(
        1
        for subject in subjects
        if subject.startswith(_WORK_COMMIT_PREFIXES)
        and not subject.startswith(_METADATA_COMMIT_PREFIXES)
    )
Logic (per user directive 2026-06-20):
- PLACEHOLDER tracks: keep as is
- archive/ folder: default to Completed (the work was done and archived; metadata status may be stale)
- tracks/ folder + state_phase=complete OR chrono in {completed, complete, shipped}: Completed
- tracks/ folder + everything else: keep original chrono status (in flight)
- Abandoned is reserved for explicit user marking; the script does NOT auto-mark.
Note: "Completed" (not "Shipped") is the canonical term per user directive 2026-06-20.
This is a side-project, not a shipped product.
def _has_report_matching(reports_dir: Path, track_id: str, prefix: str) -> bool:
if not reports_dir.is_dir():
return False
for f in reports_dir.iterdir():
if f.is_file() and f.name.startswith(prefix) and track_id in f.name:
return True
return False
def classify_status(
folder_link: str,
current: str,
track_id: str,
repo_root: Path,
reports_dir: Path,
has_abort_report: bool = False,
state_status: str = "",
) -> tuple[str, str, str]:
"""Git-history evidence classifier returning (status, confidence, reason).
Evidence priority:
1. Override signals (highest): TRACK_COMPLETION/TRACK_ABORTED reports, state.toml superseded
2. Git commit evidence (medium): work-commit count
3. Directory location (low): archive/ vs tracks/
4. Fallback: Needs Review
"""
if "PLACEHOLDER" in track_id:
return current
return ("Special", "high", "placeholder track")
if "contingency" in current.lower():
return current
return ("Special", "high", "contingency track")
# 1. Override signals
if state_status == "superseded":
return ("Superseded", "high", "state.toml status=superseded")
if has_abort_report or _has_report_matching(reports_dir, track_id, "TRACK_ABORTED_"):
return ("Abandoned", "high", "abort report found")
if _has_report_matching(reports_dir, track_id, "TRACK_COMPLETION_"):
return ("Completed", "high", "completion report found")
# 2. Git commit evidence
is_archive = folder_link.startswith("conductor/archive/")
is_tracks = folder_link.startswith("conductor/tracks/")
work_commits: int = _count_work_commits(folder_link)
if work_commits >= 3:
return ("Completed", "medium", f"{work_commits} work commits")
if 1 <= work_commits <= 2 and is_tracks:
return ("In Progress", "medium", f"{work_commits} work commits in tracks/")
if work_commits == 0 and is_tracks:
return ("Active", "medium", "0 work commits in tracks/ (spec/plan only)")
# 3. Directory location
if is_archive:
return "Completed"
folder = Path(folder_link)
state_phase = _parse_state_phase(folder / "state.toml") if is_tracks else "?"
chrono_lower = current.lower()
is_completed = chrono_lower in {"completed", "complete", "shipped"} or state_phase in {"complete", '"complete"'}
if is_tracks and is_completed:
return "Completed"
return current
if work_commits == 0:
return ("Abandoned", "low", "archived with 0 commits")
return ("Completed", "low", "archived but no completion report")
# 4. Fallback
return ("Needs Review", "none", "classifier inconclusive")
def walk_track_folders(root: Path) -> list[dict]:
repo_root: Path = _repo_root(root)
reports_dir: Path = repo_root / "docs" / "reports"
rows: list[dict] = []
for parent_dir, default_status in (
(root / "tracks", "Active"),
@@ -252,16 +279,22 @@ def walk_track_folders(root: Path) -> list[dict]:
first_commit = _git_first_line(folder_relpath, "--reverse", "--format=%aI")
date = first_commit[:10] if first_commit else ""
metadata_path = folder / "metadata.json"
status: str = default_status
meta_status: str = ""
if metadata_path.is_file():
try:
data = json.loads(metadata_path.read_text(encoding="utf-8"))
meta_status = str(data.get("status", "")).strip()
if meta_status:
status = meta_status
except (json.JSONDecodeError, OSError):
pass
status = _classify_status(folder_relpath, status, track_id)
state_status: str = _parse_state_status(folder / "state.toml")
status, confidence, reason = classify_status(
folder_link=folder_relpath,
current=meta_status or default_status,
track_id=track_id,
repo_root=repo_root,
reports_dir=reports_dir,
state_status=state_status,
)
summary: str = extract_summary(folder)
init_sha: str = _git_first_line(folder_relpath, "--reverse", "--format=%h")
end_sha: str = _git_first_line(folder_relpath, "-1", "--format=%h")
@@ -279,6 +312,8 @@ def walk_track_folders(root: Path) -> list[dict]:
"date": date,
"track_id": track_id,
"status": status,
"confidence": confidence,
"reason": reason,
"summary": summary,
"init_sha": init_sha,
"end_sha": end_sha,
@@ -291,16 +326,25 @@ def walk_track_folders(root: Path) -> list[dict]:
def format_markdown(rows: list[dict]) -> str:
lines: list[str] = [
"| Date | ID | Status | Summary | Folder | Range |",
"| --- | --- | --- | --- | --- | --- |",
]
for row in rows:
range_str: str = f"`{row['init_sha']}..{row['end_sha']}` ({row['commit_count']})"
from datetime import date as today_date
lines: list[str] = []
lines.append(f"<!-- Generated {today_date.today().isoformat()} | {len(rows)} rows -->")
lines.append("")
lines.append("| Date | ID | Status | Summary | Folder | Range |")
lines.append("| --- | --- | --- | --- | --- | --- |")
for r in rows:
range_str: str = f"`{r['init_sha']}..{r['end_sha']}` ({r['commit_count']})" if r["init_sha"] else "n/a"
lines.append(
f"| {row['date']} | `{row['track_id']}` | {row['status']} | "
f"{_md_escape(row['summary'])} | `{row['folder_link']}` | {range_str} |"
f"| {r['date']} | `{r['track_id']}` | {r['status']} | "
f"{_md_escape(r['summary'])} | `{r['folder_link']}` | {range_str} |"
)
needs_review = [r for r in rows if r["status"] == "Needs Review"]
if needs_review:
lines.append("")
lines.append("## Needs Review")
lines.append("")
for r in needs_review:
lines.append(f"- `{r['track_id']}` (`{r['folder_link']}`): {r['reason']}")
return "\n".join(lines) + "\n"
@@ -311,12 +355,12 @@ def main() -> None:
except (OSError, ValueError):
pass
parser = argparse.ArgumentParser(
description="Generate chronology draft for Manual Slop conductor tracks.",
description="Generate chronology for Manual Slop conductor tracks.",
)
parser.add_argument(
"--draft",
action="store_true",
help="Emit markdown draft table to stdout.",
help="Emit markdown table to stdout.",
)
parser.add_argument(
"--root",
@@ -335,4 +379,4 @@ def main() -> None:
if __name__ == "__main__":
main()
main()
+7 -7
View File
@@ -149,7 +149,7 @@ def test_classify_status_work_commits_completed(tmp_path: Path) -> None:
reports_dir = tmp_path / "docs/reports"
reports_dir.mkdir(parents=True)
with patch("scripts.audit.generate_chronology._git_log") as mock_log:
mock_log.return_value = "feat: add thing\nfix: fix thing\nrefactor: refactor thing\n"
mock_log.return_value = "abc1234 feat: add thing\ndef5678 fix: fix thing\nghi9012 refactor: refactor thing\n"
result = classify_status(
folder_link="conductor/tracks/my_track_20260701",
current="active",
@@ -162,12 +162,12 @@ def test_classify_status_work_commits_completed(tmp_path: Path) -> None:
assert "work commits" in result[2]
def test_classify_status_metadata_commits_not_counted_as_work(tmp_path: Path) -> None:
def test_classify_status_metadata_commits_not_countd_as_work(tmp_path: Path) -> None:
"""conductor(plan): commits don't count as work commits."""
reports_dir = tmp_path / "docs/reports"
reports_dir.mkdir(parents=True)
with patch("scripts.audit.generate_chronology._git_log") as mock_log:
mock_log.return_value = "conductor(plan): mark task\nconductor(state): update\nconductor(track): init\n"
mock_log.return_value = "abc1234 conductor(plan): mark task\ndef5678 conductor(state): update\nghi9012 conductor(track): init\n"
result = classify_status(
folder_link="conductor/tracks/my_track_20260701",
current="active",
@@ -183,7 +183,7 @@ def test_classify_status_1_2_work_commits_in_progress(tmp_path: Path) -> None:
reports_dir = tmp_path / "docs/reports"
reports_dir.mkdir(parents=True)
with patch("scripts.audit.generate_chronology._git_log") as mock_log:
mock_log.return_value = "feat: add thing\nfix: fix thing\n"
mock_log.return_value = "abc1234 feat: add thing\ndef5678 fix: fix thing\n"
result = classify_status(
folder_link="conductor/tracks/my_track_20260701",
current="active",
@@ -199,7 +199,7 @@ def test_classify_status_archive_no_override_completed_low(tmp_path: Path) -> No
reports_dir = tmp_path / "docs/reports"
reports_dir.mkdir(parents=True)
with patch("scripts.audit.generate_chronology._git_log") as mock_log:
mock_log.return_value = "feat: thing\nfix: thing\nrefactor: thing\n"
mock_log.return_value = "abc1234 feat: thing\ndef5678 fix: thing\nghi9012 refactor: thing\n"
result = classify_status(
folder_link="conductor/archive/my_track_20260701",
current="active",
@@ -212,13 +212,13 @@ def test_classify_status_archive_no_override_completed_low(tmp_path: Path) -> No
def test_classify_status_fallback_needs_review(tmp_path: Path) -> None:
"""Inconclusive -> Needs Review."""
"""Inconclusive -> Needs Review (path is neither tracks/ nor archive/)."""
reports_dir = tmp_path / "docs/reports"
reports_dir.mkdir(parents=True)
with patch("scripts.audit.generate_chronology._git_log") as mock_log:
mock_log.return_value = ""
result = classify_status(
folder_link="conductor/tracks/my_track",
folder_link="some/other/path/my_track",
current="",
track_id="my_track",
repo_root=tmp_path,