# manual_slop/scripts/video_analysis/synthesize_report.py

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import Any

from scripts.video_analysis import download_video, extract_keyframes, extract_transcript, ocr_frames
from scripts.video_analysis.error_types import ErrorInfo, make_error


# Stage names, listed in the same order synthesize_report performs the work:
# transcript extraction, video download, keyframes, OCR, then report stubs.
PIPELINE_STAGES: list[str] = [
 "transcript",
 "download",
 "keyframes",
 "ocr",
 "report",
]


@dataclass
class ReportContext:
 """Plain record grouping a URL, a slug and an output directory.

 NOTE(review): the field names match the first three parameters of
 ``synthesize_report``; presumably this bundles them for callers --
 confirm against usage outside this file section.
 """

 # Source URL (presumably the video URL -- verify against callers).
 url: str
 # Short identifier string for the report.
 slug: str
 # Directory path associated with the report output.
 output_dir: Path


@dataclass
class _Ok:
 """Success variant of the result pair; carries the produced value."""

 # Payload of the successful operation; any type is accepted.
 value: Any

 def is_ok(self) -> bool:
  """Return True: this variant always represents success."""
  return True

 def is_err(self) -> bool:
  """Return False: this variant never represents failure."""
  return False


@dataclass
class _Err:
 """Failure variant of the result pair; carries the error description."""

 # Error payload describing what went wrong.
 err: ErrorInfo

 def is_ok(self) -> bool:
  """Return False: this variant never represents success."""
  return False

 def is_err(self) -> bool:
  """Return True: this variant always represents failure."""
  return True


def ok(value: Any) -> _Ok:
 """Wrap ``value`` in the success variant of the result pair."""
 return _Ok(value=value)


def err(error: ErrorInfo) -> _Err:
 """Wrap ``error`` in the failure variant of the result pair."""
 return _Err(err=error)


def build_report_stub(slug: str, url: str, video_id: str, *, date_added: str = "2026-06-21") -> str:
 """Return the markdown skeleton for a video's ``report.md``.

 The skeleton has a title line containing the slug, a metadata header
 (source URL, YouTube ID, campaign date), a prompt line for the worker
 that fills the report in, and eight empty numbered section headings.

 Args:
  slug: Identifier shown in parentheses on the title line.
  url: Source URL written to the ``**Source:**`` line.
  video_id: Value written to the ``**YouTube ID:**`` line.
  date_added: Text written to the ``**Date Added to Campaign:**`` line.
   Defaults to the date that used to be hard-coded in the template, so
   existing callers get byte-identical output.

 Returns:
  The stub as a single string ending with a newline.
 """
 return f"""# <Video Title> ({slug})

**Source:** {url}
**YouTube ID:** {video_id}
**Date Added to Campaign:** {date_added}

> **Tier 3 worker prompt:** populate each section using the transcript.json, ocr.md, and frames/ artifacts in this directory.

## 1. TL;DR
## 2. Key Concepts
## 3. Frame Analysis
## 4. Transcript Highlights
## 5. Mathematical / Theoretical Content
## 6. Connections to Other Videos in Campaign
## 7. Open Questions / Follow-up
## 8. References
"""


def build_summary_stub(slug: str, title: str, author: str | None) -> str:
 """Return the markdown skeleton for a video's ``summary.md``.

 Args:
  slug: Identifier placed in the ``# Summary:`` heading.
  title: Text for the ``**Title:**`` line.
  author: Text for the ``**Author:**`` line; any falsy value (``None``
   or an empty string) is rendered as ``(unknown)``.

 Returns:
  The stub as a single string ending with a newline.
 """
 author_label = author or "(unknown)"
 stub_lines = [
  f"# Summary: {slug}",
  "",
  f"**Title:** {title}",
  f"**Author:** {author_label}",
  "",
  "<200-400 word summary to be filled in by the Tier 3 worker after reading report.md>",
  "",  # trailing empty entry yields the final newline
 ]
 return "\n".join(stub_lines)


def synthesize_report(url: str, slug: str, output_dir: Path, skip_video_download: bool = False) -> _Ok | _Err:
 """Run the analysis stages for one video and write report/summary stubs.

 Stages run in this order: transcript extraction, then -- unless
 ``skip_video_download`` is true -- video download, keyframe extraction
 and OCR. The first stage whose result reports ``is_err()`` stops the run
 and that result is returned unchanged. Afterwards ``report.md`` and
 ``summary.md`` stubs are written into ``output_dir``.

 Args:
  url: Video URL handed to the transcript, download and ID-parsing steps.
  slug: Identifier embedded in the generated stubs.
  output_dir: Directory receiving ``report.md``, ``summary.md`` and the
   ``artifacts/`` subtree (created if missing).
  skip_video_download: When true, the download is skipped together with
   keyframe extraction and OCR; the frames directory is still created.

 Returns:
  The failing stage's result, or an ok result wrapping a dict with the
  string paths under the keys ``transcript``, ``frames``, ``report`` and
  ``summary``.
 """
 artifacts_root = output_dir / "artifacts"
 frame_folder = artifacts_root / "frames"
 artifacts_root.mkdir(parents=True, exist_ok=True)
 frame_folder.mkdir(parents=True, exist_ok=True)
 transcript_file = artifacts_root / "transcript.json"

 transcript_outcome = extract_transcript.extract_transcript(url, transcript_file)
 if transcript_outcome.is_err():
  return transcript_outcome

 if not skip_video_download:
  video_file = artifacts_root / "video.mp4"
  ocr_file = artifacts_root / "ocr.md"
  # Deferred calls so each stage only starts once the previous one succeeded.
  media_stages = (
   lambda: download_video.download_video(url, video_file),
   lambda: extract_keyframes.extract_keyframes(video_file, frame_folder),
   lambda: ocr_frames.ocr_frames(frame_folder, ocr_file),
  )
  for run_stage in media_stages:
   stage_outcome = run_stage()
   if stage_outcome.is_err():
    return stage_outcome

 # An unparseable URL does not fail the run; the stub gets a placeholder ID.
 id_lookup = extract_transcript.parse_video_id(url)
 if id_lookup.is_ok():
  video_id = id_lookup.value
 else:
  video_id = "UNKNOWN"

 report_file = output_dir / "report.md"
 report_file.write_text(build_report_stub(slug, url, video_id), encoding="utf-8")

 # Title and author are placeholders at this point.
 summary_file = output_dir / "summary.md"
 summary_file.write_text(build_summary_stub(slug, "<Title TBD>", None), encoding="utf-8")

 produced_paths = {
  "transcript": str(transcript_file),
  "frames": str(frame_folder),
  "report": str(report_file),
  "summary": str(summary_file),
 }
 return ok(produced_paths)