# manual_slop/tests/test_video_analysis_ocr_frames.py

from __future__ import annotations

from pathlib import Path
from unittest.mock import patch

from scripts.video_analysis.ocr_frames import (
 format_ocr_markdown,
 list_frame_files,
 ocr_frames,
)


def test_list_frame_files_returns_sorted(tmp_path: Path) -> None:
 """Only the three .jpg frames are listed, from frame_00001 through frame_00010."""
 # Three frame images plus one unrelated .txt file that must not be counted.
 for filename in ("frame_00001.jpg", "frame_00002.jpg", "frame_00010.jpg", "ignored.txt"):
  (tmp_path / filename).write_bytes(b"x")
 found = list_frame_files(tmp_path)
 assert len(found) == 3
 assert found[0].name == "frame_00001.jpg"
 assert found[-1].name == "frame_00010.jpg"


def test_format_ocr_markdown_empty() -> None:
 """An empty frame list still yields output containing "# OCR Results"."""
 assert "# OCR Results" in format_ocr_markdown([])


def test_format_ocr_markdown_with_frames() -> None:
 """A single frame record's name and text both appear in the formatted output."""
 # The third field looks like an ISO-8601 timestamp; its rendering is not checked here.
 name, text, stamp = "frame_00001.jpg", "Hello world", "2026-06-21T00:00:00Z"
 rendered = format_ocr_markdown([(name, text, stamp)])
 assert name in rendered
 assert text in rendered


def test_ocr_frames_calls_backend(tmp_path: Path) -> None:
 """ocr_frames invokes the patched OCR helper, reports success, and writes the output file.

 ``_ocr_single_image`` is replaced with a mock returning a canned
 ``("extracted text", 0.95)`` tuple, so the frame file only needs
 placeholder bytes.
 """
 (tmp_path / "frame_00001.jpg").write_bytes(b"fake-jpg-bytes")
 output_path = tmp_path / "ocr.md"
 with patch(
  "scripts.video_analysis.ocr_frames._ocr_single_image",
  return_value=("extracted text", 0.95),
 ) as mock_ocr:
  result = ocr_frames(tmp_path, output_path, backend="tesseract")
 # The test name promises the backend is exercised, so verify the call
 # explicitly instead of relying only on the success flag and output file.
 mock_ocr.assert_called()
 assert result.is_ok()
 assert output_path.exists()