from __future__ import annotations

from pathlib import Path
from unittest.mock import patch

from scripts.video_analysis.ocr_frames import (
    format_ocr_markdown,
    list_frame_files,
    ocr_frames,
)


def test_list_frame_files_returns_sorted(tmp_path: Path) -> None:
    """Frame JPEGs come back in sorted order and the non-frame file is left out."""
    for filename in (
        "frame_00001.jpg",
        "frame_00002.jpg",
        "frame_00010.jpg",
        "ignored.txt",
    ):
        (tmp_path / filename).write_bytes(b"x")

    files = list_frame_files(tmp_path)

    # Compare the whole ordered list so the middle element is verified too,
    # not only the first and last entries.
    assert [frame.name for frame in files] == [
        "frame_00001.jpg",
        "frame_00002.jpg",
        "frame_00010.jpg",
    ]


def test_format_ocr_markdown_empty() -> None:
    """An empty frame list still renders the "# OCR Results" heading."""
    out = format_ocr_markdown([])

    assert "# OCR Results" in out


def test_format_ocr_markdown_with_frames() -> None:
    """The frame name and its OCR text both appear in the rendered markdown."""
    frames = [("frame_00001.jpg", "Hello world", "2026-06-21T00:00:00Z")]

    out = format_ocr_markdown(frames)

    assert "frame_00001.jpg" in out
    assert "Hello world" in out


def test_ocr_frames_calls_backend(tmp_path: Path) -> None:
    """ocr_frames goes through the patched backend and writes the output file."""
    output_path = tmp_path / "ocr.md"
    (tmp_path / "frame_00001.jpg").write_bytes(b"fake-jpg-bytes")

    with patch("scripts.video_analysis.ocr_frames._ocr_single_image") as mock_ocr:
        mock_ocr.return_value = ("extracted text", 0.95)
        result = ocr_frames(tmp_path, output_path, backend="tesseract")

    # Without this check the test would pass even if ocr_frames never used
    # the patched backend function.
    mock_ocr.assert_called()
    assert result.is_ok()
    assert output_path.exists()