43 lines
1.3 KiB
Python
43 lines
1.3 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from unittest.mock import patch
|
|
|
|
from scripts.video_analysis.ocr_frames import (
|
|
format_ocr_markdown,
|
|
list_frame_files,
|
|
ocr_frames,
|
|
)
|
|
|
|
|
|
def test_list_frame_files_returns_sorted(tmp_path: Path) -> None:
    """Check that ``list_frame_files`` returns exactly the frame images, in order.

    Three ``frame_*.jpg`` files and one unrelated ``ignored.txt`` are created
    in ``tmp_path``; the listing must consist of the three frames only, in
    ascending frame-number order.
    """
    for filename in (
        "frame_00001.jpg",
        "frame_00002.jpg",
        "frame_00010.jpg",
        "ignored.txt",
    ):
        (tmp_path / filename).write_bytes(b"x")

    files = list_frame_files(tmp_path)

    # Compare the whole listing rather than only its length and end points,
    # so a misplaced middle entry or a leaked ``ignored.txt`` cannot slip by.
    assert [f.name for f in files] == [
        "frame_00001.jpg",
        "frame_00002.jpg",
        "frame_00010.jpg",
    ]
|
|
|
|
|
|
def test_format_ocr_markdown_empty() -> None:
    """An empty frame list must still render the ``# OCR Results`` heading."""
    assert "# OCR Results" in format_ocr_markdown([])
|
|
|
|
|
|
def test_format_ocr_markdown_with_frames() -> None:
    """A rendered frame entry must contain both its file name and its text."""
    entry = ("frame_00001.jpg", "Hello world", "2026-06-21T00:00:00Z")
    markdown = format_ocr_markdown([entry])

    for expected in ("frame_00001.jpg", "Hello world"):
        assert expected in markdown
|
|
|
|
|
|
def test_ocr_frames_calls_backend(tmp_path: Path) -> None:
    """Run ``ocr_frames`` over one frame with the per-image OCR call patched.

    ``_ocr_single_image`` is replaced by a mock returning a fixed
    ``(text, score)``-shaped tuple, which presumably keeps the placeholder
    bytes written below away from any real OCR engine.
    """
    (tmp_path / "frame_00001.jpg").write_bytes(b"fake-jpg-bytes")
    output_path = tmp_path / "ocr.md"

    with patch("scripts.video_analysis.ocr_frames._ocr_single_image") as mock_ocr:
        mock_ocr.return_value = ("extracted text", 0.95)
        result = ocr_frames(tmp_path, output_path, backend="tesseract")

    # The test name promises that the backend is invoked; without this check
    # the test would pass even if ocr_frames never reached the patched call.
    mock_ocr.assert_called()
    assert result.is_ok()
    assert output_path.exists()
|