"""Tests for scripts/video_analysis/extract_transcript.py. Per conductor/code_styleguides/error_handling.md, success returns Result.ok; failure returns Result.err with ErrorInfo. """ from __future__ import annotations import json from pathlib import Path from unittest.mock import patch from scripts.video_analysis.extract_transcript import ( extract_transcript, format_transcript_json, parse_video_id, ) def test_parse_video_id_youtu_be() -> None: result = parse_video_id("https://youtu.be/9vM4p9NN0Ts") assert result.is_ok() assert result.value == "9vM4p9NN0Ts" def test_parse_video_id_full_url() -> None: result = parse_video_id("https://www.youtube.com/watch?v=0yF9TvMeAzM") assert result.is_ok() assert result.value == "0yF9TvMeAzM" def test_parse_video_id_already_id() -> None: result = parse_video_id("yxkUvXs-hoQ") assert result.is_ok() assert result.value == "yxkUvXs-hoQ" def test_parse_video_id_invalid() -> None: result = parse_video_id("not-a-url") assert result.is_err() def test_extract_transcript_success(tmp_path: Path) -> None: fake_segments = [ {"start": 0.0, "duration": 5.0, "text": "Hello world"}, {"start": 5.0, "duration": 3.0, "text": "Goodbye world"}, ] with patch("scripts.video_analysis.extract_transcript._fetch_raw_transcript") as mock_fetch: mock_fetch.return_value = fake_segments result = extract_transcript("https://youtu.be/ABCDEFGHIJK", tmp_path / "transcript.json") assert result.is_ok() data = json.loads((tmp_path / "transcript.json").read_text()) assert data["video_id"] == "ABCDEFGHIJK" assert len(data["segments"]) == 2 assert data["plain"] == "Hello world\nGoodbye world" def test_extract_transcript_network_error(tmp_path: Path) -> None: with patch("scripts.video_analysis.extract_transcript._fetch_raw_transcript") as mock_fetch: mock_fetch.side_effect = Exception("network unreachable") result = extract_transcript("https://youtu.be/ABCDEFGHIJK", tmp_path / "transcript.json") assert result.is_err() def test_extract_transcript_retries_then_fails(tmp_path: Path) -> None: with patch("scripts.video_analysis.extract_transcript._fetch_raw_transcript") as mock_fetch: mock_fetch.side_effect = Exception("transient") result = extract_transcript("https://youtu.be/ABCDEFGHIJK", tmp_path / "transcript.json", retries=2) assert result.is_err() assert mock_fetch.call_count == 2 def test_format_transcript_json_structure() -> None: segments = [{"start": 0.0, "duration": 5.0, "text": "Hello"}] out = format_transcript_json("VID123", segments) assert out["video_id"] == "VID123" assert out["segments"] == segments assert out["plain"] == "Hello" assert "fetched_at" in out