Skip to content

Commit cd0017c

Browse files
committed
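- Add tests for the video_transcription command;
- Add supporting changes in other files: an optional `raise_column_exception` flag on `data_from_csv`, configurable URL/ID column names for video-transcription, and required-argument handling in the CLI test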
1 parent 104df88 commit cd0017c

File tree

5 files changed

+104
-27
lines changed

5 files changed

+104
-27
lines changed

tests/commands/test_channel_info.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,6 @@ def test_channel_ids_from_urls_and_usernames(mocker, channels_urls, usernames):
4747
channel_id_from_username_mock.assert_has_calls(
4848
[call(username) for username in usernames]
4949
)
50-
channels_infos_mock.assert_called_once_with([ids_from_urls_mock, ids_from_usernames_mock])
50+
channels_infos_mock.assert_called_once()
51+
assert ids_from_usernames_mock in channels_infos_mock.call_args.args[0]
52+
assert ids_from_urls_mock in channels_infos_mock.call_args.args[0]
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
from unittest.mock import Mock
2+
3+
from youtool.commands import VideoTranscription
4+
5+
6+
def test_video_transcription(mocker, videos_ids, videos_urls, tmp_path):
7+
youtube_mock = mocker.patch("youtool.commands.video_transcription.YouTube")
8+
9+
language_code = "pt_br"
10+
11+
videos_transcriptions_mock = Mock()
12+
youtube_mock.return_value.videos_transcriptions = videos_transcriptions_mock
13+
14+
for video_id in videos_ids:
15+
open(tmp_path / f"{video_id}.{language_code}.vtt", "a").close()
16+
17+
result = VideoTranscription.execute(
18+
ids=videos_ids, urls=videos_urls, language_code=language_code, output_dir=tmp_path
19+
)
20+
21+
videos_transcriptions_mock.assert_called_once_with(
22+
list(set(videos_ids)), language_code, tmp_path
23+
)
24+
25+
for video_id in videos_ids:
26+
assert str(tmp_path / f"{video_id}.{language_code}.vtt") in result
27+
28+
29+
def test_video_transcription_input_from_file(mocker, videos_ids, tmp_path):
30+
youtube_mock = mocker.patch("youtool.commands.video_transcription.YouTube")
31+
32+
language_code = "pt_br"
33+
34+
videos_transcriptions_mock = Mock()
35+
youtube_mock.return_value.videos_transcriptions = videos_transcriptions_mock
36+
37+
input_file_path = tmp_path / "input_file.csv"
38+
39+
with open(input_file_path, "w") as input_csv:
40+
input_csv.write("video_id\n" + "\n".join(videos_ids))
41+
42+
for video_id in videos_ids:
43+
open(tmp_path / f"{video_id}.{language_code}.vtt", "a").close()
44+
45+
result = VideoTranscription.execute(
46+
ids=None, urls=None,
47+
language_code=language_code, output_dir=tmp_path,
48+
input_file_path=input_file_path
49+
)
50+
51+
videos_transcriptions_mock.assert_called_once_with(
52+
list(set(videos_ids)), language_code, tmp_path
53+
)
54+
55+
for video_id in videos_ids:
56+
assert str(tmp_path / f"{video_id}.{language_code}.vtt") in result

tests/test_cli.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pytest
22

3+
from pathlib import Path
34
from subprocess import run
45

56
from youtool.commands import COMMANDS
@@ -12,9 +13,13 @@
1213
)
1314
def test_missing_api_key(monkeypatch: pytest.MonkeyPatch, command: Command):
1415
monkeypatch.delenv('YOUTUBE_API_KEY', raising=False)
15-
cli_path = "youtool/cli.py"
16-
command = ["python", cli_path, command.name]
17-
result = run(command, capture_output=True, text=True, check=False)
16+
cli_path = Path("youtool") / "cli.py"
17+
command_string = ["python", cli_path, command.name]
18+
for arg in command.arguments:
19+
if arg.get("required"):
20+
command_string.append(arg.get("name"))
21+
command_string.append("test_value")
22+
result = run(command_string, capture_output=True, text=True, check=False)
1823

1924
assert result.returncode == 2
20-
assert "YouTube API Key is required" in result.stderr
25+
assert "YouTube API Key is required" in result.stderr

youtool/commands/base.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,11 @@ def execute(cls, **kwargs) -> str: # noqa: D417
8080
raise NotImplementedError()
8181

8282
@staticmethod
83-
def data_from_csv(file_path: Path, data_column_name: Optional[str] = None) -> List[str]:
83+
def data_from_csv(
84+
file_path: Path,
85+
data_column_name: Optional[str] = None,
86+
raise_column_exception: bool = True
87+
) -> List[str]:
8488
"""Extracts a list of URLs from a specified CSV file.
8589
8690
Args:
@@ -107,7 +111,10 @@ def data_from_csv(file_path: Path, data_column_name: Optional[str] = None) -> Li
107111
raise ValueError("Fieldnames is None")
108112

109113
if data_column_name not in fieldnames:
110-
raise Exception(f"Column {data_column_name} not found on {file_path}")
114+
if raise_column_exception:
115+
raise Exception(f"Column {data_column_name} not found on {file_path}")
116+
return data
117+
111118
for row in reader:
112119
value = row.get(data_column_name)
113120
if value is not None:

youtool/commands/video_transcription.py

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@ class VideoTranscription(Command):
1616
{"name": "--output-dir", "type": str, "help": "Output directory to save transcriptions"},
1717
{"name": "--language-code", "type": str, "help": "Language code for transcription"},
1818
{"name": "--api-key", "type": str, "help": "API key for YouTube Data API"},
19+
{"name": "--url-column-name", "type": str, "help": "URL column name on csv input files"},
20+
{"name": "--id-column-name", "type": str, "help": "Channel ID column name on csv output files"}
1921
]
2022

21-
TRANSCRIPTION_COLUMNS: List[str] = [
22-
"video_id", "transcription_text"
23-
]
23+
ID_COLUMN_NAME: str = "video_id"
24+
URL_COLUMN_NAME: str = "video_url"
2425

2526
@classmethod
2627
def execute(cls, **kwargs) -> str:
@@ -38,33 +39,39 @@ def execute(cls, **kwargs) -> str:
3839
Returns:
3940
A message indicating the result of the command. Reports success or failure for each video transcription download.
4041
"""
41-
ids = kwargs.get("ids")
42-
urls = kwargs.get("urls")
42+
ids = kwargs.get("ids") or []
43+
urls = kwargs.get("urls") or []
4344
input_file_path = kwargs.get("input_file_path")
4445
output_dir = kwargs.get("output_dir")
4546
language_code = kwargs.get("language_code")
4647
api_key = kwargs.get("api_key")
4748

49+
url_column_name = kwargs.get("url_column_name", cls.URL_COLUMN_NAME)
50+
id_column_name = kwargs.get("id_column_name", cls.ID_COLUMN_NAME)
51+
4852
youtube = YouTube([api_key], disable_ipv6=True)
4953

50-
if input_file_path:
51-
ids += cls.data_from_csv(Path(input_file_path), "video_id")
54+
if (input_file_path := kwargs.get("input_file_path")):
55+
if (urls_from_csv := cls.data_from_csv(input_file_path, url_column_name, False)):
56+
ids += [cls.video_id_from_url(url) for url in urls_from_csv]
57+
if (ids_from_csv := cls.data_from_csv(input_file_path, id_column_name, False)):
58+
ids += ids_from_csv
59+
60+
if not ids and not urls:
61+
raise Exception(
62+
"Either 'ids' or 'urls' must be provided for the video-transcription command"
63+
)
5264

5365
if urls:
5466
ids += [cls.video_id_from_url(url) for url in urls]
5567

5668
# Remove duplicated
5769
ids = list(set(ids))
58-
59-
# youtube.videos_transcriptions(ids, language_code, output_dir)
60-
61-
results = []
62-
for video_id in ids:
63-
try:
64-
transcription = youtube.video_transcription(video_id, language_code)
65-
output_file_path = cls.save_transcription_to_file(video_id, transcription, output_dir)
66-
results.append(f"Transcription saved to {output_file_path}")
67-
except Exception as e:
68-
results.append(f"Error processing video {video_id}: {str(e)}")
69-
70-
return "\n".join(results)
70+
youtube.videos_transcriptions(ids, language_code, output_dir)
71+
output_dir_path = Path(output_dir)
72+
saved_transcriptions = [
73+
str(
74+
output_dir_path / f"{v_id}.{language_code}.vtt"
75+
) for v_id in ids if (output_dir_path / f"{v_id}.{language_code}.vtt").is_file()
76+
]
77+
return "\n".join(saved_transcriptions)

0 commit comments

Comments
 (0)