Skip to content

Commit 93a94da

Browse files
committed
Add video_transcription command
1 parent be977aa commit 93a94da

File tree

1 file changed

+70
-0
lines changed

1 file changed

+70
-0
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import csv
2+
from pathlib import Path
3+
from typing import List, Dict
4+
from .base import Command
5+
from youtool import YouTube
6+
7+
class VideoTranscription(Command):
    """Download video transcriptions and report the result for each video.

    Video IDs can be supplied directly (``--ids``), derived from video URLs
    (``--urls``) and/or read from the ``video_id`` column of a CSV file
    (``--input-file-path``). Each transcription is fetched for the requested
    language code and saved under the output directory.
    """

    name = "video-transcription"
    arguments = [
        {"name": "--ids", "type": str, "help": "Video IDs", "nargs": "*"},
        {"name": "--urls", "type": str, "help": "Video URLs", "nargs": "*"},
        {"name": "--input-file-path", "type": str, "help": "CSV file path containing video IDs or URLs"},
        {"name": "--output-dir", "type": str, "help": "Output directory to save transcriptions"},
        {"name": "--language-code", "type": str, "help": "Language code for transcription"},
        {"name": "--api-key", "type": str, "help": "API key for YouTube Data API"},
    ]

    # Column names associated with a transcription record (not used by `execute`).
    TRANSCRIPTION_COLUMNS: List[str] = [
        "video_id",
        "transcription_text",
    ]

    @classmethod
    def execute(cls, **kwargs) -> str:
        """Download and save the transcription of every requested video.

        Args:
            ids: A list of YouTube video IDs (optional).
            urls: A list of YouTube video URLs (optional).
            input_file_path: Path to a CSV file; its ``video_id`` column is
                read as an extra source of video IDs (optional).
            output_dir: Directory path to save the transcription files.
            language_code: Language code for the transcription language.
            api_key: The API key to authenticate with the YouTube Data API.

        Returns:
            One line per video, joined by newlines: either the path the
            transcription was saved to or the error raised while processing
            that video. An empty string when no video ID was supplied.
        """
        urls = kwargs.get("urls")
        input_file_path = kwargs.get("input_file_path")
        output_dir = kwargs.get("output_dir")
        language_code = kwargs.get("language_code")
        api_key = kwargs.get("api_key")

        # `ids` is None when the option is omitted; copy it so that the
        # caller's list is never extended in place by the steps below.
        ids = list(kwargs.get("ids") or [])

        youtube = YouTube([api_key], disable_ipv6=True)

        # NOTE(review): `data_from_csv`, `video_id_from_url` and
        # `save_transcription_to_file` are not defined in this class; they are
        # expected to be inherited from `Command` - confirm.
        if input_file_path:
            ids.extend(cls.data_from_csv(Path(input_file_path), "video_id"))

        if urls:
            ids.extend(cls.video_id_from_url(url) for url in urls)

        # Drop duplicates while keeping first-seen order, so the report is
        # deterministic (a plain `set` would shuffle the IDs between runs).
        ids = list(dict.fromkeys(ids))

        results = []
        for video_id in ids:
            # Best-effort per video: one failure is reported in the output and
            # must not prevent the remaining videos from being processed.
            try:
                transcription = youtube.video_transcription(video_id, language_code)
                output_file_path = cls.save_transcription_to_file(video_id, transcription, output_dir)
                results.append(f"Transcription saved to {output_file_path}")
            except Exception as e:
                results.append(f"Error processing video {video_id}: {e}")

        return "\n".join(results)

0 commit comments

Comments
 (0)