1+ import csv
2+ from pathlib import Path
3+ from typing import List , Dict
4+ from .base import Command
5+ from youtool import YouTube
6+
class VideoTranscription(Command):
    """Download transcriptions for a set of YouTube videos.

    Videos may be given as IDs (``--ids``), as URLs (``--urls``), through a
    CSV file (``--input-file-path``), or any combination of the three. Each
    transcription is saved under the output directory and the outcome is
    reported per video.
    """

    name = "video-transcription"
    arguments = [
        {"name": "--ids", "type": str, "help": "Video IDs", "nargs": "*"},
        {"name": "--urls", "type": str, "help": "Video URLs", "nargs": "*"},
        {"name": "--input-file-path", "type": str, "help": "CSV file path containing video IDs or URLs"},
        {"name": "--output-dir", "type": str, "help": "Output directory to save transcriptions"},
        {"name": "--language-code", "type": str, "help": "Language code for transcription"},
        {"name": "--api-key", "type": str, "help": "API key for YouTube Data API"},
    ]

    # Column names for transcription records. Not referenced by the code
    # visible in this class body.
    TRANSCRIPTION_COLUMNS: List[str] = [
        "video_id",
        "transcription_text",
    ]

    @classmethod
    def execute(cls, **kwargs) -> str:
        """Download the transcription of every requested video and save it.

        Video IDs are gathered from ``ids``, from the ``video_id`` column of
        the CSV at ``input_file_path`` and from ``urls``, then de-duplicated
        while keeping first-seen order. Each video is processed independently:
        a failure on one video is reported in the result text and does not
        stop the remaining downloads.

        Args:
            ids: A list of YouTube video IDs (optional).
            urls: A list of YouTube video URLs (optional).
            input_file_path: Path to a CSV file containing YouTube video IDs
                (optional).
            output_dir: Directory path to save the transcription files.
            language_code: Language code for the transcription language.
            api_key: The API key to authenticate with the YouTube Data API.

        Returns:
            One message per video, joined into a single string, reporting
            either the saved file path or the error raised for that video.
            An empty string when no video was requested.
        """
        # kwargs.get() yields None for an absent key (presumably also when an
        # optional nargs="*" flag is omitted on the command line), so fall
        # back to an empty list. Copying also keeps the caller's list intact.
        ids = list(kwargs.get("ids") or [])
        urls = kwargs.get("urls") or []
        input_file_path = kwargs.get("input_file_path")
        output_dir = kwargs.get("output_dir")
        language_code = kwargs.get("language_code")
        api_key = kwargs.get("api_key")

        youtube = YouTube([api_key], disable_ipv6=True)

        # data_from_csv / video_id_from_url / save_transcription_to_file are
        # not defined in this class body; presumably inherited from Command.
        if input_file_path:
            ids.extend(cls.data_from_csv(Path(input_file_path), "video_id"))

        ids.extend(cls.video_id_from_url(url) for url in urls)

        # Drop duplicates but keep first-seen order so the report is stable
        # from run to run.
        ids = list(dict.fromkeys(ids))

        results = []
        for video_id in ids:
            # Broad catch is deliberate: this is the per-video reporting
            # boundary, and one failing video must not abort the batch.
            try:
                transcription = youtube.video_transcription(video_id, language_code)
                output_file_path = cls.save_transcription_to_file(video_id, transcription, output_dir)
                results.append(f"Transcription saved to {output_file_path} ")
            except Exception as e:
                results.append(f"Error processing video {video_id} : {e} ")

        return "\n ".join(results)
0 commit comments