From 4846c76c187a113a1e18417aefcc86616e2d2c9a Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Thu, 6 Nov 2025 23:30:09 -0300 Subject: [PATCH 01/18] Fix regex for channel ID extraction --- youtool/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtool/__init__.py b/youtool/__init__.py index 28bbe83..a8cca7f 100644 --- a/youtool/__init__.py +++ b/youtool/__init__.py @@ -11,7 +11,7 @@ import isodate # TODO: implement duration parser to remove dependency? import requests -REGEXP_CHANNEL_ID = re.compile('"externalId":"([^"]+)"') +REGEXP_CHANNEL_ID = re.compile('"channelId":"([^"]+)"') REGEXP_LOCATION_RADIUS = re.compile(r"^[0-9.]+(?:m|km|ft|mi)$") REGEXP_NAIVE_DATETIME = re.compile(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}[T ][0-9]{2}:[0-9]{2}:[0-9]{2}$") REGEXP_DATETIME_MILLIS = re.compile(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}[T ][0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+") From 2d7b47f6e29b9258f1ef13e17dfebd5dd94ebe12 Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Fri, 7 Nov 2025 18:51:31 -0300 Subject: [PATCH 02/18] Implement YouTube CLI Tool with command structure and channel ID extraction --- youtool/cli.py | 46 ++++++++++++ youtool/commands/__init__.py | 10 +++ youtool/commands/base.py | 125 +++++++++++++++++++++++++++++++++ youtool/commands/channel_id.py | 97 +++++++++++++++++++++++++ 4 files changed, 278 insertions(+) create mode 100644 youtool/cli.py create mode 100644 youtool/commands/__init__.py create mode 100644 youtool/commands/base.py create mode 100644 youtool/commands/channel_id.py diff --git a/youtool/cli.py b/youtool/cli.py new file mode 100644 index 0000000..1403bef --- /dev/null +++ b/youtool/cli.py @@ -0,0 +1,46 @@ +import argparse +import os + +from youtool.commands import COMMANDS + + +def main(): + """Main function for the YouTube CLI Tool. + + This function sets up the argument parser for the CLI tool, including options for the YouTube API key and + command-specific subparsers. It then parses the command-line arguments, retrieving the YouTube API key + from either the command-line argument '--api-key' or the environment variable 'YOUTUBE_API_KEY'. If the API + key is not provided through any means, it raises an argparse.ArgumentError. + + Finally, the function executes the appropriate command based on the parsed arguments. If an exception occurs + during the execution of the command, it is caught and raised as an argparse error for proper handling. + + Raises: + argparse.ArgumentError: If the YouTube API key is not provided. + argparse.ArgumentError: If there is an error during the execution of the command. + """ + parser = argparse.ArgumentParser(description="CLI Tool for managing YouTube videos add playlists") + parser.add_argument("--api-key", type=str, help="YouTube API Key", dest="api_key") + parser.add_argument("--debug", type=bool, help="Debug mode", dest="debug") + + subparsers = parser.add_subparsers(required=True, dest="command", title="Command", help="Command to be executed") + + for command in COMMANDS: + command.parse_arguments(subparsers) + + args = parser.parse_args() + args.api_key = args.api_key or os.environ.get("YOUTUBE_API_KEY") + + if not args.api_key: + parser.error("YouTube API Key is required") + + try: + print(args.func(**args.__dict__)) + except Exception as error: + if args.debug: + raise error + parser.error(error) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/youtool/commands/__init__.py b/youtool/commands/__init__.py new file mode 100644 index 0000000..7828ee3 --- /dev/null +++ b/youtool/commands/__init__.py @@ -0,0 +1,10 @@ +from .base import Command +from .channel_id import ChannelId + +COMMANDS = [ + ChannelId +] + +__all__ = [ + "Command", "COMMANDS", "ChannelId", +] \ No newline at end of file diff --git a/youtool/commands/base.py b/youtool/commands/base.py new file mode 100644 index 0000000..c481df2 --- /dev/null +++ b/youtool/commands/base.py @@ -0,0 +1,125 @@ +import csv +import argparse + +from typing import List, Dict, Any, Optional +from io import StringIO +from pathlib import Path +from datetime import datetime + + +class Command: + """A base class for commands to inherit from, following a specific structure. + + Attributes: + name (str): The name of the command. + arguments (List[Dict[str, Any]]): A list of dictionaries, each representing an argument for the command. + """ + name: str + arguments: List[Dict[str, Any]] + + @classmethod + def generate_parser(cls, subparsers: argparse._SubParsersAction): + """Creates a parser for the command and adds it to the subparsers. + + Args: + subparsers (argparse._SubParsersAction): The subparsers action to add the parser to. + + Returns: + argparse.ArgumentParser: The parser for the command. + """ + return subparsers.add_parser(cls.name, help=cls.__doc__) + + @classmethod + def parse_arguments(cls, subparsers: argparse._SubParsersAction) -> None: + """Parses the arguments for the command and sets the command's execute method as the default function to call. + + Args: + subparsers (argparse._SubParsersAction): The subparsers action to add the parser to. + """ + parser = cls.generate_parser(subparsers) + groups = {} + + for argument in cls.arguments: + argument_copy = {**argument} + argument_name = argument_copy.pop("name") + + group_name = argument_copy.pop("mutually_exclusive_group", None) + if group_name: + if group_name not in groups: + groups[group_name] = parser.add_argument_group(group_name) + groups[group_name].add_argument(argument_name, **argument_copy) + else: + parser.add_argument(argument_name, **argument_copy) + parser.set_defaults(func=cls.execute) + + @classmethod + def execute(cls, **kwargs) -> str: # noqa: D417 + """Executes the command. + + This method should be overridden by subclasses to define the command's behavior. + + Args: + arguments (argparse.Namespace): The parsed arguments for the command. + """ + raise NotImplementedError() + + @staticmethod + def data_from_csv(file_path: Path, data_column_name: Optional[str] = None) -> List[str]: + """Extracts a list of URLs from a specified CSV file. + + Args: + file_path: The path to the CSV file containing the URLs. + data_column_name: The name of the column in the CSV file that contains the URLs. + If not provided, it defaults to `ChannelId.URL_COLUMN_NAME`. + + Returns: + A list of URLs extracted from the specified CSV file. + + Raises: + Exception: If the file path is invalid or the file cannot be found. + """ + data = [] + + if not file_path.is_file(): + raise FileNotFoundError(f"Invalid file path: {file_path}") + + with file_path.open('r', newline='') as csv_file: + reader = csv.DictReader(csv_file) + fieldnames = reader.fieldnames + + if fieldnames is None: + raise ValueError("Fieldnames is None") + + if data_column_name not in fieldnames: + raise Exception(f"Column {data_column_name} not found on {file_path}") + for row in reader: + value = row.get(data_column_name) + if value is not None: + data.append(str(value)) + return data + + @classmethod + def data_to_csv(cls, data: List[Dict], output_file_path: Optional[str] = None) -> str: + """Converts a list of channel IDs into a CSV file. + + Parameters: + channels_ids (List[str]): List of channel IDs to be written to the CSV. + output_file_path (str, optional): Path to the file where the CSV will be saved. If not provided, the CSV will be returned as a string. + channel_id_column_name (str, optional): Name of the column in the CSV that will contain the channel IDs. + If not provided, the default value defined in ChannelId.CHANNEL_ID_COLUMN_NAME will be used. + + Returns: + str: The path of the created CSV file or, if no path is provided, the contents of the CSV as a string. + """ + if output_file_path: + output_path = Path(output_file_path) + if output_path.is_dir(): + command_name = cls.name.replace("-", "_") + timestamp = datetime.now().strftime("%M%S%f") + output_file_path = output_path / f"{command_name}_{timestamp}.csv" + + with (Path(output_file_path).open('w', newline='') if output_file_path else StringIO()) as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=list(data[0].keys()) if data else []) + writer.writeheader() + writer.writerows(data) + return str(output_file_path) if output_file_path else csv_file.getvalue() \ No newline at end of file diff --git a/youtool/commands/channel_id.py b/youtool/commands/channel_id.py new file mode 100644 index 0000000..0051dd2 --- /dev/null +++ b/youtool/commands/channel_id.py @@ -0,0 +1,97 @@ + +from pathlib import Path + +from youtool import YouTube + +from .base import Command + + +class ChannelId(Command): + """Get channel IDs from a list of URLs (or CSV filename with URLs inside), generate CSV output (just the IDs).""" + name = "channel-id" + arguments = [ + { + "name": "--urls", + "type": str, + "help": "Channels urls", + "nargs": "*", + "mutually_exclusive_group": "input_source" + }, + { + "name": "--urls-file-path", + "type": str, + "help": "Channels urls csv file path", + "mutually_exclusive_group": "input_source" + }, + {"name": "--output-file-path", "type": str, "help": "Output csv file path"}, + {"name": "--url-column-name", "type": str, "help": "URL column name on csv input files"}, + {"name": "--id-column-name", "type": str, "help": "Channel ID column name on csv output files"} + ] + + URL_COLUMN_NAME: str = "channel_url" + CHANNEL_ID_COLUMN_NAME: str = "channel_id" + + @classmethod + def execute(cls, **kwargs) -> str: + """Execute the channel-id command to fetch YouTube channel IDs from URLs and save them to a CSV file. + + This method retrieves YouTube channel IDs from a list of provided URLs or from a file containing URLs. + It then saves these channel IDs to a CSV file if an output file path is specified. + + Args: + urls (list[str], optional): A list of YouTube channel URLs. Either this or urls_file_path must be provided. + urls_file_path (str, optional): Path to a CSV file containing YouTube channel URLs. + Requires url_column_name to specify the column with URLs. + output_file_path (str, optional): Path to the output CSV file where channel IDs will be saved. + If not provided, the result will be returned as a string. + api_key (str): The API key to authenticate with the YouTube Data API. + url_column_name (str, optional): The name of the column in the urls_file_path CSV file that contains the URLs. + Default is "url". + id_column_name (str, optional): The name of the column for channel IDs in the output CSV file. + Default is "channel_id". + + Returns: + str: A message indicating the result of the command. If output_file_path is specified, the message will + include the path to the generated CSV file. Otherwise, it will return the result as a string. + + Raises: + Exception: If neither urls nor urls_file_path is provided. + """ + urls = kwargs.get("urls") + urls_file_path = kwargs.get("urls_file_path") + output_file_path = kwargs.get("output_file_path") + api_key = kwargs.get("api_key") + + url_column_name = kwargs.get("url_column_name") + id_column_name = kwargs.get("id_column_name") + + urls = cls.resolve_urls(urls, urls_file_path, url_column_name) + + youtube = YouTube([api_key], disable_ipv6=True) + + channels_ids = [ + youtube.channel_id_from_url(url) for url in urls if url + ] + + result = cls.data_to_csv( + data=[ + { + (id_column_name or cls.CHANNEL_ID_COLUMN_NAME): channel_id + } for channel_id in channels_ids + ], + output_file_path=output_file_path + ) + + return result + + @classmethod + def resolve_urls(cls, urls, urls_file_path, url_column_name): + if urls_file_path and not urls: + urls = cls.data_from_csv( + file_path=Path(urls_file_path), + data_column_name=url_column_name or cls.URL_COLUMN_NAME + ) + + if not urls: + raise Exception("Either 'username' or 'url' must be provided for the channel-id command") + return urls \ No newline at end of file From 89983c78447a840c4ebd79bbb85c3d254f298cd3 Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Fri, 7 Nov 2025 19:14:48 -0300 Subject: [PATCH 03/18] Make lint --- youtool/cli.py | 6 +++--- youtool/commands/__init__.py | 10 ++++----- youtool/commands/base.py | 18 ++++++++-------- youtool/commands/channel_id.py | 39 ++++++++++++++-------------------- 4 files changed, 33 insertions(+), 40 deletions(-) diff --git a/youtool/cli.py b/youtool/cli.py index 1403bef..898daf0 100644 --- a/youtool/cli.py +++ b/youtool/cli.py @@ -22,7 +22,7 @@ def main(): parser = argparse.ArgumentParser(description="CLI Tool for managing YouTube videos add playlists") parser.add_argument("--api-key", type=str, help="YouTube API Key", dest="api_key") parser.add_argument("--debug", type=bool, help="Debug mode", dest="debug") - + subparsers = parser.add_subparsers(required=True, dest="command", title="Command", help="Command to be executed") for command in COMMANDS: @@ -33,7 +33,7 @@ def main(): if not args.api_key: parser.error("YouTube API Key is required") - + try: print(args.func(**args.__dict__)) except Exception as error: @@ -43,4 +43,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/youtool/commands/__init__.py b/youtool/commands/__init__.py index 7828ee3..eac5630 100644 --- a/youtool/commands/__init__.py +++ b/youtool/commands/__init__.py @@ -1,10 +1,10 @@ from .base import Command from .channel_id import ChannelId -COMMANDS = [ - ChannelId -] +COMMANDS = [ChannelId] __all__ = [ - "Command", "COMMANDS", "ChannelId", -] \ No newline at end of file + "Command", + "COMMANDS", + "ChannelId", +] diff --git a/youtool/commands/base.py b/youtool/commands/base.py index c481df2..cf3e7a9 100644 --- a/youtool/commands/base.py +++ b/youtool/commands/base.py @@ -1,19 +1,19 @@ -import csv import argparse - -from typing import List, Dict, Any, Optional +import csv +from datetime import datetime from io import StringIO from pathlib import Path -from datetime import datetime +from typing import Any, Dict, List, Optional class Command: """A base class for commands to inherit from, following a specific structure. - + Attributes: name (str): The name of the command. arguments (List[Dict[str, Any]]): A list of dictionaries, each representing an argument for the command. """ + name: str arguments: List[Dict[str, Any]] @@ -83,13 +83,13 @@ def data_from_csv(file_path: Path, data_column_name: Optional[str] = None) -> Li if not file_path.is_file(): raise FileNotFoundError(f"Invalid file path: {file_path}") - with file_path.open('r', newline='') as csv_file: + with file_path.open("r", newline="") as csv_file: reader = csv.DictReader(csv_file) fieldnames = reader.fieldnames if fieldnames is None: raise ValueError("Fieldnames is None") - + if data_column_name not in fieldnames: raise Exception(f"Column {data_column_name} not found on {file_path}") for row in reader: @@ -118,8 +118,8 @@ def data_to_csv(cls, data: List[Dict], output_file_path: Optional[str] = None) - timestamp = datetime.now().strftime("%M%S%f") output_file_path = output_path / f"{command_name}_{timestamp}.csv" - with (Path(output_file_path).open('w', newline='') if output_file_path else StringIO()) as csv_file: + with Path(output_file_path).open("w", newline="") if output_file_path else StringIO() as csv_file: writer = csv.DictWriter(csv_file, fieldnames=list(data[0].keys()) if data else []) writer.writeheader() writer.writerows(data) - return str(output_file_path) if output_file_path else csv_file.getvalue() \ No newline at end of file + return str(output_file_path) if output_file_path else csv_file.getvalue() diff --git a/youtool/commands/channel_id.py b/youtool/commands/channel_id.py index 0051dd2..916fe6d 100644 --- a/youtool/commands/channel_id.py +++ b/youtool/commands/channel_id.py @@ -1,4 +1,3 @@ - from pathlib import Path from youtool import YouTube @@ -8,24 +7,25 @@ class ChannelId(Command): """Get channel IDs from a list of URLs (or CSV filename with URLs inside), generate CSV output (just the IDs).""" + name = "channel-id" arguments = [ { - "name": "--urls", - "type": str, - "help": "Channels urls", - "nargs": "*", - "mutually_exclusive_group": "input_source" + "name": "--urls", + "type": str, + "help": "Channels urls", + "nargs": "*", + "mutually_exclusive_group": "input_source", }, { - "name": "--urls-file-path", - "type": str, - "help": "Channels urls csv file path", - "mutually_exclusive_group": "input_source" + "name": "--urls-file-path", + "type": str, + "help": "Channels urls csv file path", + "mutually_exclusive_group": "input_source", }, {"name": "--output-file-path", "type": str, "help": "Output csv file path"}, {"name": "--url-column-name", "type": str, "help": "URL column name on csv input files"}, - {"name": "--id-column-name", "type": str, "help": "Channel ID column name on csv output files"} + {"name": "--id-column-name", "type": str, "help": "Channel ID column name on csv output files"}, ] URL_COLUMN_NAME: str = "channel_url" @@ -69,17 +69,11 @@ def execute(cls, **kwargs) -> str: youtube = YouTube([api_key], disable_ipv6=True) - channels_ids = [ - youtube.channel_id_from_url(url) for url in urls if url - ] + channels_ids = [youtube.channel_id_from_url(url) for url in urls if url] result = cls.data_to_csv( - data=[ - { - (id_column_name or cls.CHANNEL_ID_COLUMN_NAME): channel_id - } for channel_id in channels_ids - ], - output_file_path=output_file_path + data=[{(id_column_name or cls.CHANNEL_ID_COLUMN_NAME): channel_id} for channel_id in channels_ids], + output_file_path=output_file_path, ) return result @@ -88,10 +82,9 @@ def execute(cls, **kwargs) -> str: def resolve_urls(cls, urls, urls_file_path, url_column_name): if urls_file_path and not urls: urls = cls.data_from_csv( - file_path=Path(urls_file_path), - data_column_name=url_column_name or cls.URL_COLUMN_NAME + file_path=Path(urls_file_path), data_column_name=url_column_name or cls.URL_COLUMN_NAME ) if not urls: raise Exception("Either 'username' or 'url' must be provided for the channel-id command") - return urls \ No newline at end of file + return urls From 62896c4279bd67e1c914331f71b363c5b38b7b83 Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Fri, 7 Nov 2025 19:59:10 -0300 Subject: [PATCH 04/18] Add entry points for youtool CLI in setup configuration --- setup.cfg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup.cfg b/setup.cfg index 77478cb..36ff81d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,6 +24,10 @@ packages = find: python_requires = >=3.7 install_requires = file: requirements/base.txt +[options.entry_points] +console_scripts = + youtool = youtool.cli:main + [options.extras_require] cli = file: requirements/cli.txt dev = file: requirements/dev.txt From 660edaf5472911727bc271d160bac92dd41cb5c5 Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Fri, 7 Nov 2025 20:51:34 -0300 Subject: [PATCH 05/18] Update execute method documentation for channel-id command to clarify input options and error handling --- youtool/commands/channel_id.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/youtool/commands/channel_id.py b/youtool/commands/channel_id.py index 916fe6d..4056a5e 100644 --- a/youtool/commands/channel_id.py +++ b/youtool/commands/channel_id.py @@ -35,13 +35,16 @@ class ChannelId(Command): def execute(cls, **kwargs) -> str: """Execute the channel-id command to fetch YouTube channel IDs from URLs and save them to a CSV file. - This method retrieves YouTube channel IDs from a list of provided URLs or from a file containing URLs. - It then saves these channel IDs to a CSV file if an output file path is specified. - - Args: - urls (list[str], optional): A list of YouTube channel URLs. Either this or urls_file_path must be provided. - urls_file_path (str, optional): Path to a CSV file containing YouTube channel URLs. - Requires url_column_name to specify the column with URLs. + This command retrieves YouTube channel IDs from one of two possible inputs: + - a list of YouTube channel URLs (`--urls`), or + - a CSV file containing those URLs (`--urls-file-path`). + + Args: + urls (list[str]): List of YouTube channel URLs. + Mutually exclusive with `urls_file_path`. + urls_file_path (str): Path to a CSV file containing YouTube channel URLs. + Mutually exclusive with `urls`. + Requires url_column_name to specify the column with URLs. output_file_path (str, optional): Path to the output CSV file where channel IDs will be saved. If not provided, the result will be returned as a string. api_key (str): The API key to authenticate with the YouTube Data API. @@ -55,7 +58,7 @@ def execute(cls, **kwargs) -> str: include the path to the generated CSV file. Otherwise, it will return the result as a string. Raises: - Exception: If neither urls nor urls_file_path is provided. + ValueError: If neither `urls` nor `urls_file_path` is provided, or if both are provided at the same time. """ urls = kwargs.get("urls") urls_file_path = kwargs.get("urls_file_path") From c1a0f333ad6c287da91ad00889b6fd5f5b9c37ad Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Fri, 7 Nov 2025 22:27:10 -0300 Subject: [PATCH 06/18] Add debug mode option to CLI and update argument types for channel ID command --- youtool/cli.py | 2 +- youtool/commands/channel_id.py | 27 +++++++++++++-------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/youtool/cli.py b/youtool/cli.py index 898daf0..517c150 100644 --- a/youtool/cli.py +++ b/youtool/cli.py @@ -21,7 +21,7 @@ def main(): """ parser = argparse.ArgumentParser(description="CLI Tool for managing YouTube videos add playlists") parser.add_argument("--api-key", type=str, help="YouTube API Key", dest="api_key") - parser.add_argument("--debug", type=bool, help="Debug mode", dest="debug") + parser.add_argument("--debug", help="Debug mode", dest="debug", default=False, action="store_true") subparsers = parser.add_subparsers(required=True, dest="command", title="Command", help="Command to be executed") diff --git a/youtool/commands/channel_id.py b/youtool/commands/channel_id.py index 4056a5e..5bf45ad 100644 --- a/youtool/commands/channel_id.py +++ b/youtool/commands/channel_id.py @@ -19,11 +19,11 @@ class ChannelId(Command): }, { "name": "--urls-file-path", - "type": str, + "type": Path, "help": "Channels urls csv file path", "mutually_exclusive_group": "input_source", }, - {"name": "--output-file-path", "type": str, "help": "Output csv file path"}, + {"name": "--output-file-path", "type": Path, "help": "Output csv file path"}, {"name": "--url-column-name", "type": str, "help": "URL column name on csv input files"}, {"name": "--id-column-name", "type": str, "help": "Channel ID column name on csv output files"}, ] @@ -42,16 +42,16 @@ def execute(cls, **kwargs) -> str: Args: urls (list[str]): List of YouTube channel URLs. Mutually exclusive with `urls_file_path`. - urls_file_path (str): Path to a CSV file containing YouTube channel URLs. + urls_file_path (Path): Path to a CSV file containing YouTube channel URLs. Mutually exclusive with `urls`. Requires url_column_name to specify the column with URLs. - output_file_path (str, optional): Path to the output CSV file where channel IDs will be saved. - If not provided, the result will be returned as a string. - api_key (str): The API key to authenticate with the YouTube Data API. - url_column_name (str, optional): The name of the column in the urls_file_path CSV file that contains the URLs. - Default is "url". - id_column_name (str, optional): The name of the column for channel IDs in the output CSV file. - Default is "channel_id". + output_file_path (Path, optional): Path to the output CSV file where channel IDs will be saved. + If not provided, the result will be returned as a string. + api_key (str): The API key to authenticate with the YouTube Data API. + url_column_name (str, optional): The name of the column in the urls_file_path CSV file that contains the URLs. + Default is "url". + id_column_name (str, optional): The name of the column for channel IDs in the output CSV file. + Default is "channel_id". Returns: str: A message indicating the result of the command. If output_file_path is specified, the message will @@ -60,7 +60,7 @@ def execute(cls, **kwargs) -> str: Raises: ValueError: If neither `urls` nor `urls_file_path` is provided, or if both are provided at the same time. """ - urls = kwargs.get("urls") + urls = kwargs.get("urls") or [] urls_file_path = kwargs.get("urls_file_path") output_file_path = kwargs.get("output_file_path") api_key = kwargs.get("api_key") @@ -83,11 +83,10 @@ def execute(cls, **kwargs) -> str: @classmethod def resolve_urls(cls, urls, urls_file_path, url_column_name): - if urls_file_path and not urls: - urls = cls.data_from_csv( + if urls_file_path: + urls += cls.data_from_csv( file_path=Path(urls_file_path), data_column_name=url_column_name or cls.URL_COLUMN_NAME ) - if not urls: raise Exception("Either 'username' or 'url' must be provided for the channel-id command") return urls From b6a65a79b94bec836eb1598206f3507d23088ad2 Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Fri, 7 Nov 2025 22:30:30 -0300 Subject: [PATCH 07/18] Add ChannelInfo command to retrieve YouTube channel information and update command exports --- youtool/commands/__init__.py | 4 +- youtool/commands/channel_info.py | 143 +++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 youtool/commands/channel_info.py diff --git a/youtool/commands/__init__.py b/youtool/commands/__init__.py index eac5630..f827894 100644 --- a/youtool/commands/__init__.py +++ b/youtool/commands/__init__.py @@ -1,10 +1,12 @@ from .base import Command from .channel_id import ChannelId +from .channel_info import ChannelInfo -COMMANDS = [ChannelId] +COMMANDS = [ChannelId, ChannelInfo] __all__ = [ "Command", "COMMANDS", "ChannelId", + "ChannelInfo", ] diff --git a/youtool/commands/channel_info.py b/youtool/commands/channel_info.py new file mode 100644 index 0000000..57a83b1 --- /dev/null +++ b/youtool/commands/channel_info.py @@ -0,0 +1,143 @@ +from pathlib import Path +from typing import Dict, List, Optional, Self + +from youtool import YouTube + +from .base import Command + + +class ChannelInfo(Command): + """Get channel info from a list of IDs (or CSV filename with IDs inside), generate CSV output + (same schema for `channel` dicts) + """ + + name = "channel-info" + arguments = [ + { + "name": "--urls", + "type": str, + "help": "Channel URLs", + "nargs": "*", + "mutually_exclusive_group": "input_source", + }, + {"name": "--usernames", "type": str, "help": "Channel usernames", "nargs": "*"}, + {"name": "--ids", "type": str, "help": "Channel IDs", "nargs": "*"}, + { + "name": "--urls-file-path", + "type": Path, + "help": "Channel URLs CSV file path", + "mutually_exclusive_group": "input_source", + }, + {"name": "--usernames-file-path", "type": Path, "help": "Channel usernames CSV file path"}, + {"name": "--ids-file-path", "type": Path, "help": "Channel IDs CSV file path"}, + {"name": "--output-file-path", "type": Path, "help": "Output CSV file path"}, + {"name": "--url-column-name", "type": str, "help": "URL column name on CSV input files"}, + {"name": "--username-column-name", "type": str, "help": "Username column name on CSV input files"}, + {"name": "--id-column-name", "type": str, "help": "ID column name on CSV input files"}, + ] + + URL_COLUMN_NAME: str = "channel_url" + USERNAME_COLUMN_NAME: str = "channel_username" + ID_COLUMN_NAME: str = "channel_id" + INFO_COLUMNS: List[str] = [ + "id", + "title", + "description", + "published_at", + "view_count", + "subscriber_count", + "video_count", + ] + + @staticmethod + def filter_fields(channel_info: Dict, info_columns: Optional[List] = None): + """Filters the fields of a dictionary containing channel information based on + specified columns. + + Args: + channel_info (Dict): A dictionary containing channel information. + info_columns (Optional[List], optional): A list specifying which fields + to include in the filtered output. If None, returns the entire + channel_info dictionary. Defaults to None. + + Returns: + Dict: A dictionary containing only the fields specified in info_columns + (if provided) or the entire channel_info dictionary if info_columns is None. + """ + return ( + {field: value for field, value in channel_info.items() if field in info_columns} + if info_columns + else channel_info + ) + + @classmethod + def execute(cls: Self, **kwargs) -> str: + """Execute the channel-info command to fetch YouTube channel information from URLs or + usernames and save them to a CSV file. + + Args: + urls (list[str], optional): A list of YouTube channel URLs. If not provided, `urls_file_path` must be specified. + usernames (list[str], optional): A list of YouTube channel usernames. If not provided, `usernames_file_path` must be specified. + ids (list[str], optional): A list of YouTube channel IDs. If not provided, `ids_file_path` must be specified. + urls_file_path (str, optional): Path to a CSV file containing YouTube channel URLs. + usernames_file_path (str, optional): Path to a CSV file containing YouTube channel usernames. + output_file_path (str, optional): Path to the output CSV file where channel information will be saved. + ids_file_path (str, optional): Path to a CSV file containing YouTube channel IDs. + api_key (str): The API key to authenticate with the YouTube Data API. + url_column_name (str, optional): The name of the column in the `urls_file_path` CSV file that contains the URLs. + Default is "channel_url". + username_column_name (str, optional): The name of the column in the `usernames_file_path` CSV file that contains the usernames. + Default is "channel_username". + info_columns (str, optional): Comma-separated list of columns to include in the output CSV. + Default is the class attribute `INFO_COLUMNS`. + + Returns: + str: A message indicating the result of the command. If `output_file_path` is specified, the message will + include the path to the generated CSV file. Otherwise, it will return the result as a string. + + Raises: + Exception: If neither `urls`, `usernames`, `urls_file_path` nor `usernames_file_path` is provided. + """ + + urls = kwargs.get("urls") + usernames = kwargs.get("usernames") + ids = kwargs.get("ids") + urls_file_path = kwargs.get("urls_file_path") + usernames_file_path = kwargs.get("usernames_file_path") + output_file_path = kwargs.get("output_file_path") + id_file_path = kwargs.get("ids_file_path") + api_key = kwargs.get("api_key") + + url_column_name = kwargs.get("url_column_name") or ChannelInfo.URL_COLUMN_NAME + username_column_name = kwargs.get("username_column_name") or ChannelInfo.USERNAME_COLUMN_NAME + id_column_name = kwargs.get("id_column_name") or ChannelInfo.ID_COLUMN_NAME + info_columns = kwargs.get("info_columns") + + info_columns = ( + [column.strip() for column in info_columns.split(",")] if info_columns else ChannelInfo.INFO_COLUMNS + ) + + if urls_file_path and not urls: + urls = ChannelInfo.data_from_csv(urls_file_path, url_column_name) + if usernames_file_path and not usernames: + usernames = ChannelInfo.data_from_csv(usernames_file_path, username_column_name) + if id_file_path and not ids: + ids = ChannelInfo.data_from_csv(id_file_path, id_column_name) + + if not urls and not usernames: + raise Exception("Either 'urls' or 'usernames' must be provided for the channel-info command") + + youtube = YouTube([api_key], disable_ipv6=True) + + channels_ids = [youtube.channel_id_from_url(url) for url in (urls or []) if url] + [ + youtube.channel_id_from_username(username) for username in (usernames or []) if username + ] + (ids or []) + channel_ids = list(set([channel_id for channel_id in channels_ids if channel_id])) + + return cls.data_to_csv( + data=[ + ChannelInfo.filter_fields(channel_info, info_columns) + for channel_info in (youtube.channels_infos(channel_ids) or []) + ], + output_file_path=output_file_path, + ) From 0c1db6d8aa5d902a6fbb4e016e6b61eafc257723 Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Fri, 7 Nov 2025 22:49:23 -0300 Subject: [PATCH 08/18] Make lint --- youtool/commands/channel_info.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtool/commands/channel_info.py b/youtool/commands/channel_info.py index 57a83b1..b27d44c 100644 --- a/youtool/commands/channel_info.py +++ b/youtool/commands/channel_info.py @@ -129,9 +129,11 @@ def execute(cls: Self, **kwargs) -> str: youtube = YouTube([api_key], disable_ipv6=True) - channels_ids = [youtube.channel_id_from_url(url) for url in (urls or []) if url] + [ - youtube.channel_id_from_username(username) for username in (usernames or []) if username - ] + (ids or []) + channels_ids = ( + [youtube.channel_id_from_url(url) for url in (urls or []) if url] + + [youtube.channel_id_from_username(username) for username in (usernames or []) if username] + + (ids or []) + ) channel_ids = list(set([channel_id for channel_id in channels_ids if channel_id])) return cls.data_to_csv( From 3d44ac9e04128ddc1ac0705f7300ef7bf2a214ff Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Tue, 11 Nov 2025 23:44:29 -0300 Subject: [PATCH 09/18] Refactor ChannelInfo command to improve argument handling and ensure proper input validation --- youtool/commands/channel_info.py | 34 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/youtool/commands/channel_info.py b/youtool/commands/channel_info.py index b27d44c..5962774 100644 --- a/youtool/commands/channel_info.py +++ b/youtool/commands/channel_info.py @@ -79,10 +79,10 @@ def execute(cls: Self, **kwargs) -> str: urls (list[str], optional): A list of YouTube channel URLs. If not provided, `urls_file_path` must be specified. usernames (list[str], optional): A list of YouTube channel usernames. If not provided, `usernames_file_path` must be specified. ids (list[str], optional): A list of YouTube channel IDs. If not provided, `ids_file_path` must be specified. - urls_file_path (str, optional): Path to a CSV file containing YouTube channel URLs. - usernames_file_path (str, optional): Path to a CSV file containing YouTube channel usernames. - output_file_path (str, optional): Path to the output CSV file where channel information will be saved. - ids_file_path (str, optional): Path to a CSV file containing YouTube channel IDs. + urls_file_path (Path, optional): Path to a CSV file containing YouTube channel URLs. + usernames_file_path (Path, optional): Path to a CSV file containing YouTube channel usernames. + output_file_path (Path, optional): Path to the output CSV file where channel information will be saved. + ids_file_path (Path, optional): Path to a CSV file containing YouTube channel IDs. api_key (str): The API key to authenticate with the YouTube Data API. url_column_name (str, optional): The name of the column in the `urls_file_path` CSV file that contains the URLs. Default is "channel_url". @@ -99,13 +99,13 @@ def execute(cls: Self, **kwargs) -> str: Exception: If neither `urls`, `usernames`, `urls_file_path` nor `usernames_file_path` is provided. """ - urls = kwargs.get("urls") - usernames = kwargs.get("usernames") - ids = kwargs.get("ids") + urls = kwargs.get("urls") or [] + usernames = kwargs.get("usernames") or [] + ids = kwargs.get("ids") or [] urls_file_path = kwargs.get("urls_file_path") usernames_file_path = kwargs.get("usernames_file_path") output_file_path = kwargs.get("output_file_path") - id_file_path = kwargs.get("ids_file_path") + ids_file_path = kwargs.get("ids_file_path") api_key = kwargs.get("api_key") url_column_name = kwargs.get("url_column_name") or ChannelInfo.URL_COLUMN_NAME @@ -117,15 +117,15 @@ def execute(cls: Self, **kwargs) -> str: [column.strip() for column in info_columns.split(",")] if info_columns else ChannelInfo.INFO_COLUMNS ) - if urls_file_path and not urls: - urls = ChannelInfo.data_from_csv(urls_file_path, url_column_name) - if usernames_file_path and not usernames: - usernames = ChannelInfo.data_from_csv(usernames_file_path, username_column_name) - if id_file_path and not ids: - ids = ChannelInfo.data_from_csv(id_file_path, id_column_name) + if urls_file_path: + urls += ChannelInfo.data_from_csv(urls_file_path, url_column_name) + if usernames_file_path: + usernames += ChannelInfo.data_from_csv(usernames_file_path, username_column_name) + if ids_file_path: + ids += ChannelInfo.data_from_csv(ids_file_path, id_column_name) - if not urls and not usernames: - raise Exception("Either 'urls' or 'usernames' must be provided for the channel-info command") + if not urls and not usernames and not ids: + raise Exception("Either 'urls', 'usernames', or 'ids' must be provided for the channel-info command") youtube = YouTube([api_key], disable_ipv6=True) @@ -135,11 +135,11 @@ def execute(cls: Self, **kwargs) -> str: + (ids or []) ) channel_ids = list(set([channel_id for channel_id in channels_ids if channel_id])) - return cls.data_to_csv( data=[ ChannelInfo.filter_fields(channel_info, info_columns) for channel_info in (youtube.channels_infos(channel_ids) or []) + if channel_info ], output_file_path=output_file_path, ) From 65b07e1422614b493c7d397d484cc3dbf26ea272 Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Tue, 11 Nov 2025 23:52:15 -0300 Subject: [PATCH 10/18] Add VideoInfo command to retrieve YouTube video information and update command exports --- youtool/commands/__init__.py | 6 +- youtool/commands/base.py | 38 +++++++++++- youtool/commands/video_info.py | 107 +++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 2 deletions(-) create mode 100644 youtool/commands/video_info.py diff --git a/youtool/commands/__init__.py b/youtool/commands/__init__.py index f827894..58cda5e 100644 --- a/youtool/commands/__init__.py +++ b/youtool/commands/__init__.py @@ -1,12 +1,16 @@ +from typing import List + from .base import Command from .channel_id import ChannelId from .channel_info import ChannelInfo +from .video_info import VideoInfo -COMMANDS = [ChannelId, ChannelInfo] +COMMANDS: List[Command] = [ChannelId, ChannelInfo, VideoInfo] __all__ = [ "Command", "COMMANDS", "ChannelId", "ChannelInfo", + "VideoInfo", ] diff --git a/youtool/commands/base.py b/youtool/commands/base.py index cf3e7a9..1d4b628 100644 --- a/youtool/commands/base.py +++ b/youtool/commands/base.py @@ -4,6 +4,7 @@ from io import StringIO from pathlib import Path from typing import Any, Dict, List, Optional +from urllib.parse import parse_qs, urlparse class Command: @@ -17,6 +18,20 @@ class Command: name: str arguments: List[Dict[str, Any]] + @staticmethod + def video_id_from_url(video_url: str) -> Optional[str]: + """Extracts the video ID from a YouTube URL. + + Args: + url (str): The YouTube video URL. + + Returns: + Optional[str]: The extracted video ID, or None if not found. + """ + parsed_url = urlparse(video_url) + parsed_url_query = dict(parse_qs(parsed_url.query)) + return parsed_url_query.get("v") + @classmethod def generate_parser(cls, subparsers: argparse._SubParsersAction): """Creates a parser for the command and adds it to the subparsers. @@ -52,8 +67,29 @@ def parse_arguments(cls, subparsers: argparse._SubParsersAction) -> None: parser.add_argument(argument_name, **argument_copy) parser.set_defaults(func=cls.execute) + @staticmethod + def filter_fields(video_info: Dict, info_columns: Optional[List] = None): + """Filters the fields of a dictionary containing video information based on + specified columns. + + Args: + video_info (Dict): A dictionary containing video information. + info_columns (Optional[List], optional): A list specifying which fields + to include in the filtered output. If None, returns the entire + video_info dictionary. Defaults to None. + + Returns: + Dict: A dictionary containing only the fields specified in info_columns + (if provided) or the entire video_info dictionary if info_columns is None. + """ + return ( + {field: value for field, value in video_info.items() if field in info_columns} + if info_columns + else video_info + ) + @classmethod - def execute(cls, **kwargs) -> str: # noqa: D417 + def execute(cls, **kwargs) -> str: """Executes the command. This method should be overridden by subclasses to define the command's behavior. diff --git a/youtool/commands/video_info.py b/youtool/commands/video_info.py new file mode 100644 index 0000000..dc4b6f9 --- /dev/null +++ b/youtool/commands/video_info.py @@ -0,0 +1,107 @@ +from pathlib import Path +from typing import List, Self + +from youtool import YouTube + +from .base import Command + + +class VideoInfo(Command): + """Get video info from a list of IDs or URLs (or CSV filename with URLs/IDs inside), generate CSV output (same schema for video dicts)")""" + + name = "video-info" + arguments = [ + {"name": "--ids", "type": str, "help": "Video IDs", "nargs": "*", "mutually_exclusive_group": "input_source"}, + {"name": "--urls", "type": str, "help": "Video URLs", "nargs": "*", "mutually_exclusive_group": "input_source"}, + { + "name": "--urls-file-path", + "type": Path, + "help": "Channels urls csv file path", + "mutually_exclusive_group": "input_source", + }, + {"name": "--ids-file-path", "type": Path, "help": "Channel IDs CSV file path"}, + {"name": "--output-file-path", "type": Path, "help": "Output CSV file path"}, + {"name": "--url_column_name", "type": str, "help": "URL column name on CSV input files"}, + {"name": "--id_column_name", "type": str, "help": "ID column name on CSV input files"}, + {"name": "--info_columns", "type": str, "help": "Comma-separated list of columns to include in the output CSV"}, + ] + + ID_COLUMN_NAME: str = "video_id" + URL_COLUMN_NAME: str = "video_url" + INFO_COLUMNS: List[str] = [ + "id", + "title", + "description", + "published_at", + "view_count", + "like_count", + "comment_count", + ] + + @classmethod + def execute(cls: Self, **kwargs) -> str: + """ + Execute the video-info command to fetch YouTube video information from IDs or URLs and save them to a CSV file. + + - a list of YouTube video IDs (`--ids`), or + - a list of YouTube video URLs (`--urls`), or + - a CSV file containing those URLs (`--urls-file-path`) or IDs (`--ids-file-path`). + + Args: + ids (list[str], optional): List of YouTube video IDs. + Mutually exclusive with `urls` and `input_file_path`. + urls (list[str], optional): List of YouTube video URLs. + Mutually exclusive with `ids` and `input_file_path`. + urls_file_path (Path, optional): Path to a CSV file containing YouTube video URLs. + ids_file_path (Path, optional): Path to a CSV file containing YouTube video IDs. + output_file_path (Path, optional): Path to the output CSV file where video information will be saved. + api_key (str): The API key to authenticate with the YouTube Data API. + url_column_name (str, optional): The name of the column in the input_file_path CSV file that contains the URLs. + Default is "video_url". + id_column_name (str, optional): The name of the column in the input_file_path CSV file that contains the IDs. + Default is "video_id". + info_columns (str, optional): Comma-separated list of columns to include in the output CSV. + Default is the class attribute INFO_COLUMNS. + + Returns: + str: A message indicating the result of the command. If output_file_path is specified, the message will + include the path to the generated CSV file. Otherwise, it will return the result as a string. + + Raises: + Exception: If neither ids, urls, nor input_file_path is provided. + """ + ids = kwargs.get("ids") or [] + urls = kwargs.get("urls") or [] + ids_file_path = kwargs.get("ids_file_path") + urls_file_path = kwargs.get("urls_file_path") + output_file_path = kwargs.get("output_file_path") + api_key = kwargs.get("api_key") + + url_column_name = kwargs.get("url_column_name") or VideoInfo.URL_COLUMN_NAME + id_column_name = kwargs.get("id_column_name") or VideoInfo.ID_COLUMN_NAME + + info_columns = kwargs.get("info_columns") + + info_columns = ( + [column.strip() for column in info_columns.split(",")] if info_columns else VideoInfo.INFO_COLUMNS + ) + if ids_file_path: + ids += cls.data_from_csv(ids_file_path, id_column_name) + if urls_file_path: + urls += cls.data_from_csv(urls_file_path, url_column_name) + + if not ids and not urls: + raise Exception("Either 'ids', 'urls' must be provided for the video-info command") + + youtube = YouTube([api_key], disable_ipv6=True) + + if urls: + ids += sum([cls.video_id_from_url(url) for url in urls], []) + + # Remove duplicated + ids = list(set(ids)) + videos_infos = list(youtube.videos_infos([_id for _id in ids if _id])) + return cls.data_to_csv( + data=[VideoInfo.filter_fields(video_info, info_columns) for video_info in videos_infos], + output_file_path=output_file_path, + ) From 95ff0bccea7613eec9d378892350ec11453e79e4 Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Wed, 12 Nov 2025 00:15:38 -0300 Subject: [PATCH 11/18] Refactor filter_fields from ChannelInfo to base.py for reuse across commands --- youtool/commands/channel_info.py | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/youtool/commands/channel_info.py b/youtool/commands/channel_info.py index 5962774..6e76ed4 100644 --- a/youtool/commands/channel_info.py +++ b/youtool/commands/channel_info.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, List, Optional, Self +from typing import List, Self from youtool import YouTube @@ -49,27 +49,6 @@ class ChannelInfo(Command): "video_count", ] - @staticmethod - def filter_fields(channel_info: Dict, info_columns: Optional[List] = None): - """Filters the fields of a dictionary containing channel information based on - specified columns. - - Args: - channel_info (Dict): A dictionary containing channel information. - info_columns (Optional[List], optional): A list specifying which fields - to include in the filtered output. If None, returns the entire - channel_info dictionary. Defaults to None. - - Returns: - Dict: A dictionary containing only the fields specified in info_columns - (if provided) or the entire channel_info dictionary if info_columns is None. - """ - return ( - {field: value for field, value in channel_info.items() if field in info_columns} - if info_columns - else channel_info - ) - @classmethod def execute(cls: Self, **kwargs) -> str: """Execute the channel-info command to fetch YouTube channel information from URLs or From bcc65a553e7b64fe67985c053a310c56db7cc82e Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Thu, 13 Nov 2025 14:27:05 -0300 Subject: [PATCH 12/18] Add VideoSearch command to retrieve and export YouTube video information --- youtool/commands/__init__.py | 4 +- youtool/commands/video_search.py | 105 +++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 youtool/commands/video_search.py diff --git a/youtool/commands/__init__.py b/youtool/commands/__init__.py index 58cda5e..15aeb5e 100644 --- a/youtool/commands/__init__.py +++ b/youtool/commands/__init__.py @@ -4,8 +4,9 @@ from .channel_id import ChannelId from .channel_info import ChannelInfo from .video_info import VideoInfo +from .video_search import VideoSearch -COMMANDS: List[Command] = [ChannelId, ChannelInfo, VideoInfo] +COMMANDS: List[Command] = [ChannelId, ChannelInfo, VideoInfo, VideoSearch] __all__ = [ "Command", @@ -13,4 +14,5 @@ "ChannelId", "ChannelInfo", "VideoInfo", + "VideoSearch", ] diff --git a/youtool/commands/video_search.py b/youtool/commands/video_search.py new file mode 100644 index 0000000..e1da78e --- /dev/null +++ b/youtool/commands/video_search.py @@ -0,0 +1,105 @@ +from pathlib import Path +from typing import List, Self + +from youtool import YouTube + +from .base import Command + + +class VideoSearch(Command): + """ + Search video info from a list of IDs or URLs (or CSV filename with URLs/IDs inside), + generate CSV output (simplified video dict schema or option to get full video info) + """ + + name = "video-search" + arguments = [ + {"name": "--ids", "type": str, "help": "Video IDs", "nargs": "*", "mutually_exclusive_group": "input_source"}, + {"name": "--urls", "type": str, "help": "Video URLs", "nargs": "*", "mutually_exclusive_group": "input_source"}, + { + "name": "--ids-file-path", + "type": Path, + "help": "Channel IDs CSV file path", + "mutually_exclusive_group": "input_source", + }, + { + "name": "--urls-file-path", + "type": Path, + "help": "Channels urls csv file path", + "mutually_exclusive_group": "input_source", + }, + {"name": "--output-file-path", "type": Path, "help": "Output CSV file path"}, + {"name": "--url_column_name", "type": str, "help": "URL column name on csv input files"}, + {"name": "--id_column_name", "type": str, "help": "Channel ID column name on csv output files"}, + {"name": "--info_columns", "type": str, "help": "Comma-separated list of columns to include in the output CSV"}, + {"name": "--full-info", "action": "store_true", "help": "Option to get full video info", "default": False}, + ] + + ID_COLUMN_NAME: str = "video_id" + URL_COLUMN_NAME: str = "video_url" + INFO_COLUMNS: List[str] = ["id", "title", "published_at", "views"] + FULL_INFO_COLUMNS: List[str] = INFO_COLUMNS + ["description", "like_count", "comment_count"] + + @classmethod + def execute(cls: Self, **kwargs) -> str: + """ + Execute the video-search command to fetch YouTube video information from IDs or URLs and save them to a CSV file. + + - a list of YouTube video IDs (`--ids`), or + - a list of YouTube video URLs (`--urls`), or + - a CSV file containing those URLs (`--urls-file-path`) or IDs (`--ids-file-path`). + + Args: + ids (list[str], optional): A list of YouTube video IDs. If not provided, input_file_path must be specified. + urls (list[str], optional): A list of YouTube video URLs. If not provided, input_file_path must be specified. + ids_file_path (Path, optional): Path to a CSV file containing YouTube video IDs. + urls_file_path (Path, optional): Path to a CSV file containing YouTube video URLs. + output_file_path (Path, optional): Path to the output CSV file where video information will be saved. + api_key (str): The API key to authenticate with the YouTube Data API. + full_info (bool, optional): Flag to indicate whether to get full video info. Default is False. + url_column_name (str, optional): The name of the column in the input CSV file that contains the URLs. Default is "video_url". + id_column_name (str, optional): The name of the column in the input CSV file that contains the IDs. Default is "video_id". + + Returns: + str: A message indicating the result of the command. If output_file_path is specified, + the message will include the path to the generated CSV file. + Otherwise, it will return the result as a string. + + Raises: + Exception: If neither ids, urls, nor input_file_path is provided. + """ + ids = kwargs.get("ids") or [] + urls = kwargs.get("urls") or [] + ids_file_path = kwargs.get("ids_file_path") + urls_file_path = kwargs.get("urls_file_path") + output_file_path = kwargs.get("output_file_path") + api_key = kwargs.get("api_key") + + url_column_name = kwargs.get("url_column_name") or VideoSearch.URL_COLUMN_NAME + id_column_name = kwargs.get("id_column_name") or VideoSearch.ID_COLUMN_NAME + + info_columns = kwargs.get("info_columns") + full_info = kwargs.get("full_info", False) + + info_columns = VideoSearch.FULL_INFO_COLUMNS if full_info else VideoSearch.INFO_COLUMNS + + if ids_file_path: + ids += cls.data_from_csv(ids_file_path, id_column_name) + if urls_file_path: + urls += cls.data_from_csv(urls_file_path, url_column_name) + + if not ids and not urls: + raise Exception("Either ids, urls, ids_file_path or urls_file_path must be provided") + + youtube = YouTube([api_key], disable_ipv6=True) + + if urls: + ids += sum([cls.video_id_from_url(url) for url in urls], []) + + # Remove duplicated + ids = list(set(ids)) + videos_infos = list(youtube.videos_infos([_id for _id in ids if _id])) + return cls.data_to_csv( + data=[VideoSearch.filter_fields(video_info, info_columns) for video_info in videos_infos], + output_file_path=output_file_path, + ) From bf0eebe06a8e6b190386e3771dbf553a255f3300 Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Thu, 13 Nov 2025 20:11:50 -0300 Subject: [PATCH 13/18] Add VideoComments command to retrieve and export YouTube video comments --- youtool/commands/__init__.py | 4 ++- youtool/commands/video_comments.py | 46 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 youtool/commands/video_comments.py diff --git a/youtool/commands/__init__.py b/youtool/commands/__init__.py index 15aeb5e..77326d9 100644 --- a/youtool/commands/__init__.py +++ b/youtool/commands/__init__.py @@ -3,10 +3,11 @@ from .base import Command from .channel_id import ChannelId from .channel_info import ChannelInfo +from .video_comments import VideoComments from .video_info import VideoInfo from .video_search import VideoSearch -COMMANDS: List[Command] = [ChannelId, ChannelInfo, VideoInfo, VideoSearch] +COMMANDS: List[Command] = [ChannelId, ChannelInfo, VideoInfo, VideoSearch, VideoComments] __all__ = [ "Command", @@ -15,4 +16,5 @@ "ChannelInfo", "VideoInfo", "VideoSearch", + "VideoComments", ] diff --git a/youtool/commands/video_comments.py b/youtool/commands/video_comments.py new file mode 100644 index 0000000..820b449 --- /dev/null +++ b/youtool/commands/video_comments.py @@ -0,0 +1,46 @@ +from typing import List, Self + +from youtool import YouTube + +from .base import Command + + +class VideoComments(Command): + """ + Get comments from a video ID, generate CSV output + """ + + name = "video-comments" + arguments = [ + {"name": "--ids", "type": str, "help": "Video ID", "required": True}, + {"name": "--output-file-path", "type": str, "help": "Output CSV file path"}, + ] + + COMMENT_COLUMNS: List[str] = ["comment_id", "author_display_name", "text_display", "like_count", "published_at"] + + @classmethod + def execute(cls: Self, **kwargs) -> str: + """ + Execute the get-comments command to fetch comments from a YouTube video and save them to a CSV file. + + - a YouTube video ID (`--ids`). + + Args: + ids (str): The ID of the YouTube video. + output_file_path (str): Path to the output CSV file where comments will be saved. + api_key (str): The API key to authenticate with the YouTube Data API. + + Returns: + A message indicating the result of the command. If output_file_path is specified, + the message will include the path to the generated CSV file. + Otherwise, it will return the result as a string. + """ + ids = kwargs.get("ids") + output_file_path = kwargs.get("output_file_path") + api_key = kwargs.get("api_key") + + youtube = YouTube([api_key], disable_ipv6=True) + + comments = list(youtube.video_comments(ids)) + + return cls.data_to_csv(data=comments, output_file_path=output_file_path) From 08e5b3b251877d3377b3975e4825f4f547056438 Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Fri, 14 Nov 2025 11:57:06 -0300 Subject: [PATCH 14/18] Update output file path type to Path in VideoComments command --- youtool/commands/video_comments.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtool/commands/video_comments.py b/youtool/commands/video_comments.py index 820b449..50ee459 100644 --- a/youtool/commands/video_comments.py +++ b/youtool/commands/video_comments.py @@ -1,3 +1,5 @@ +from pathlib import Path + from typing import List, Self from youtool import YouTube @@ -13,7 +15,7 @@ class VideoComments(Command): name = "video-comments" arguments = [ {"name": "--ids", "type": str, "help": "Video ID", "required": True}, - {"name": "--output-file-path", "type": str, "help": "Output CSV file path"}, + {"name": "--output-file-path", "type": Path, "help": "Output CSV file path"}, ] COMMENT_COLUMNS: List[str] = ["comment_id", "author_display_name", "text_display", "like_count", "published_at"] @@ -27,7 +29,7 @@ def execute(cls: Self, **kwargs) -> str: Args: ids (str): The ID of the YouTube video. - output_file_path (str): Path to the output CSV file where comments will be saved. + output_file_path (Path): Path to the output CSV file where comments will be saved. api_key (str): The API key to authenticate with the YouTube Data API. Returns: From 598f4e4e2d106304222a534fa39fd62d205a0c49 Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Fri, 21 Nov 2025 12:34:26 -0300 Subject: [PATCH 15/18] Make lint --- youtool/commands/video_comments.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtool/commands/video_comments.py b/youtool/commands/video_comments.py index 50ee459..468158e 100644 --- a/youtool/commands/video_comments.py +++ b/youtool/commands/video_comments.py @@ -1,5 +1,4 @@ from pathlib import Path - from typing import List, Self from youtool import YouTube From 08cc8d4dfe2ce2d1f56ecb169eabc590eb7fcff0 Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Fri, 14 Nov 2025 12:35:38 -0300 Subject: [PATCH 16/18] Add VideoLiveChat command to retrieve and export live chat comments from YouTube videos --- youtool/commands/__init__.py | 4 +- youtool/commands/video_livechat.py | 78 ++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 youtool/commands/video_livechat.py diff --git a/youtool/commands/__init__.py b/youtool/commands/__init__.py index 77326d9..a7dd476 100644 --- a/youtool/commands/__init__.py +++ b/youtool/commands/__init__.py @@ -5,9 +5,10 @@ from .channel_info import ChannelInfo from .video_comments import VideoComments from .video_info import VideoInfo +from .video_livechat import VideoLiveChat from .video_search import VideoSearch -COMMANDS: List[Command] = [ChannelId, ChannelInfo, VideoInfo, VideoSearch, VideoComments] +COMMANDS: List[Command] = [ChannelId, ChannelInfo, VideoInfo, VideoSearch, VideoComments, VideoLiveChat] __all__ = [ "Command", @@ -17,4 +18,5 @@ "VideoInfo", "VideoSearch", "VideoComments", + "VideoLiveChat", ] diff --git a/youtool/commands/video_livechat.py b/youtool/commands/video_livechat.py new file mode 100644 index 0000000..5eacfb6 --- /dev/null +++ b/youtool/commands/video_livechat.py @@ -0,0 +1,78 @@ +from datetime import datetime +from pathlib import Path +from typing import List, Optional, Self + +from youtool import YouTube + +from .base import Command + + +class VideoLiveChat(Command): + """Get live chat comments from a video ID, generate CSV output (same schema for chat_message dicts)""" + + name = "video-livechat" + arguments = [ + {"name": "--ids", "type": str, "help": "Video ID", "required": True}, + {"name": "--output-file-path", "type": Path, "help": "Output CSV file path"}, + {"name": "--expand-emojis", "action": "store_true", "help": "Expand emojis in chat messages"}, + ] + + CHAT_COLUMNS: List[str] = [ + "id", + "video_id", + "created_at", + "type", + "action", + "video_time", + "author", + "author_id", + "author_image_url", + "text", + "money_currency", + "money_amount", + ] + + @staticmethod + def parse_timestamp(timestamp: str) -> str: + try: + return datetime.fromisoformat(timestamp.replace("Z", "")).strftime("%Y-%m-%d %H:%M:%S") + except Exception: + return timestamp + + @staticmethod + def parse_decimal(value: Optional[str]) -> Optional[float]: + if value is None: + return None + try: + return float(str(value).replace(",", "")) + except Exception: + return None + + @classmethod + def execute(cls: Self, **kwargs) -> str: + """ + Execute the video-livechat command to fetch live chat messages from a YouTube video and save them to a CSV file. + + - a YouTube video ID (`--ids`). + + Args: + ids (str): The ID of the YouTube video. + output_file_path (Path): Path to the output CSV file where chat messages will be saved. + expand_emojis (bool): Whether to expand emojis in chat messages. Defaults to True. + api_key (str): The API key to authenticate with the YouTube Data API. + + Returns: + A message indicating the result of the command. If output_file_path is specified, + the message will include the path to the generated CSV file. + Otherwise, it will return the result as a string. + """ + ids = kwargs.get("ids") + output_file_path = kwargs.get("output_file_path") + expand_emojis = kwargs.get("expand_emojis", True) + api_key = kwargs.get("api_key") + + youtube = YouTube([api_key], disable_ipv6=True) + + chat_messages = list(youtube.video_livechat(ids, expand_emojis)) + + return cls.data_to_csv(data=chat_messages, output_file_path=output_file_path) From 049ac9062e877c53f0b873e3d275b1910c69086b Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Fri, 21 Nov 2025 12:05:11 -0300 Subject: [PATCH 17/18] Add VideoTranscription command to download and save YouTube video transcriptions; Updated version requests lib --- requirements/base.txt | 2 +- youtool/commands/__init__.py | 12 +++- youtool/commands/video_transcription.py | 88 +++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 youtool/commands/video_transcription.py diff --git a/requirements/base.txt b/requirements/base.txt index ea93b32..a0305f9 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,2 +1,2 @@ isodate -requests +requests==2.32.4 diff --git a/youtool/commands/__init__.py b/youtool/commands/__init__.py index a7dd476..175b783 100644 --- a/youtool/commands/__init__.py +++ b/youtool/commands/__init__.py @@ -7,8 +7,17 @@ from .video_info import VideoInfo from .video_livechat import VideoLiveChat from .video_search import VideoSearch +from .video_transcription import VideoTranscription -COMMANDS: List[Command] = [ChannelId, ChannelInfo, VideoInfo, VideoSearch, VideoComments, VideoLiveChat] +COMMANDS: List[Command] = [ + ChannelId, + ChannelInfo, + VideoInfo, + VideoSearch, + VideoComments, + VideoLiveChat, + VideoTranscription, +] __all__ = [ "Command", @@ -19,4 +28,5 @@ "VideoSearch", "VideoComments", "VideoLiveChat", + "VideoTranscription", ] diff --git a/youtool/commands/video_transcription.py b/youtool/commands/video_transcription.py new file mode 100644 index 0000000..fdcebdf --- /dev/null +++ b/youtool/commands/video_transcription.py @@ -0,0 +1,88 @@ +from pathlib import Path + +from youtool import YouTube + +from .base import Command + + +class VideoTranscription(Command): + """Download video transcriptions from YouTube videos based on IDs or URLs (or CSV filename with URLs/IDs inside), and save them to files.""" + + name = "video-transcription" + arguments = [ + {"name": "--ids", "type": str, "help": "Video IDs", "nargs": "*", "mutually_exclusive_group": "input_source"}, + {"name": "--urls", "type": str, "help": "Video URLs", "nargs": "*", "mutually_exclusive_group": "input_source"}, + { + "name": "--urls-file-path", + "type": Path, + "help": "Channels urls csv file path", + "mutually_exclusive_group": "input_source", + }, + {"name": "--ids-file-path", "type": Path, "help": "Channel IDs CSV file path", "mutually_exclusive_group": "input_source"}, + {"name": "--output-dir", "type": Path, "help": "Output directory to save transcriptions", "required": True}, + {"name": "--language-code", "type": str, "help": "Language code for transcription", "required": True}, + {"name": "--url_column_name", "type": str, "help": "URL column name on CSV input files"}, + {"name": "--id_column_name", "type": str, "help": "ID column name on CSV input files"}, + ] + + ID_COLUMN_NAME: str = "video_id" + URL_COLUMN_NAME: str = "video_url" + + @classmethod + def execute(cls, **kwargs) -> str: + """Execute the video-transcription command to download transcriptions of videos from IDs or URLs and save them to a CSV file. + + - a list of YouTube video IDs (`--ids`), or + - a list of YouTube video URLs (`--urls`), or + - a CSV file containing those URLs (`--urls-file-path`) or IDs (`--ids-file-path`). + + Args: + ids (list[str], optional): List of YouTube video IDs. + Mutually exclusive with `urls` and `input_file_path`. + urls (list[str], optional): List of YouTube video URLs. + Mutually exclusive with `ids` and `input_file_path`. + urls_file_path (Path, optional): Path to a CSV file containing YouTube video URLs. + ids_file_path (Path, optional): Path to a CSV file containing YouTube video IDs. + output_dir (Path, optional): Path to the output CSV file where video information will be saved. + language_code (str): Language code for the transcription language. + api_key (str): The API key to authenticate with the YouTube Data API. + url_column_name (str, optional): Column name for URLs in the CSV input file. Defaults to "video_url". + id_column_name (str, optional): Column name for IDs in the CSV output file. Defaults to "video_id". + + Returns: + str: A message indicating the result of the command. Reports success or failure for each video transcription download. + """ + ids = kwargs.get("ids") or [] + urls = kwargs.get("urls") or [] + ids_file_path = kwargs.get("ids_file_path") + urls_file_path = kwargs.get("urls_file_path") + output_dir = kwargs.get("output_dir") + language_code = kwargs.get("language_code") + api_key = kwargs.get("api_key") + + url_column_name = kwargs.get("url_column_name") or VideoTranscription.URL_COLUMN_NAME + id_column_name = kwargs.get("id_column_name") or VideoTranscription.ID_COLUMN_NAME + + youtube = YouTube([api_key], disable_ipv6=True) + + if ids_file_path: + ids += cls.data_from_csv(ids_file_path, id_column_name) + if urls_file_path: + urls += cls.data_from_csv(urls_file_path, url_column_name) + + if not ids and not urls: + raise Exception("Either 'ids' or 'urls' must be provided for the video-transcription command") + + if urls: + ids += sum([cls.video_id_from_url(url) for url in urls], []) + + # Remove duplicated + ids = list(set(ids)) + youtube.videos_transcriptions(ids, language_code, output_dir) + output_dir_path = Path(output_dir) + saved_transcriptions = [ + str(output_dir_path / f"{v_id}.{language_code}.vtt") + for v_id in ids + if (output_dir_path / f"{v_id}.{language_code}.vtt").is_file() + ] + return "\n".join(saved_transcriptions) From e40301c9676c448fd67db7ffe89f6f21e3af4ae5 Mon Sep 17 00:00:00 2001 From: aninhasalesp Date: Fri, 21 Nov 2025 12:33:27 -0300 Subject: [PATCH 18/18] Make lint --- youtool/commands/video_transcription.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtool/commands/video_transcription.py b/youtool/commands/video_transcription.py index fdcebdf..aeec362 100644 --- a/youtool/commands/video_transcription.py +++ b/youtool/commands/video_transcription.py @@ -18,7 +18,12 @@ class VideoTranscription(Command): "help": "Channels urls csv file path", "mutually_exclusive_group": "input_source", }, - {"name": "--ids-file-path", "type": Path, "help": "Channel IDs CSV file path", "mutually_exclusive_group": "input_source"}, + { + "name": "--ids-file-path", + "type": Path, + "help": "Channel IDs CSV file path", + "mutually_exclusive_group": "input_source", + }, {"name": "--output-dir", "type": Path, "help": "Output directory to save transcriptions", "required": True}, {"name": "--language-code", "type": str, "help": "Language code for transcription", "required": True}, {"name": "--url_column_name", "type": str, "help": "URL column name on CSV input files"},