From fce72f230601ae978f803eb3f5dc7ad06bab6331 Mon Sep 17 00:00:00 2001 From: chmod77 Date: Tue, 25 Mar 2025 23:26:36 +0300 Subject: [PATCH 01/20] chore: docstring cleanup Signed-off-by: chmod77 Signed-off-by: Peeter Piegaze <1153481+ppiegaze@users.noreply.github.com> --- flytekit/__init__.py | 232 +++++++++---------------- flytekit/bin/entrypoint.py | 152 +++++++++++----- flytekit/clients/auth_helper.py | 15 +- flytekit/clients/friendly.py | 110 +++++------- flytekit/clients/raw.py | 68 +++----- flytekit/configuration/__init__.py | 161 ++++++++--------- flytekit/configuration/file.py | 5 +- flytekit/core/annotation.py | 3 +- flytekit/core/array_node_map_task.py | 7 +- flytekit/core/base_sql_task.py | 6 +- flytekit/core/base_task.py | 36 ++-- flytekit/core/checkpointer.py | 9 +- flytekit/core/condition.py | 11 +- flytekit/core/context_manager.py | 38 ++-- flytekit/core/dynamic_workflow_task.py | 6 +- flytekit/core/launch_plan.py | 12 +- flytekit/core/legacy_map_task.py | 3 +- flytekit/core/notification.py | 10 +- flytekit/core/python_function_task.py | 7 +- flytekit/core/resources.py | 8 +- flytekit/core/schedule.py | 6 +- flytekit/core/task.py | 39 +++-- flytekit/core/testing.py | 3 +- flytekit/tools/module_loader.py | 2 +- 24 files changed, 459 insertions(+), 490 deletions(-) diff --git a/flytekit/__init__.py b/flytekit/__init__.py index 74a50aff7f..a6eb11091e 100644 --- a/flytekit/__init__.py +++ b/flytekit/__init__.py @@ -1,209 +1,133 @@ """ -===================== Core Flytekit -===================== - -.. currentmodule:: flytekit +------------ This package contains all of the most common abstractions you'll need to write Flyte workflows and extend Flytekit. Basic Authoring -=============== +-------------- These are the essentials needed to get started writing tasks and workflows. -.. 
autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ - - task - workflow - kwtypes - current_context - ExecutionParameters - FlyteContext - map_task - ~core.workflow.ImperativeWorkflow - ~core.node_creation.create_node - ~core.promise.NodeOutput - FlyteContextManager - -.. important:: - - Tasks and Workflows can both be locally run, assuming the relevant tasks are capable of local execution. - This is useful for unit testing. +- task +- workflow +- kwtypes +- current_context +- ExecutionParameters +- FlyteContext +- map_task +- ImperativeWorkflow +- create_node +- NodeOutput +- FlyteContextManager + +> [!NOTE] +> **Local Execution** +> +> Tasks and Workflows can both be locally run, assuming the relevant tasks are capable of local execution. +> This is useful for unit testing. Branching and Conditionals -========================== +------------------------- Branches and conditionals can be expressed explicitly in Flyte. These conditions are evaluated -in the flyte engine and hence should be used for control flow. ``dynamic workflows`` can be used to perform custom conditional logic not supported by flytekit - -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ +in the flyte engine and hence should be used for control flow. "dynamic workflows" can be used to perform custom conditional logic not supported by flytekit. - conditional +- conditional Customizing Tasks & Workflows -============================== +---------------------------- -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ - - TaskMetadata - Wrapper object that allows users to specify Task - Resources - Things like CPUs/Memory, etc. - WorkflowFailurePolicy - Customizes what happens when a workflow fails. - PodTemplate - Custom PodTemplate for a task. +- TaskMetadata - Wrapper object that allows users to specify Task +- Resources - Things like CPUs/Memory, etc. 
+- WorkflowFailurePolicy - Customizes what happens when a workflow fails. +- PodTemplate - Custom PodTemplate for a task. Dynamic and Nested Workflows -============================== -See the :py:mod:`Dynamic ` module for more information. - -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ +--------------------------- +See the Dynamic module for more information. - dynamic +- dynamic Signaling -========= +-------- -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ - - approve - sleep - wait_for_input +- approve +- sleep +- wait_for_input Scheduling -============================ - -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ +---------- - CronSchedule - FixedRate +- CronSchedule +- FixedRate Notifications -============================ +----------- -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ - - Email - PagerDuty - Slack +- Email +- PagerDuty +- Slack Reference Entities -==================== - -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ +----------------- - get_reference_entity - LaunchPlanReference - TaskReference - WorkflowReference - reference_task - reference_workflow - reference_launch_plan +- get_reference_entity +- LaunchPlanReference +- TaskReference +- WorkflowReference +- reference_task +- reference_workflow +- reference_launch_plan Core Task Types -================= +------------- -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ - - SQLTask - ContainerTask - PythonFunctionTask - PythonInstanceTask - LaunchPlan +- SQLTask +- ContainerTask +- PythonFunctionTask +- PythonInstanceTask +- LaunchPlan Secrets and SecurityContext -============================ - -.. 
autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ +-------------------------- - Secret - SecurityContext +- Secret +- SecurityContext Common Flyte IDL Objects -========================= - -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ - - AuthRole - Labels - Annotations - WorkflowExecutionPhase - Blob - BlobMetadata - Literal - Scalar - LiteralType - BlobType +----------------------- + +- AuthRole +- Labels +- Annotations +- WorkflowExecutionPhase +- Blob +- BlobMetadata +- Literal +- Scalar +- LiteralType +- BlobType Task Utilities -============== +------------ -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ - - HashMethod - Cache - CachePolicy - VersionParameters +- HashMethod Artifacts -========= - -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ +-------- - Artifact +- Artifact Documentation -============= - -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ - - Description - Documentation - SourceCode +----------- +- Description +- Documentation +- SourceCode """ import os diff --git a/flytekit/bin/entrypoint.py b/flytekit/bin/entrypoint.py index 3011544dec..51ddb7ed2b 100644 --- a/flytekit/bin/entrypoint.py +++ b/flytekit/bin/entrypoint.py @@ -44,7 +44,10 @@ from flytekit.deck.deck import _output_deck from flytekit.exceptions.base import FlyteException from flytekit.exceptions.system import FlyteNonRecoverableSystemException -from flytekit.exceptions.user import FlyteRecoverableException, FlyteUserRuntimeException +from flytekit.exceptions.user import ( + FlyteRecoverableException, + FlyteUserRuntimeException, +) from flytekit.interfaces.stats.taggable import get_stats as _get_stats from flytekit.loggers import logger, user_space_logger from flytekit.models import dynamic_job as _dynamic_job @@ -52,7 +55,9 @@ from flytekit.models.core import errors as _error_models from flytekit.models.core import 
execution as _execution_models from flytekit.models.core import identifier as _identifier -from flytekit.tools.fast_registration import download_distribution as _download_distribution +from flytekit.tools.fast_registration import ( + download_distribution as _download_distribution, +) from flytekit.tools.module_loader import load_object_from_module from flytekit.utils.pbhash import compute_hash_string @@ -74,7 +79,9 @@ def _compute_array_job_index(): if os.environ.get("BATCH_JOB_ARRAY_INDEX_OFFSET"): offset = int(os.environ.get("BATCH_JOB_ARRAY_INDEX_OFFSET")) if os.environ.get("BATCH_JOB_ARRAY_INDEX_VAR_NAME"): - return offset + int(os.environ.get(os.environ.get("BATCH_JOB_ARRAY_INDEX_VAR_NAME"))) + return offset + int( + os.environ.get(os.environ.get("BATCH_JOB_ARRAY_INDEX_VAR_NAME")) + ) return offset @@ -84,15 +91,16 @@ def _build_error_file_name() -> str: For distributed tasks, all workers upload error files which must not overwrite each other, leading to a race condition. A uuid is included to prevent this. - Returns - ------- - str - Name of the error file. + :rtype: str + :return: Name of the error file. + """ dist_error_strategy = get_one_of("FLYTE_INTERNAL_DIST_ERROR_STRATEGY", "_F_DES") if not dist_error_strategy: return _constants.ERROR_FILE_NAME - error_file_name_base, error_file_name_extension = os.path.splitext(_constants.ERROR_FILE_NAME) + error_file_name_base, error_file_name_extension = os.path.splitext( + _constants.ERROR_FILE_NAME + ) error_file_name_base += f"-{uuid.uuid4().hex}" return f"{error_file_name_base}{error_file_name_extension}" @@ -102,10 +110,7 @@ def _get_worker_name() -> str: For distributed tasks, the backend plugin can set a worker name to be used for error reporting. 
- Returns - ------- - str - Name of the worker + :return: Name of the worker """ dist_error_strategy = get_one_of("FLYTE_INTERNAL_DIST_ERROR_STRATEGY", "_F_DES") if not dist_error_strategy: @@ -139,11 +144,17 @@ def _dispatch_execute( ): """ Dispatches execute to PythonTask + Step1: Download inputs and load into a literal map + Step2: Invoke task - dispatch_execute + Step3: + a: [Optional] Record outputs to output_prefix + b: OR if IgnoreOutputs is raised, then ignore uploading outputs + c: OR if an unhandled exception is retrieved - record it as an errors.pb :param ctx: FlyteContext @@ -170,7 +181,9 @@ def _dispatch_execute( # Step1 local_inputs_file = os.path.join(ctx.execution_state.working_dir, "inputs.pb") ctx.file_access.get_data(inputs_path, local_inputs_file) - input_proto = utils.load_proto_from_file(_literals_pb2.LiteralMap, local_inputs_file) + input_proto = utils.load_proto_from_file( + _literals_pb2.LiteralMap, local_inputs_file + ) idl_input_literals = _literal_models.LiteralMap.from_flyte_idl(input_proto) # Step2 @@ -180,7 +193,9 @@ def _dispatch_execute( # Step3a if isinstance(outputs, VoidPromise): logger.warning("Task produces no outputs") - output_file_dict = {_constants.OUTPUT_FILE_NAME: _literal_models.LiteralMap(literals={})} + output_file_dict = { + _constants.OUTPUT_FILE_NAME: _literal_models.LiteralMap(literals={}) + } elif isinstance(outputs, _literal_models.LiteralMap): # The keys in this map hold the filenames to the offloaded proto literals. 
offloaded_literals: Dict[str, _literal_models.Literal] = {} @@ -190,8 +205,12 @@ def _dispatch_execute( min_offloaded_size = -1 max_offloaded_size = -1 if offloading_enabled: - min_offloaded_size = int(os.environ.get("_F_L_MIN_SIZE_MB", "10")) * 1024 * 1024 - max_offloaded_size = int(os.environ.get("_F_L_MAX_SIZE_MB", "1000")) * 1024 * 1024 + min_offloaded_size = ( + int(os.environ.get("_F_L_MIN_SIZE_MB", "10")) * 1024 * 1024 + ) + max_offloaded_size = ( + int(os.environ.get("_F_L_MAX_SIZE_MB", "1000")) * 1024 * 1024 + ) # Go over each output and create a separate offloaded in case its size is too large for k, v in outputs.literals.items(): @@ -207,7 +226,9 @@ def _dispatch_execute( ) if min_offloaded_size != -1 and lit.ByteSize() >= min_offloaded_size: - logger.debug(f"Literal {k} is too large to be inlined, offloading to metadata bucket") + logger.debug( + f"Literal {k} is too large to be inlined, offloading to metadata bucket" + ) inferred_type = task_def.interface.outputs[k].type # In the case of map tasks we need to use the type of the collection as inferred type as the task @@ -234,7 +255,10 @@ def _dispatch_execute( offloaded_literals[offloaded_filename] = v outputs = _literal_models.LiteralMap(literals=literal_map_copy) - output_file_dict = {_constants.OUTPUT_FILE_NAME: outputs, **offloaded_literals} + output_file_dict = { + _constants.OUTPUT_FILE_NAME: outputs, + **offloaded_literals, + } elif isinstance(outputs, _dynamic_job.DynamicJobSpec): output_file_dict = {_constants.FUTURES_FILE_NAME: outputs} else: @@ -254,7 +278,9 @@ def _dispatch_execute( except FlyteUserRuntimeException as e: # Step3b if isinstance(e.value, IgnoreOutputs): - logger.warning(f"User-scoped IgnoreOutputs received! Outputs.pb will not be uploaded. reason {e}!!") + logger.warning( + f"User-scoped IgnoreOutputs received! Outputs.pb will not be uploaded. reason {e}!!" 
+ ) return # Step3c @@ -275,7 +301,9 @@ def _dispatch_execute( ) ) if task_def is not None: - logger.error(f"Exception when executing task {task_def.name or task_def.id.name}, reason {str(e)}") + logger.error( + f"Exception when executing task {task_def.name or task_def.id.name}, reason {str(e)}" + ) else: logger.error(f"Exception when loading_task, reason {str(e)}") logger.error("!! Begin User Error Captured by Flyte !!") @@ -318,13 +346,22 @@ def _dispatch_execute( logger.error("!! End Error Captured by Flyte !!") for k, v in output_file_dict.items(): - utils.write_proto_to_file(v.to_flyte_idl(), os.path.join(ctx.execution_state.engine_dir, k)) + utils.write_proto_to_file( + v.to_flyte_idl(), os.path.join(ctx.execution_state.engine_dir, k) + ) - ctx.file_access.put_data(ctx.execution_state.engine_dir, output_prefix, is_multipart=True) - logger.info(f"Engine folder written successfully to the output prefix {output_prefix}") + ctx.file_access.put_data( + ctx.execution_state.engine_dir, output_prefix, is_multipart=True + ) + logger.info( + f"Engine folder written successfully to the output prefix {output_prefix}" + ) if task_def is not None and not getattr(task_def, "disable_deck", True): - _output_deck(task_name=task_def.name.split(".")[-1], new_user_params=ctx.user_space_params) + _output_deck( + task_name=task_def.name.split(".")[-1], + new_user_params=ctx.user_space_params, + ) logger.debug("Finished _dispatch_execute") @@ -348,7 +385,11 @@ def get_traceback_str(e: Exception) -> str: root_exception = e.__cause__ if e.__cause__ else e indentation = " " exception_str = textwrap.indent( - text="".join(traceback.format_exception(type(root_exception), root_exception, root_exception.__traceback__)), + text="".join( + traceback.format_exception( + type(root_exception), root_exception, root_exception.__traceback__ + ) + ), prefix=indentation, ) # Second, format a summary exception message @@ -365,15 +406,8 @@ def get_container_error_timestamp(e: Optional[Exception] = 
None) -> Timestamp: If a flyte exception is passed, use its timestamp, otherwise, use the current time. - Parameters - ---------- - e : Exception, optional - Exception that has occurred. - - Returns - ------- - Timestamp - Timestamp to be reported in ContainerError + :param e: Exception that has occurred. Optional. + :return: Timestamp to be reported in ContainerError """ timestamp = None if isinstance(e, FlyteException): @@ -441,8 +475,12 @@ def setup_execution( checkpointer = None if checkpoint_path is not None: - checkpointer = SyncCheckpoint(checkpoint_dest=checkpoint_path, checkpoint_src=prev_checkpoint) - logger.debug(f"Checkpointer created with source {prev_checkpoint} and dest {checkpoint_path}") + checkpointer = SyncCheckpoint( + checkpoint_dest=checkpoint_path, checkpoint_src=prev_checkpoint + ) + logger.debug( + f"Checkpointer created with source {prev_checkpoint} and dest {checkpoint_path}" + ) execution_parameters = ExecutionParameters( execution_id=_identifier.WorkflowExecutionIdentifier( @@ -470,7 +508,9 @@ def setup_execution( raw_output_prefix=raw_output_data_prefix, output_metadata_prefix=output_metadata_prefix, checkpoint=checkpointer, - task_id=_identifier.Identifier(_identifier.ResourceType.TASK, tk_project, tk_domain, tk_name, tk_version), + task_id=_identifier.Identifier( + _identifier.ResourceType.TASK, tk_project, tk_domain, tk_name, tk_version + ), ) metadata = { @@ -487,7 +527,9 @@ def setup_execution( execution_metadata=metadata, ) except TypeError: # would be thrown from DataPersistencePlugins.find_plugin - logger.error(f"No data plugin found for raw output prefix {raw_output_data_prefix}") + logger.error( + f"No data plugin found for raw output prefix {raw_output_data_prefix}" + ) raise ctx = ctx.new_builder().with_file_access(file_access).build() @@ -619,7 +661,12 @@ def _execute_map_task( raise ValueError(f"Resolver args cannot be <1, got {resolver_args}") with setup_execution( - raw_output_data_prefix, output_prefix, 
checkpoint_path, prev_checkpoint, dynamic_addl_distro, dynamic_dest_dir + raw_output_data_prefix, + output_prefix, + checkpoint_path, + prev_checkpoint, + dynamic_addl_distro, + dynamic_dest_dir, ) as ctx: working_dir = os.getcwd() if all(os.path.realpath(path) != working_dir for path in sys.path): @@ -628,7 +675,9 @@ def _execute_map_task( mtr = load_object_from_module(resolver)() def load_task(): - return mtr.load_task(loader_args=resolver_args, max_concurrency=max_concurrency) + return mtr.load_task( + loader_args=resolver_args, max_concurrency=max_concurrency + ) # Special case for the map task resolver, we need to append the task index to the output prefix. # TODO: (https://github.com/flyteorg/flyte/issues/5011) Remove legacy map task @@ -647,7 +696,9 @@ def load_task(): def normalize_inputs( - raw_output_data_prefix: Optional[str], checkpoint_path: Optional[str], prev_checkpoint: Optional[str] + raw_output_data_prefix: Optional[str], + checkpoint_path: Optional[str], + prev_checkpoint: Optional[str], ): # Backwards compatibility - if Propeller hasn't filled this in, then it'll come through here as the original # template string, so let's explicitly set it to None so that the downstream functions will know to fall back @@ -656,7 +707,11 @@ def normalize_inputs( raw_output_data_prefix = None if checkpoint_path == "{{.checkpointOutputPrefix}}": checkpoint_path = None - if prev_checkpoint == "{{.prevCheckpointPrefix}}" or prev_checkpoint == "" or prev_checkpoint == '""': + if ( + prev_checkpoint == "{{.prevCheckpointPrefix}}" + or prev_checkpoint == "" + or prev_checkpoint == '""' + ): prev_checkpoint = None return raw_output_data_prefix, checkpoint_path, prev_checkpoint @@ -725,7 +780,9 @@ def execute_task_cmd( @click.option("--additional-distribution", required=False) @click.option("--dest-dir", required=False) @click.argument("task-execute-cmd", nargs=-1, type=click.UNPROCESSED) -def fast_execute_task_cmd(additional_distribution: str, dest_dir: str, 
task_execute_cmd: List[str]): +def fast_execute_task_cmd( + additional_distribution: str, dest_dir: str, task_execute_cmd: List[str] +): """ Downloads a compressed code distribution specified by additional-distribution and then calls the underlying task execute command for the updated code. @@ -739,7 +796,14 @@ def fast_execute_task_cmd(additional_distribution: str, dest_dir: str, task_exec cmd = [] for arg in task_execute_cmd: if arg == "--resolver": - cmd.extend(["--dynamic-addl-distro", additional_distribution, "--dynamic-dest-dir", dest_dir]) + cmd.extend( + [ + "--dynamic-addl-distro", + additional_distribution, + "--dynamic-dest-dir", + dest_dir, + ] + ) cmd.append(arg) # Use the commandline to run the task execute command rather than calling it directly in python code diff --git a/flytekit/clients/auth_helper.py b/flytekit/clients/auth_helper.py index 317a16c5f4..151a6d84ba 100644 --- a/flytekit/clients/auth_helper.py +++ b/flytekit/clients/auth_helper.py @@ -118,6 +118,7 @@ def upgrade_channel_to_proxy_authenticated(cfg: PlatformConfig, in_channel: grpc """ If activated in the platform config, given a grpc.Channel, preferably a secure channel, it returns a composed channel that uses Interceptor to perform authentication with a proxy in front of Flyte + :param cfg: PlatformConfig :param in_channel: grpc.Channel Precreated channel :return: grpc.Channel. New composite channel @@ -179,19 +180,23 @@ def get_channel(cfg: PlatformConfig, **kwargs) -> grpc.Channel: Creates a new grpc.Channel given a platformConfig. It is possible to pass additional options to the underlying channel. Examples for various options are as below - .. code-block:: python + ```python get_channel(cfg=PlatformConfig(...)) + ``` - .. code-block:: python + ```python :caption: Additional options to insecure / secure channel. Example `options` and `compression` refer to grpc guide get_channel(cfg=PlatformConfig(...), options=..., compression=...) - .. 
code-block:: python - :caption: Create secure channel with custom `grpc.ssl_channel_credentials` + ``` - get_channel(cfg=PlatformConfig(insecure=False,...), credentials=...) + + :caption: Create secure channel with custom `grpc.ssl_channel_credentials` + ```python + get_channel(cfg=PlatformConfig(insecure=False,...), credentials=...) + ``` :param cfg: PlatformConfig diff --git a/flytekit/clients/friendly.py b/flytekit/clients/friendly.py index f52b9afe36..fe5c4ad750 100644 --- a/flytekit/clients/friendly.py +++ b/flytekit/clients/friendly.py @@ -39,10 +39,13 @@ class SynchronousFlyteClient(_RawSynchronousFlyteClient): This is a low-level client that users can use to make direct gRPC service calls to the control plane. See the :std:doc:`service spec `. This is more user-friendly interface than the :py:class:`raw client ` so users should try to use this class - first. Create a client by :: + first. Create a client by + + ```python SynchronousFlyteClient("your.domain:port", insecure=True) # insecure should be True if your flyteadmin deployment doesn't have SSL enabled + ``` """ @@ -65,9 +68,8 @@ def create_task(self, task_identifer, task_spec): This will create a task definition in the Admin database. Once successful, the task object can be retrieved via the client or viewed via the UI or command-line interfaces. - .. note :: - - Overwrites are not supported so any request for a given project, domain, name, and version that exists in + > [!NOTE] + > Overwrites are not supported so any request for a given project, domain, name, and version that exists in the database must match the existing definition exactly. Furthermore, as long as the request remains identical, calling this method multiple times will result in success. @@ -88,14 +90,12 @@ def list_task_ids_paginated(self, project, domain, limit=100, token=None, sort_b This returns a page of identifiers for the tasks for a given project and domain. Filters can also be specified. - .. 
note :: - - This is a paginated API. Use the token field in the request to specify a page offset token. + > [!NOTE] + > This is a paginated API. Use the token field in the request to specify a page offset token. The user of the API is responsible for providing this token. - .. note :: - - If entries are added to the database between requests for different pages, it is possible to receive + > [!NOTE] + > If entries are added to the database between requests for different pages, it is possible to receive entries on the second page that also appeared on the first. :param Text project: The namespace of the project to list. @@ -129,14 +129,12 @@ def list_tasks_paginated(self, identifier, limit=100, token=None, filters=None, This returns a page of task metadata for tasks in a given project and domain. Optionally, specifying a name will limit the results to only tasks with that name in the given project and domain. - .. note :: - - This is a paginated API. Use the token field in the request to specify a page offset token. + > [!NOTE] + > This is a paginated API. Use the token field in the request to specify a page offset token. The user of the API is responsible for providing this token. - .. note :: - - If entries are added to the database between requests for different pages, it is possible to receive + > [!NOTE] + > If entries are added to the database between requests for different pages, it is possible to receive entries on the second page that also appeared on the first. :param flytekit.models.common.NamedEntityIdentifier identifier: NamedEntityIdentifier to list. @@ -193,9 +191,8 @@ def create_workflow(self, workflow_identifier, workflow_spec): This will create a workflow definition in the Admin database. Once successful, the workflow object can be retrieved via the client or viewed via the UI or command-line interfaces. - .. 
note :: - - Overwrites are not supported so any request for a given project, domain, name, and version that exists in + > [!NOTE] + > Overwrites are not supported so any request for a given project, domain, name, and version that exists in the database must match the existing definition exactly. Furthermore, as long as the request remains identical, calling this method multiple times will result in success. @@ -218,14 +215,12 @@ def list_workflow_ids_paginated(self, project, domain, limit=100, token=None, so This returns a page of identifiers for the workflows for a given project and domain. Filters can also be specified. - .. note :: - - This is a paginated API. Use the token field in the request to specify a page offset token. + > [!NOTE] + > This is a paginated API. Use the token field in the request to specify a page offset token. The user of the API is responsible for providing this token. - .. note :: - - If entries are added to the database between requests for different pages, it is possible to receive + > [!NOTE] + > If entries are added to the database between requests for different pages, it is possible to receive entries on the second page that also appeared on the first. :param: Text project: The namespace of the project to list. @@ -259,14 +254,12 @@ def list_workflows_paginated(self, identifier, limit=100, token=None, filters=No This returns a page of workflow meta-information for workflows in a given project and domain. Optionally, specifying a name will limit the results to only workflows with that name in the given project and domain. - .. note :: - - This is a paginated API. Use the token field in the request to specify a page offset token. + > [!NOTE] + > This is a paginated API. Use the token field in the request to specify a page offset token. The user of the API is responsible for providing this token. - .. 
note :: - - If entries are added to the database between requests for different pages, it is possible to receive + > [!NOTE] + > If entries are added to the database between requests for different pages, it is possible to receive entries on the second page that also appeared on the first. :param flytekit.models.common.NamedEntityIdentifier identifier: NamedEntityIdentifier to list. @@ -323,9 +316,8 @@ def create_launch_plan(self, launch_plan_identifer, launch_plan_spec): This will create a launch plan definition in the Admin database. Once successful, the launch plan object can be retrieved via the client or viewed via the UI or command-line interfaces. - .. note :: - - Overwrites are not supported so any request for a given project, domain, name, and version that exists in + > [!NOTE] + > Overwrites are not supported so any request for a given project, domain, name, and version that exists in the database must match the existing definition exactly. This also means that as long as the request remains identical, calling this method multiple times will result in success. @@ -375,14 +367,12 @@ def list_launch_plan_ids_paginated(self, project, domain, limit=100, token=None, This returns a page of identifiers for the launch plans for a given project and domain. Filters can also be specified. - .. note :: - - This is a paginated API. Use the token field in the request to specify a page offset token. + > [!NOTE] + > This is a paginated API. Use the token field in the request to specify a page offset token. The user of the API is responsible for providing this token. - .. note :: - - If entries are added to the database between requests for different pages, it is possible to receive + > [!NOTE] + > If entries are added to the database between requests for different pages, it is possible to receive entries on the second page that also appeared on the first. :param: Text project: The namespace of the project to list. 
@@ -416,14 +406,12 @@ def list_launch_plans_paginated(self, identifier, limit=100, token=None, filters This returns a page of launch plan meta-information for launch plans in a given project and domain. Optionally, specifying a name will limit the results to only workflows with that name in the given project and domain. - .. note :: - - This is a paginated API. Use the token field in the request to specify a page offset token. + > [!NOTE] + > This is a paginated API. Use the token field in the request to specify a page offset token. The user of the API is responsible for providing this token. - .. note :: - - If entries are added to the database between requests for different pages, it is possible to receive + > [!NOTE] + > If entries are added to the database between requests for different pages, it is possible to receive entries on the second page that also appeared on the first. :param flytekit.models.common.NamedEntityIdentifier identifier: NamedEntityIdentifier to list. @@ -463,14 +451,12 @@ def list_active_launch_plans_paginated( This returns a page of currently active launch plan meta-information for launch plans in a given project and domain. - .. note :: - - This is a paginated API. Use the token field in the request to specify a page offset token. + > [!NOTE] + > This is a paginated API. Use the token field in the request to specify a page offset token. The user of the API is responsible for providing this token. - .. note :: - - If entries are added to the database between requests for different pages, it is possible to receive + > [!NOTE] + > If entries are added to the database between requests for different pages, it is possible to receive entries on the second page that also appeared on the first. :param Text project: @@ -611,14 +597,12 @@ def list_executions_paginated(self, project, domain, limit=100, token=None, filt """ This returns a page of executions in a given project and domain. - .. note :: - - This is a paginated API. 
Use the token field in the request to specify a page offset token. + > [!NOTE] + > This is a paginated API. Use the token field in the request to specify a page offset token. The user of the API is responsible for providing this token. - .. note :: - - If entries are added to the database between requests for different pages, it is possible to receive + > [!NOTE] + > If entries are added to the database between requests for different pages, it is possible to receive entries on the second page that also appeared on the first. :param Text project: Project in which to list executions. @@ -873,14 +857,12 @@ def list_projects_paginated(self, limit=100, token=None, filters=None, sort_by=N """ This returns a page of projects. - .. note :: - - This is a paginated API. Use the token field in the request to specify a page offset token. + > [!NOTE] + > This is a paginated API. Use the token field in the request to specify a page offset token. The user of the API is responsible for providing this token. - .. note :: - - If entries are added to the database between requests for different pages, it is possible to receive + > [!NOTE] + > If entries are added to the database between requests for different pages, it is possible to receive entries on the second page that also appeared on the first. :param int limit: [Optional] The maximum number of entries to return. Must be greater than 0. The maximum diff --git a/flytekit/clients/raw.py b/flytekit/clients/raw.py index 0ffbc4ce58..4d9a7031a1 100644 --- a/flytekit/clients/raw.py +++ b/flytekit/clients/raw.py @@ -28,11 +28,12 @@ class RawSynchronousFlyteClient(object): This client should be usable regardless of environment in which this is used. In other words, configurations should be explicit as opposed to inferred from the environment or a configuration file. To create a client, - .. 
code-block:: python + ```python from flytekit.configuration import PlatformConfig RawSynchronousFlyteClient(PlatformConfig(endpoint="a.b.com", insecure=True)) # or SynchronousFlyteClient(PlatformConfig(endpoint="a.b.com", insecure=True)) + ``` """ _dataproxy_stub: DataProxyServiceStub @@ -89,9 +90,8 @@ def create_task(self, task_create_request): This will create a task definition in the Admin database. Once successful, the task object can be retrieved via the client or viewed via the UI or command-line interfaces. - .. note :: - - Overwrites are not supported so any request for a given project, domain, name, and version that exists in + > [!NOTE] + > Overwrites are not supported so any request for a given project, domain, name, and version that exists in the database must match the existing definition exactly. This also means that as long as the request remains identical, calling this method multiple times will result in success. @@ -109,18 +109,15 @@ def list_task_ids_paginated(self, identifier_list_request): This returns a page of identifiers for the tasks for a given project and domain. Filters can also be specified. - .. note :: - - The name field in the TaskListRequest is ignored. + > [!NOTE] + > The name field in the TaskListRequest is ignored. - .. note :: - - This is a paginated API. Use the token field in the request to specify a page offset token. + > [!NOTE] + > This is a paginated API. Use the token field in the request to specify a page offset token. The user of the API is responsible for providing this token. - .. note :: - - If entries are added to the database between requests for different pages, it is possible to receive + > [!NOTE] + > If entries are added to the database between requests for different pages, it is possible to receive entries on the second page that also appeared on the first. 
:param: flyteidl.admin.common_pb2.NamedEntityIdentifierListRequest identifier_list_request: @@ -134,14 +131,12 @@ def list_tasks_paginated(self, resource_list_request): This returns a page of task metadata for tasks in a given project and domain. Optionally, specifying a name will limit the results to only tasks with that name in the given project and domain. - .. note :: - - This is a paginated API. Use the token field in the request to specify a page offset token. + > [!NOTE] + > This is a paginated API. Use the token field in the request to specify a page offset token. The user of the API is responsible for providing this token. - .. note :: - - If entries are added to the database between requests for different pages, it is possible to receive + > [!NOTE] + > If entries are added to the database between requests for different pages, it is possible to receive entries on the second page that also appeared on the first. :param: flyteidl.admin.common_pb2.ResourceListRequest resource_list_request: @@ -183,9 +178,8 @@ def create_workflow(self, workflow_create_request): This will create a workflow definition in the Admin database. Once successful, the workflow object can be retrieved via the client or viewed via the UI or command-line interfaces. - .. note :: - - Overwrites are not supported so any request for a given project, domain, name, and version that exists in + > [!NOTE] + > Overwrites are not supported so any request for a given project, domain, name, and version that exists in the database must match the existing definition exactly. This also means that as long as the request remains identical, calling this method multiple times will result in success. @@ -203,18 +197,15 @@ def list_workflow_ids_paginated(self, identifier_list_request): This returns a page of identifiers for the workflows for a given project and domain. Filters can also be specified. - .. note :: + > [!NOTE] + > The name field in the WorkflowListRequest is ignored. 
- The name field in the WorkflowListRequest is ignored. - - .. note :: - - This is a paginated API. Use the token field in the request to specify a page offset token. + > [!NOTE] + > This is a paginated API. Use the token field in the request to specify a page offset token. The user of the API is responsible for providing this token. - .. note :: - - If entries are added to the database between requests for different pages, it is possible to receive + > [!NOTE] + > If entries are added to the database between requests for different pages, it is possible to receive entries on the second page that also appeared on the first. :param: flyteidl.admin.common_pb2.NamedEntityIdentifierListRequest identifier_list_request: @@ -228,14 +219,12 @@ def list_workflows_paginated(self, resource_list_request): This returns a page of workflow meta-information for workflows in a given project and domain. Optionally, specifying a name will limit the results to only workflows with that name in the given project and domain. - .. note :: - - This is a paginated API. Use the token field in the request to specify a page offset token. + > [!NOTE] + > This is a paginated API. Use the token field in the request to specify a page offset token. The user of the API is responsible for providing this token. - .. note :: - - If entries are added to the database between requests for different pages, it is possible to receive + > [!NOTE] + > If entries are added to the database between requests for different pages, it is possible to receive entries on the second page that also appeared on the first. :param: flyteidl.admin.common_pb2.ResourceListRequest resource_list_request: @@ -265,9 +254,8 @@ def create_launch_plan(self, launch_plan_create_request): This will create a launch plan definition in the Admin database. Once successful, the launch plan object can be retrieved via the client or viewed via the UI or command-line interfaces. - .. 
note :: - - Overwrites are not supported so any request for a given project, domain, name, and version that exists in + > [!NOTE] + > Overwrites are not supported so any request for a given project, domain, name, and version that exists in the database must match the existing definition exactly. This also means that as long as the request remains identical, calling this method multiple times will result in success. diff --git a/flytekit/configuration/__init__.py b/flytekit/configuration/__init__.py index bee6feb8be..66209589c5 100644 --- a/flytekit/configuration/__init__.py +++ b/flytekit/configuration/__init__.py @@ -1,68 +1,76 @@ """ -===================== -Configuration -===================== +# Configuration -.. currentmodule:: flytekit.configuration - -Flytekit Configuration Sources -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +## Flytekit Configuration Sources There are multiple ways to configure flytekit settings: -**Command Line Arguments**: This is the recommended way of setting configuration values for many cases. -For example, see `pyflyte package `_ command. +### Command Line Arguments +This is the recommended way of setting configuration values for many cases. For example, see `pyflyte package` command. + +### Python Config Object +A `Config` object can be used directly, e.g. when initializing a `FlyteRemote` object. See the Control Plane design docs for examples on how to specify a `Config` object. -**Python Config Object**: A :py:class:`~flytekit.configuration.Config` object can by used directly, e.g. when -initializing a :py:class:`~flytefit.remote.remote.FlyteRemote` object. See :doc:`here ` for examples on -how to specify a ``Config`` object. +### Environment Variables +Users can specify these at compile time, but when your task is run, Flyte Propeller will also set configuration to ensure correct interaction with the platform. The environment variables must be specified with the format `FLYTE_{SECTION}_{OPTION}`, all in upper case. 
For example, to specify the `PlatformConfig.endpoint` setting, the environment variable would be `FLYTE_PLATFORM_URL`. -**Environment Variables**: Users can specify these at compile time, but when your task is run, Flyte Propeller will -also set configuration to ensure correct interaction with the platform. The environment variables must be specified -with the format ``FLYTE_{SECTION}_{OPTION}``, all in upper case. For example, to specify the -:py:class:`PlatformConfig.endpoint ` setting, the environment variable would -be ``FLYTE_PLATFORM_URL``. +> [!NOTE] +> Environment variables won't work for image configuration, which needs to be specified with the `pyflyte package --image ...` option or in a configuration file. -.. note:: +### YAML Format Configuration File +A configuration file that contains settings for both `flytectl` and `flytekit`. This is the recommended configuration file format. Invoke the `flytectl config init` command to create a boilerplate `~/.flyte/config.yaml` file, and `flytectl --help` to learn about all of the configuration yaml options. - Environment variables won't work for image configuration, which need to be specified with the - `pyflyte package --image ... `_ option or in a configuration - file. +Example `config.yaml` file: +```yaml +# Sample config file -**YAML Format Configuration File**: A configuration file that contains settings for both -`flytectl `__ and ``flytekit``. This is the recommended configuration -file format. Invoke the :ref:`flytectl config init ` command to create a boilerplate -``~/.flyte/config.yaml`` file, and ``flytectl --help`` to learn about all of the configuration yaml options. +admin: + # For GRPC endpoints you might want to use dns:///flyte.myexample.com + endpoint: dns:///localhost:8089 + authType: Pkce + insecure: true -.. dropdown:: See example ``config.yaml`` file - :animate: fade-in-slide-down +logger: + show-source: true + level: 0 - ..
literalinclude:: ../../tests/flytekit/unit/configuration/configs/sample.yaml - :language: yaml - :caption: config.yaml +console: + endpoint: http://localhost:8080 + insecure: true -**INI Format Configuration File**: A configuration file for ``flytekit``. By default, ``flytekit`` will look for a -file in two places: +# This section is used only in the control plane to trigger a remote execution +storage: + type: minio + stow: + kind: s3 + config: + auth_type: accesskey + access_key_id: minio + secret_key: miniostorage + endpoint: http://localhost:9000 + region: us-east-1 + disable_ssl: true + addressing_style: "path" +``` -1. First, a file named ``flytekit.config`` in the Python interpreter's working directory. -2. A file in ``~/.flyte/config`` in the home directory as detected by Python. -.. dropdown:: See example ``flytekit.config`` file - :animate: fade-in-slide-down +### INI Format Configuration File +A configuration file for `flytekit`. By default, `flytekit` will look for a file in two places: - .. literalinclude:: ../../tests/flytekit/unit/configuration/configs/images.config - :language: ini - :caption: flytekit.config +1. First, a file named `flytekit.config` in the Python interpreter's working directory. +2. A file in `~/.flyte/config` in the home directory as detected by Python. -.. warning:: +Example `flytekit.config` file: +```ini +[sdk] +workflow_packages=my_cool_workflows, other_workflows +``` - The INI format configuration is considered a legacy configuration format. We recommend using the yaml format - instead if you're using a configuration file. +> [!WARNING] +> The INI format configuration is considered a legacy configuration format. We recommend using the yaml format instead if you're using a configuration file. -How is configuration used? -^^^^^^^^^^^^^^^^^^^^^^^^^^ +## How is configuration used?
-Configuration usage can roughly be bucketed into the following areas, +Configuration usage can roughly be bucketed into the following areas: - **Compile-time settings**: these are settings like the default image and named images, where to look for Flyte code, etc. - **Platform settings**: Where to find the Flyte backend (Admin DNS, whether to use SSL) @@ -70,60 +78,31 @@ - **Data access settings**: Is there a custom S3 endpoint in use? Backoff/retry behavior for accessing S3/GCS, key and password, etc. - **Other settings** - Statsd configuration, which is a run-time applicable setting but is not necessarily relevant to the Flyte platform. -Configuration Objects ---------------------- - -The following objects are encapsulated in a parent object called ``Config``. - -.. autosummary:: - :template: custom.rst - :toctree: generated/ - :nosignatures: - - ~Config - -.. _configuration-compile-time-settings: - -Serialization Time Settings -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -These are serialization/compile-time settings that are used when using commands like -`pyflyte package `_ or `pyflyte register `_. These -configuration settings are typically passed in as flags to the above CLI commands. +## Configuration Objects -The image configurations are typically either passed in via an `--image `_ flag, -or can be specified in the ``yaml`` or ``ini`` configuration files (see examples above). +The following objects are encapsulated in a parent object called `Config`: -.. autosummary:: - :template: custom.rst - :toctree: generated/ - :nosignatures: +### Serialization Time Settings - ~Image - ~ImageConfig - ~SerializationSettings - ~FastSerializationSettings +These are serialization/compile-time settings that are used when using commands like `pyflyte package` or `pyflyte register`. These configuration settings are typically passed in as flags to the above CLI commands. -.. 
_configuration-execution-time-settings: +The image configurations are typically either passed in via an `--image` flag, or can be specified in the `yaml` or `ini` configuration files (see examples above). -Execution Time Settings -^^^^^^^^^^^^^^^^^^^^^^^ +- **Image**: Represents a container image with optional configuration overrides. +- **ImageConfig**: Represents an image configuration for a given project/domain combination. +- **SerializationSettings**: Controls how to serialize Flyte entities when registering with Admin. +- **FastSerializationSettings**: Configuration for faster serialization settings. -Users typically shouldn't be concerned with these configurations, as they are typically set by FlytePropeller or -FlyteAdmin. The configurations below are useful for authenticating to a Flyte backend, configuring data access -credentials, secrets, and statsd metrics. +### Execution Time Settings -.. autosummary:: - :template: custom.rst - :toctree: generated/ - :nosignatures: +Users typically shouldn't be concerned with these configurations, as they are typically set by FlytePropeller or FlyteAdmin. The configurations below are useful for authenticating to a Flyte backend, configuring data access credentials, secrets, and statsd metrics. - ~PlatformConfig - ~StatsConfig - ~SecretsConfig - ~S3Config - ~GCSConfig - ~DataConfig +- **PlatformConfig**: Configuration for how to connect to the Flyte platform. +- **StatsConfig**: Configuration for how to emit statsd metrics. +- **SecretsConfig**: Configuration for how to access secrets. +- **S3Config**: Amazon S3 specific configuration. +- **GCSConfig**: Google Cloud Storage specific configuration. +- **DataConfig**: Configuration for data access. 
""" diff --git a/flytekit/configuration/file.py b/flytekit/configuration/file.py index 521bc72f61..f36e285ded 100644 --- a/flytekit/configuration/file.py +++ b/flytekit/configuration/file.py @@ -285,9 +285,8 @@ def set_if_exists(d: dict, k: str, v: typing.Any) -> dict: Given a dict ``d`` sets the key ``k`` with value of config ``v``, if the config value ``v`` is set and return the updated dictionary. - .. note:: - - The input dictionary ``d`` will be mutated. + > [!NOTE] + > The input dictionary ``d`` will be mutated. """ if _exists(v): d[k] = v diff --git a/flytekit/core/annotation.py b/flytekit/core/annotation.py index b4a70a6469..769855f614 100644 --- a/flytekit/core/annotation.py +++ b/flytekit/core/annotation.py @@ -14,11 +14,12 @@ class FlyteAnnotation: For a task definition: - .. code-block:: python + ```python @task def x(a: typing.Annotated[int, FlyteAnnotation({"foo": {"bar": 1}})]): return + ``` """ diff --git a/flytekit/core/array_node_map_task.py b/flytekit/core/array_node_map_task.py index 96b062ded0..5f3e8b9588 100644 --- a/flytekit/core/array_node_map_task.py +++ b/flytekit/core/array_node_map_task.py @@ -426,9 +426,9 @@ def array_node_map_task( ): """Map task that uses the ``ArrayNode`` construct.. - .. important:: + > [!IMPORTANT] - This is an experimental drop-in replacement for :py:func:`~flytekit.map_task`. + > This is an experimental drop-in replacement for `~flytekit.map_task`. :param task_function: This argument is implicitly passed and represents the repeatable function :param concurrency: If specified, this limits the number of mapped tasks than can run in parallel to the given batch @@ -454,7 +454,7 @@ class ArrayNodeMapTaskResolver(tracker.TrackedInstance, TaskResolverMixin): But in cases in which `j` is bound to a fixed value by using `functools.partial` we need a way to ensure that the interface is not simply interpolated, but only the unbound inputs are interpolated. - .. 
code-block:: python + ```python def foo((i: int, j: str) -> str: ... @@ -462,6 +462,7 @@ def foo((i: int, j: str) -> str: mt = map_task(functools.partial(foo, j=10)) print(mt.interface) + ``` output: diff --git a/flytekit/core/base_sql_task.py b/flytekit/core/base_sql_task.py index 500e19c260..c3d839b2ed 100644 --- a/flytekit/core/base_sql_task.py +++ b/flytekit/core/base_sql_task.py @@ -9,8 +9,8 @@ class SQLTask(PythonTask[T]): """ - Base task types for all SQL tasks. See :py:class:`flytekit.extras.sqlite3.task.SQLite3Task` - and :py:class:`flytekitplugins.athena.task.AthenaTask` for examples of how to use it as a base class. + Base task types for all SQL tasks. See `flytekit.extras.sqlite3.task.SQLite3Task` + and `flytekitplugins.athena.task.AthenaTask` for examples of how to use it as a base class. .. autoclass:: flytekit.extras.sqlite3.task.SQLite3Task :noindex: @@ -31,7 +31,7 @@ def __init__( ): """ This SQLTask should mostly just be used as a base class for other SQL task types and should not be used - directly. See :py:class:`flytekit.extras.sqlite3.task.SQLite3Task` + directly. See `flytekit.extras.sqlite3.task.SQLite3Task` """ super().__init__( task_type=task_type, diff --git a/flytekit/core/base_task.py b/flytekit/core/base_task.py index 41da032fee..ed6c2af22b 100644 --- a/flytekit/core/base_task.py +++ b/flytekit/core/base_task.py @@ -1,20 +1,24 @@ """ -============================== -:mod:`flytekit.core.base_task` -============================== - -.. currentmodule:: flytekit.core.base_task - -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ - - kwtypes - PythonTask - Task - TaskResolverMixin - IgnoreOutputs +# flytekit.core.base_task + +This module provides the core task-related functionality in Flytekit. + +## Core Components + +### kwtypes +Utility for creating keyword type annotations for tasks. + +### PythonTask +Base class for Python-based task implementations. + +### Task +The base class for all Flyte tasks. 
+ +### TaskResolverMixin +Mixin class that helps resolve a task implementation. + +### IgnoreOutputs +Exception that can be raised to ignore task outputs. """ diff --git a/flytekit/core/checkpointer.py b/flytekit/core/checkpointer.py index d0fdf129e4..5662774831 100644 --- a/flytekit/core/checkpointer.py +++ b/flytekit/core/checkpointer.py @@ -21,10 +21,10 @@ def restore(self, path: typing.Union[Path, str]) -> typing.Optional[Path]: Given a path, if a previous checkpoint exists, will be downloaded to this path. If download is successful the downloaded path is returned - .. note: + > [!NOTE] - Download will not be performed, if the checkpoint was previously restored. The method will return the - previously downloaded path. + > Download will not be performed, if the checkpoint was previously restored. The method will return the + previously downloaded path. """ raise NotImplementedError("Use one of the derived classes") @@ -37,10 +37,11 @@ def save(self, cp: typing.Union[Path, str, io.BufferedReader]): Usage: If you have a io.BufferedReader then the following should work - .. code-block: python + ```python with input_file.open(mode="rb") as b: checkpointer.save(b) + ``` """ raise NotImplementedError("Use one of the derived classes") diff --git a/flytekit/core/condition.py b/flytekit/core/condition.py index 50403574c1..4233868ccf 100644 --- a/flytekit/core/condition.py +++ b/flytekit/core/condition.py @@ -38,16 +38,16 @@ class ConditionalSection: for Compilation mode. It is advised to derive the class and re-implement the `start_branch` and `end_branch` methods to override the compilation behavior - .. note:: + > [!NOTE] - Conditions can only be used within a workflow context. + > Conditions can only be used within a workflow context. Usage: - .. code-block:: python + ```python v = conditional("fractions").if_((my_input > 0.1) & (my_input < 1.0)).then(...)... 
- + ``` """ def __init__(self, name: str): @@ -488,7 +488,7 @@ def conditional(name: str) -> ConditionalSection: Example of a condition usage. Note the nesting and the assignment to a LHS variable - .. code-block:: python + ```python v = ( conditional("fractions") @@ -507,6 +507,7 @@ def conditional(name: str) -> ConditionalSection: .else_() .then(double(n=my_input)) ) + ``` """ ctx = FlyteContextManager.current_context() diff --git a/flytekit/core/context_manager.py b/flytekit/core/context_manager.py index 6378f42706..a1ae6b2f5c 100644 --- a/flytekit/core/context_manager.py +++ b/flytekit/core/context_manager.py @@ -68,17 +68,18 @@ class ExecutionParameters(object): """ This is a run-time user-centric context object that is accessible to every @task method. It can be accessed using - .. code-block:: python + + ```python + flytekit.current_context() + ``` - flytekit.current_context() - - This object provides the following + This object provides the following: * a statsd handler * a logging handler - * the execution ID as an :py:class:`flytekit.models.core.identifier.WorkflowExecutionIdentifier` object + * the execution ID as an `flytekit.models.core.identifier.WorkflowExecutionIdentifier` object * a working directory for the user to write arbitrary files to - Please do not confuse this object with the :py:class:`flytekit.FlyteContext` object. + Please do not confuse this object with the `flytekit.FlyteContext` object. """ @dataclass(init=False) @@ -254,9 +255,9 @@ def execution_date(self) -> datetime: This is a datetime representing the time at which a workflow was started. This is consistent across all tasks executed in a workflow or sub-workflow. - .. note:: + > [!NOTE] - Do NOT use this execution_date to drive any production logic. It might be useful as a tag for data to help + > Do NOT use this execution_date to drive any production logic. It might be useful as a tag for data to help in debugging.
""" return self._execution_date @@ -267,9 +268,9 @@ def execution_id(self) -> _identifier.WorkflowExecutionIdentifier: This is the identifier of the workflow execution within the underlying engine. It will be consistent across all task executions in a workflow or sub-workflow execution. - .. note:: + > [!NOTE] - Do NOT use this execution_id to drive any production logic. This execution ID should only be used as a tag + > Do NOT use this execution_id to drive any production logic. This execution ID should only be used as a tag on output data to link back to the workflow run that created it. """ return self._execution_id @@ -468,8 +469,8 @@ class CompilationState(object): prefix (str): This is because we may one day want to be able to have subworkflows inside other workflows. If users choose to not specify their node names, then we can end up with multiple "n0"s. This prefix allows us to give those nested nodes a distinct name, as well as properly identify them in the workflow. - mode (int): refer to :py:class:`flytekit.extend.ExecutionState.Mode` - task_resolver (Optional[TaskResolverMixin]): Please see :py:class:`flytekit.extend.TaskResolverMixin` + mode (int): refer to `flytekit.extend.ExecutionState.Mode` + task_resolver (Optional[TaskResolverMixin]): Please see `flytekit.extend.TaskResolverMixin` nodes (Optional[List]): Stores currently compiled nodes so far. """ @@ -688,9 +689,9 @@ class FlyteContext(object): compile workflows, serialize Flyte entities, etc. Even though this object as a ``current_context`` function on it, it should not be called directly. Please use the - :py:class:`flytekit.FlyteContextManager` object instead. + `flytekit.FlyteContextManager` object instead. - Please do not confuse this object with the :py:class:`flytekit.ExecutionParameters` object. + Please do not confuse this object with the `flytekit.ExecutionParameters` object. 
""" file_access: FileAccessProvider @@ -796,18 +797,20 @@ def get_deck(self) -> typing.Union[str, "IPython.core.display.HTML"]: # type:ig The return value depends on the execution environment. In a notebook, the return value is compatible with IPython.display and should be rendered in the notebook. - .. code-block:: python + ```python with flytekit.new_context() as ctx: my_task(...) ctx.get_deck() + ``` OR if you wish to explicitly display - .. code-block:: python + ```python from IPython import display display(ctx.get_deck()) + ``` """ from flytekit.deck.deck import _get_deck @@ -918,7 +921,7 @@ class FlyteContextManager(object): Typical usage is - .. code-block:: python + ```python FlyteContextManager.initialize() with FlyteContextManager.with_context(o) as ctx: @@ -928,6 +931,7 @@ class FlyteContextManager(object): FlyteContextManager.push_context() # but correspondingly a pop_context should be called FlyteContextManager.pop_context() + ``` """ signal_handlers: typing.List[typing.Callable[[int, FrameType], typing.Any]] = [] diff --git a/flytekit/core/dynamic_workflow_task.py b/flytekit/core/dynamic_workflow_task.py index a9ff5055db..5ce9e41d4b 100644 --- a/flytekit/core/dynamic_workflow_task.py +++ b/flytekit/core/dynamic_workflow_task.py @@ -31,7 +31,7 @@ The resulting workflow is passed back to the Flyte engine and is run as a :std:ref:`subworkflow `. Simple usage -.. code-block:: +```python @dynamic def my_dynamic_subwf(a: int) -> (typing.List[str], int): @@ -39,16 +39,18 @@ def my_dynamic_subwf(a: int) -> (typing.List[str], int): for i in range(a): s.append(t1(a=i)) return s, 5 +``` Note in the code block that we call the Python ``range`` operator on the input. This is typically not allowed in a workflow but it is here. You can even express dependencies between tasks. -.. code-block:: +```python @dynamic def my_dynamic_subwf(a: int, b: int) -> int: x = t1(a=a) return t2(b=b, x=x) +``` See the :std:ref:`cookbook ` for a longer discussion. 
""" # noqa: W293 diff --git a/flytekit/core/launch_plan.py b/flytekit/core/launch_plan.py index 05ba393dd4..5d973fde3a 100644 --- a/flytekit/core/launch_plan.py +++ b/flytekit/core/launch_plan.py @@ -25,18 +25,18 @@ class LaunchPlan(object): Every workflow is registered with a default launch plan, which is just a launch plan with none of the additional attributes set - no default values, fixed values, schedules, etc. Assuming you have the following workflow - .. code-block:: python + ```python @workflow def wf(a: int, c: str) -> str: ... - + ``` Create the default launch plan with - .. code-block:: python + ```python LaunchPlan.get_or_create(workflow=my_wf) - + ``` If you specify additional parameters, you'll also have to give the launch plan a unique name. Default and fixed inputs can be expressed as Python native values like so: @@ -59,10 +59,10 @@ def wf(a: int, c: str) -> str: :language: python :dedent: 4 - .. code-block:: python + ```python from flytekit.models.common import Annotations, AuthRole, Labels, RawOutputDataConfig - + ``` Then use as follows .. literalinclude:: ../../../tests/flytekit/unit/core/test_launch_plan.py diff --git a/flytekit/core/legacy_map_task.py b/flytekit/core/legacy_map_task.py index 82f5fb1da7..80ab891e04 100644 --- a/flytekit/core/legacy_map_task.py +++ b/flytekit/core/legacy_map_task.py @@ -367,7 +367,7 @@ class MapTaskResolver(TrackedInstance, TaskResolverMixin): But in cases in which `j` is bound to a fixed value by using `functools.partial` we need a way to ensure that the interface is not simply interpolated, but only the unbound inputs are interpolated. - .. code-block:: python + ```python def foo((i: int, j: str) -> str: ... 
@@ -375,6 +375,7 @@ def foo((i: int, j: str) -> str: mt = map_task(functools.partial(foo, j=10)) print(mt.interface) + ``` output: diff --git a/flytekit/core/notification.py b/flytekit/core/notification.py index c964c67568..a6f5095cce 100644 --- a/flytekit/core/notification.py +++ b/flytekit/core/notification.py @@ -60,11 +60,12 @@ class PagerDuty(Notification): """ This notification should be used when sending emails to the PagerDuty service. - .. code-block:: python + ```python from flytekit.models.core.execution import WorkflowExecutionPhase PagerDuty(phases=[WorkflowExecutionPhase.SUCCEEDED], recipients_email=["my-team@email.com"]) + ``` """ def __init__(self, phases: List[int], recipients_email: List[str]): @@ -72,6 +73,7 @@ def __init__(self, phases: List[int], recipients_email: List[str]): :param list[int] phases: A required list of phases for which to fire the event. Events can only be fired for terminal phases. Phases should be as defined in: flytekit.models.core.execution.WorkflowExecutionPhase :param list[str] recipients_email: A required non-empty list of recipients for the notification. + """ super(PagerDuty, self).__init__(phases, pager_duty=_common_model.PagerDutyNotification(recipients_email)) @@ -80,11 +82,12 @@ class Email(Notification): """ This notification should be used when sending regular emails to people. - .. code-block:: python + ```python from flytekit.models.core.execution import WorkflowExecutionPhase Email(phases=[WorkflowExecutionPhase.SUCCEEDED], recipients_email=["my-team@email.com"]) + ``` """ def __init__(self, phases: List[int], recipients_email: List[str]): @@ -100,11 +103,12 @@ class Slack(Notification): """ This notification should be used when sending emails to the Slack. - .. 
code-block:: python + ```python from flytekit.models.core.execution import WorkflowExecutionPhase Slack(phases=[WorkflowExecutionPhase.SUCCEEDED], recipients_email=["my-team@email.com"]) + ``` """ def __init__(self, phases: List[int], recipients_email: List[str]): diff --git a/flytekit/core/python_function_task.py b/flytekit/core/python_function_task.py index 48d29e6625..f6932a82a7 100644 --- a/flytekit/core/python_function_task.py +++ b/flytekit/core/python_function_task.py @@ -81,12 +81,13 @@ class PythonInstanceTask(PythonAutoContainerTask[T], ABC): # type: ignore a platform defined execute method. (Execute needs to be overridden). This base class ensures that the module loader will invoke the right class automatically, by capturing the module name and variable in the module name. - .. code-block: python + ```python x = MyInstanceTask(name="x", .....) # this can be invoked as x(a=5) # depending on the interface of the defined task + ``` """ @@ -112,12 +113,12 @@ class PythonFunctionTask(PythonAutoContainerTask[T]): # type: ignore It is advised this task is used using the @task decorator as follows - .. code-block: python + ```python @task def my_func(a: int) -> str: ... - + ``` In the above code, the name of the function, the module, and the interface (inputs = int and outputs = str) will be auto detected. """ diff --git a/flytekit/core/resources.py b/flytekit/core/resources.py index 27e781e7ad..44d4f6fbd0 100644 --- a/flytekit/core/resources.py +++ b/flytekit/core/resources.py @@ -18,7 +18,7 @@ class Resources(DataClassJSONMixin): """ This class is used to specify both resource requests and resource limits. - .. code-block:: python + ```python Resources(cpu="1", mem="2048") # This is 1 CPU and 2 KB of memory Resources(cpu="100m", mem="2Gi") # This is 1/10th of a CPU and 2 gigabytes of memory @@ -27,16 +27,16 @@ class Resources(DataClassJSONMixin): # For Kubernetes-based tasks, pods use ephemeral local storage for scratch space, caching, and for logs. 
# This allocates 1Gi of such local storage. Resources(ephemeral_storage="1Gi") - + ``` When used together with `@task(resources=)`, you a specific the request and limits with one object. When the value is set to a tuple or list, the first value is the request and the second value is the limit. If the value is a single value, then both the requests and limit is set to that value. For example, the `Resource(cpu=("1", "2"), mem=1024)` will set the cpu request to 1, cpu limit to 2, mem limit and request to 1024. - .. note:: + > [!NOTE] - Persistent storage is not currently supported on the Flyte backend. + > Persistent storage is not currently supported on the Flyte backend. Please see the :std:ref:`User Guide ` for detailed examples. Also refer to the `K8s conventions. `__ diff --git a/flytekit/core/schedule.py b/flytekit/core/schedule.py index 891fb17a24..ac42eeedbc 100644 --- a/flytekit/core/schedule.py +++ b/flytekit/core/schedule.py @@ -26,11 +26,12 @@ class CronSchedule(_schedule_models.Schedule): This uses standard `cron format `__ in case where you are using default native scheduler using the schedule attribute. - .. code-block:: + ``` CronSchedule( schedule="*/1 * * * *", # Following schedule runs every min ) + ``` See the :std:ref:`User Guide ` for further examples. """ @@ -160,11 +161,12 @@ class FixedRate(_schedule_models.Schedule): """ Use this class to schedule a fixed-rate interval for a launch plan. - .. code-block:: python + ```python from datetime import timedelta FixedRate(duration=timedelta(minutes=10)) + ``` See the :std:ref:`fixed rate intervals` chapter in the cookbook for additional usage examples. """ diff --git a/flytekit/core/task.py b/flytekit/core/task.py index 25c12c4b4a..e13bbd602c 100644 --- a/flytekit/core/task.py +++ b/flytekit/core/task.py @@ -34,12 +34,12 @@ class TaskPlugins(object): Every task that the user wishes to use should be available in this factory. Usage - .. 
code-block:: python + ```python TaskPlugins.register_pythontask_plugin(config_object_type, plugin_object_type) # config_object_type is any class that will be passed to the plugin_object as task_config # Plugin_object_type is a derivative of ``PythonFunctionTask`` - + ``` Examples of available task plugins include different query-based plugins such as :py:class:`flytekitplugins.athena.task.AthenaTask` and :py:class:`flytekitplugins.hive.task.HiveTask`, kubeflow operators like :py:class:`plugins.kfpytorch.flytekitplugins.kfpytorch.task.PyTorchFunctionTask` and @@ -61,11 +61,12 @@ def register_pythontask_plugin(cls, plugin_config_type: type, plugin: Type[Pytho """ Use this method to register a new plugin into Flytekit. Usage :: - .. code-block:: python + ```python TaskPlugins.register_pythontask_plugin(config_object_type, plugin_object_type) # config_object_type is any class that will be passed to the plugin_object as task_config # Plugin_object_type is a derivative of ``PythonFunctionTask`` + ``` """ if plugin_config_type in cls._PYTHONFUNCTION_TASK_PLUGINS: found = cls._PYTHONFUNCTION_TASK_PLUGINS[plugin_config_type] @@ -231,20 +232,21 @@ def task( For a simple python task, - .. code-block:: python + ```python @task def my_task(x: int, y: typing.Dict[str, str]) -> str: ... + ``` For specific task types - .. code-block:: python + ```python @task(task_config=Spark(), retries=3) def my_task(x: int, y: typing.Dict[str, str]) -> str: ... - + ``` Please see some cookbook :std:ref:`task examples ` for additional information. :param _task_function: This argument is implicitly passed and represents the decorated function @@ -281,7 +283,7 @@ def my_task(x: int, y: typing.Dict[str, str]) -> str: bloat because of various dependencies and a dependency is only required for this or a set of tasks, and they vary from the default. - .. code-block:: python + ```python # Use default image name `fqn` and alter the tag to `tag-{{default.tag}}` tag of the default image # with a prefix. 
In this case, it is assumed that the image like @@ -295,6 +297,7 @@ def foo(): @task(container_image='{{.images.xyz.fqn}}:{{images.default.tag}}') def foo2(): ... + ``` :param environment: Environment variables that should be added for this tasks execution :param requests: Specify compute resource requests for your task. For Pod-plugin tasks, these values will apply only to the primary container. @@ -322,7 +325,7 @@ def foo2(): For example this is useful to run launchplans dynamically, because launchplans must be registered on flyteadmin before they can be run. Tasks and workflows do not have this requirement. - .. code-block:: python + ```python @workflow def workflow0(): @@ -336,6 +339,7 @@ def workflow0(): def launch_dynamically(): # To run a sub-launchplan it must have previously been registered on flyteadmin. return [launchplan0]*10 + ``` :param task_resolver: Provide a custom task resolver. :param disable_deck: (deprecated) If true, this task will not output deck html file :param enable_deck: If true, this task will output deck html file @@ -578,7 +582,7 @@ def eager( For example: - .. code-block:: python + ```python from flytekit import task, eager @@ -601,22 +605,22 @@ async def eager_workflow(x: int) -> int: result = asyncio.run(eager_workflow(x=1)) print(f"Result: {result}") # "Result: 4" - + ``` Unlike :py:func:`dynamic workflows `, eager workflows are not compiled into a workflow spec, but uses python's `async `__ capabilities to execute flyte entities. - .. note:: + > [!NOTE] - Eager workflows only support `@task`, `@workflow`, and `@eager` entities. Conditionals are not supported, use a + > Eager workflows only support `@task`, `@workflow`, and `@eager` entities. Conditionals are not supported, use a plain Python if statement instead. - .. 
important:: + > [!IMPORTANT] - A ``client_secret_group`` and ``client_secret_key`` is needed for authenticating via + > A ``client_secret_group`` and ``client_secret_key`` is needed for authenticating via :py:class:`~flytekit.remote.remote.FlyteRemote` using the ``client_credentials`` authentication, which is configured via :py:class:`~flytekit.configuration.PlatformConfig`. - .. code-block:: python + ```python from flytekit.remote import FlyteRemote from flytekit.configuration import Config @@ -629,18 +633,19 @@ async def eager_workflow(x: int) -> int: async def eager_workflow(x: int) -> int: out = await add_one(x) return await double(one) - + ``` Where ``config.yaml`` contains is a flytectl-compatible config file. For more details, see `here `__. When using a sandbox cluster started with ``flytectl demo start``, however, the ``client_secret_group`` and ``client_secret_key`` are not needed, : - .. code-block:: python + ```python @eager async def eager_workflow(x: int) -> int: ... + ``` """ if _fn is None: diff --git a/flytekit/core/testing.py b/flytekit/core/testing.py index 4eabfaddd6..3ac703563f 100644 --- a/flytekit/core/testing.py +++ b/flytekit/core/testing.py @@ -20,7 +20,7 @@ def task_mock(t: PythonTask) -> typing.Generator[MagicMock, None, None]: Usage: - .. 
code-block:: python + ```python @task def t1(i: int) -> int: @@ -30,6 +30,7 @@ def t1(i: int) -> int: m.side_effect = lambda x: x t1(10) # The mock is valid only within this context + ``` """ if not isinstance(t, PythonTask) and not isinstance(t, WorkflowBase) and not isinstance(t, ReferenceEntity): diff --git a/flytekit/tools/module_loader.py b/flytekit/tools/module_loader.py index 977a194fbd..c16faa548b 100644 --- a/flytekit/tools/module_loader.py +++ b/flytekit/tools/module_loader.py @@ -43,7 +43,7 @@ def just_load_modules(pkgs: List[str]): def load_object_from_module(object_location: str) -> Any: """ - # TODO: Handle corner cases, like where the first part is [] maybe + TODO: Handle corner cases, like where the first part is [] maybe """ class_obj = object_location.split(".") class_obj_mod = class_obj[:-1] # e.g. ['flytekit', 'core', 'python_auto_container'] From 55320f9c2eff91755bfde927eced8a8f5d9b3f17 Mon Sep 17 00:00:00 2001 From: chmod77 Date: Wed, 26 Mar 2025 06:35:03 +0300 Subject: [PATCH 02/20] chore: docstring cleanup Signed-off-by: chmod77 Signed-off-by: Peeter Piegaze <1153481+ppiegaze@users.noreply.github.com> --- flytekit/__init__.py | 11 +-- flytekit/clients/auth_helper.py | 3 +- flytekit/configuration/__init__.py | 3 +- flytekit/core/base_task.py | 3 +- flytekit/core/checkpointer.py | 1 - flytekit/core/condition.py | 3 +- flytekit/core/context_manager.py | 2 - flytekit/core/notification.py | 5 +- flytekit/core/resources.py | 1 - flytekit/core/shim_task.py | 7 +- flytekit/core/task.py | 7 +- flytekit/core/tracker.py | 6 +- flytekit/core/type_engine.py | 11 +-- flytekit/deck/deck.py | 3 +- flytekit/exceptions/eager.py | 3 +- flytekit/extras/accelerators.py | 12 ++-- flytekit/extras/sqlite3/task.py | 5 +- flytekit/extras/tasks/shell.py | 8 +-- flytekit/lazy_import/lazy_module.py | 3 +- flytekit/remote/__init__.py | 108 ++++++++++++---------------- flytekit/remote/backfill.py | 8 +-- flytekit/remote/remote.py | 5 +- flytekit/types/directory/types.py | 
8 +-- flytekit/types/file/file.py | 3 +- 24 files changed, 105 insertions(+), 124 deletions(-) diff --git a/flytekit/__init__.py b/flytekit/__init__.py index a6eb11091e..eda6fcdc1b 100644 --- a/flytekit/__init__.py +++ b/flytekit/__init__.py @@ -200,9 +200,10 @@ def current_context() -> ExecutionParameters: Usage - .. code-block:: python + ```python flytekit.current_context().logging.info(...) + ``` Available params are documented in :py:class:`flytekit.core.context_manager.ExecutionParams`. There are some special params, that should be available @@ -223,7 +224,7 @@ def load_implicit_plugins(): entrypoint specification to their setup.py. The following example shows how we can autoload a module called fsspec (whose init files contains the necessary plugin registration step) - .. code-block:: + ``` # note the group is always ``flytekit.plugins`` setup( @@ -231,17 +232,17 @@ def load_implicit_plugins(): entry_points={'flytekit.plugins': 'fsspec=flytekitplugins.fsspec'}, ... ) - + ``` This works as long as the fsspec module has - .. code-block:: + ``` # For data persistence plugins DataPersistencePlugins.register_plugin(f"{k}://", FSSpecPersistence, force=True) # OR for type plugins TypeEngine.register(PanderaTransformer()) # etc - + ``` """ discovered_plugins = entry_points(group="flytekit.plugins") for p in discovered_plugins: diff --git a/flytekit/clients/auth_helper.py b/flytekit/clients/auth_helper.py index 151a6d84ba..213edd92e6 100644 --- a/flytekit/clients/auth_helper.py +++ b/flytekit/clients/auth_helper.py @@ -238,7 +238,8 @@ def wrap_exceptions_channel(cfg: PlatformConfig, in_channel: grpc.Channel) -> gr Wraps the input channel with RetryExceptionWrapperInterceptor. This wrapper will cover all exceptions and raise Exception from the Family flytekit.exceptions - .. note:: This channel should be usually the outermost channel. This channel will raise a FlyteException + > [!NOTE] + > This channel should be usually the outermost channel. 
This channel will raise a FlyteException :param cfg: PlatformConfig :param in_channel: grpc.Channel diff --git a/flytekit/configuration/__init__.py b/flytekit/configuration/__init__.py index 66209589c5..dc27c8f434 100644 --- a/flytekit/configuration/__init__.py +++ b/flytekit/configuration/__init__.py @@ -337,7 +337,7 @@ def from_images(cls, default_image: str, m: typing.Optional[typing.Dict[str, str Allows you to programmatically create an ImageConfig. Usually only the default_image is required, unless your workflow uses multiple images - .. code:: python + ```python ImageConfig.from_dict( "ghcr.io/flyteorg/flytecookbook:v1.0.0", @@ -346,6 +346,7 @@ def from_images(cls, default_image: str, m: typing.Optional[typing.Dict[str, str "other": "...", } ) + ``` :return: """ diff --git a/flytekit/core/base_task.py b/flytekit/core/base_task.py index ed6c2af22b..e83cf33847 100644 --- a/flytekit/core/base_task.py +++ b/flytekit/core/base_task.py @@ -101,9 +101,10 @@ def kwtypes(**kwargs) -> OrderedDict[str, Type]: """ This is a small helper function to convert the keyword arguments to an OrderedDict of types. - .. code-block:: python + ```python kwtypes(a=int, b=str) + ``` """ d = collections.OrderedDict() for k, v in kwargs.items(): diff --git a/flytekit/core/checkpointer.py b/flytekit/core/checkpointer.py index 5662774831..c43208a5c8 100644 --- a/flytekit/core/checkpointer.py +++ b/flytekit/core/checkpointer.py @@ -22,7 +22,6 @@ def restore(self, path: typing.Union[Path, str]) -> typing.Optional[Path]: If download is successful the downloaded path is returned > [!NOTE] - > Download will not be performed, if the checkpoint was previously restored. The method will return the previously downloaded path. 
diff --git a/flytekit/core/condition.py b/flytekit/core/condition.py index 4233868ccf..0f754cd6c2 100644 --- a/flytekit/core/condition.py +++ b/flytekit/core/condition.py @@ -39,8 +39,7 @@ class ConditionalSection: to override the compilation behavior > [!NOTE] - - > Conditions can only be used within a workflow context. + > Conditions can only be used within a workflow context. Usage: diff --git a/flytekit/core/context_manager.py b/flytekit/core/context_manager.py index a1ae6b2f5c..86eaa2fe45 100644 --- a/flytekit/core/context_manager.py +++ b/flytekit/core/context_manager.py @@ -256,7 +256,6 @@ def execution_date(self) -> datetime: executed in a workflow or sub-workflow. > [!NOTE] - > Do NOT use this execution_date to drive any production logic. It might be useful as a tag for data to help in debugging. """ @@ -269,7 +268,6 @@ def execution_id(self) -> _identifier.WorkflowExecutionIdentifier: task executions in a workflow or sub-workflow execution. > [!NOTE] - > Do NOT use this execution_id to drive any production logic. This execution ID should only be used as a tag on output data to link back to the workflow run that created it. """ diff --git a/flytekit/core/notification.py b/flytekit/core/notification.py index a6f5095cce..ce31b4b78e 100644 --- a/flytekit/core/notification.py +++ b/flytekit/core/notification.py @@ -2,9 +2,8 @@ Notifications are primarily used when defining Launch Plans (also can be used when launching executions) and will trigger the Flyte platform to send emails when a workflow run reaches certain stages (fails or succeeds, etc.). -.. note:: - - Notifications require some setup and configuration on the Flyte platform side. Please contact your Flyte platform +> [!NOTE] +> Notifications require some setup and configuration on the Flyte platform side. Please contact your Flyte platform admins to get this feature enabled. 
See :std:ref:`cookbook:setting up workflow notifications` Each notification type takes a list of :py:class:`flytekit.models.core.execution.WorkflowExecutionPhase` and a list of diff --git a/flytekit/core/resources.py b/flytekit/core/resources.py index 44d4f6fbd0..859f3476a1 100644 --- a/flytekit/core/resources.py +++ b/flytekit/core/resources.py @@ -35,7 +35,6 @@ class Resources(DataClassJSONMixin): mem limit and request to 1024. > [!NOTE] - > Persistent storage is not currently supported on the Flyte backend. Please see the :std:ref:`User Guide ` for detailed examples. diff --git a/flytekit/core/shim_task.py b/flytekit/core/shim_task.py index b205bbab08..35e42c858a 100644 --- a/flytekit/core/shim_task.py +++ b/flytekit/core/shim_task.py @@ -25,10 +25,9 @@ class ExecutableTemplateShimTask(object): Basically at execution time (both locally and on a Flyte cluster), the task template is given to the executor, which is responsible for computing and returning the results. - .. note:: - - The interface at execution time will have to derived from the Flyte IDL interface, which means it may be lossy. - This is because when a task is serialized from Python into the ``TaskTemplate`` some information is lost because + > [!NOTE] + > The interface at execution time will have to derived from the Flyte IDL interface, which means it may be lossy. + This is because when a task is serialized from Python into the ``TaskTemplate`` some information is lost because Flyte IDL can't keep track of every single Python type (or Java type if writing in the Java flytekit). This class also implements the ``dispatch_execute`` and ``execute`` functions to make it look like a ``PythonTask`` diff --git a/flytekit/core/task.py b/flytekit/core/task.py index e13bbd602c..31508b7bb8 100644 --- a/flytekit/core/task.py +++ b/flytekit/core/task.py @@ -310,9 +310,8 @@ def foo2(): Refer to :py:class:`Secret` to understand how to specify the request for a secret. It may change based on the backend provider. 
- .. note:: - - During local execution, the secrets will be pulled from the local environment variables + > [!NOTE] + > During local execution, the secrets will be pulled from the local environment variables with the format `{GROUP}_{GROUP_VERSION}_{KEY}`, where all the characters are capitalized and the prefix is not used. @@ -610,12 +609,10 @@ async def eager_workflow(x: int) -> int: uses python's `async `__ capabilities to execute flyte entities. > [!NOTE] - > Eager workflows only support `@task`, `@workflow`, and `@eager` entities. Conditionals are not supported, use a plain Python if statement instead. > [!IMPORTANT] - > A ``client_secret_group`` and ``client_secret_key`` is needed for authenticating via :py:class:`~flytekit.remote.remote.FlyteRemote` using the ``client_credentials`` authentication, which is configured via :py:class:`~flytekit.configuration.PlatformConfig`. diff --git a/flytekit/core/tracker.py b/flytekit/core/tracker.py index 8ead705cd4..d008d85f78 100644 --- a/flytekit/core/tracker.py +++ b/flytekit/core/tracker.py @@ -178,12 +178,13 @@ def isnested(func: Callable) -> bool: This would essentially be any function with a `..` (defined within a function) e.g. - .. code:: python + ```python def foo(): def foo_inner(): pass pass + ``` In the above example `foo_inner` is the local function or a nested function. """ @@ -194,7 +195,7 @@ def foo_inner(): def is_functools_wrapped_module_level(func: Callable) -> bool: """Returns true if the function is a functools.wraps-updated function that is defined in the module-level scope. - .. code:: python + ```python import functools @@ -221,6 +222,7 @@ def foo_inner(*args, **kwargs): is_functools_wrapped_module_level(foo) # True is_functools_wrapped_module_level(bar) # False + ``` In this case, applying this function to ``foo`` returns true because ``foo`` was defined in the module-level scope. 
Applying this function to ``bar`` returns false because it's being assigned to ``foo_inner``, which is a diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index c53884eced..a2285b8998 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -475,7 +475,7 @@ class DataclassTransformer(TypeTransformer[object]): Example - .. code-block:: python + ```python @dataclass class Test(DataClassJsonMixin): @@ -485,10 +485,11 @@ class Test(DataClassJsonMixin): from marshmallow_jsonschema import JSONSchema t = Test(a=10,b="e") JSONSchema().dump(t.schema()) + ``` Output will look like - .. code-block:: json + ```python {'$schema': 'http://json-schema.org/draft-07/schema#', 'definitions': {'TestSchema': {'properties': {'a': {'title': 'a', @@ -498,10 +499,10 @@ class Test(DataClassJsonMixin): 'type': 'object', 'additionalProperties': False}}, '$ref': '#/definitions/TestSchema'} + ``` - .. note:: - - The schema support is experimental and is useful for auto-completing in the UI/CLI + > [!NOTE] + > The schema support is experimental and is useful for auto-completing in the UI/CLI """ diff --git a/flytekit/deck/deck.py b/flytekit/deck/deck.py index c8f9fd6644..f9a45f0472 100644 --- a/flytekit/deck/deck.py +++ b/flytekit/deck/deck.py @@ -41,7 +41,7 @@ class Deck: scatter plots or Markdown text. In addition, users can create new decks to render their data with custom renderers. - .. 
code-block:: python + ```python iris_df = px.data.iris() @@ -61,6 +61,7 @@ def t1() -> str: @task() def t2() -> Annotated[pd.DataFrame, TopFrameRenderer(10)]: return iris_df + ``` """ def __init__(self, name: str, html: Optional[str] = "", auto_add_to_deck: bool = True): diff --git a/flytekit/exceptions/eager.py b/flytekit/exceptions/eager.py index 806c66884d..5b599aee7a 100644 --- a/flytekit/exceptions/eager.py +++ b/flytekit/exceptions/eager.py @@ -4,7 +4,7 @@ class EagerException(Exception): This exception should be used in an :py:func:`@eager ` workflow function to catch exceptions that are raised by tasks or subworkflows. - .. code-block:: python + ```python from flytekit import task from flytekit.exceptions.eager import EagerException @@ -28,4 +28,5 @@ async def eager_workflow(x: int) -> int: # and raised as an EagerException raise return await double(x=out) + ``` """ diff --git a/flytekit/extras/accelerators.py b/flytekit/extras/accelerators.py index 624fd8192a..d2aff6b6d6 100644 --- a/flytekit/extras/accelerators.py +++ b/flytekit/extras/accelerators.py @@ -12,7 +12,7 @@ If you want to use a specific GPU device, you can pass the device name directly to the task decorator, e.g.: -.. code-block:: +```python @task( limits=Resources(gpu="1"), @@ -20,7 +20,7 @@ ) def my_task() -> None: ... - +``` Base Classes ------------ @@ -51,7 +51,7 @@ def my_task() -> None: If using the constants, you can import them directly from the module, e.g.: -.. code-block:: +```python from flytekit.extras.accelerators import T4 @@ -61,10 +61,10 @@ def my_task() -> None: ) def my_task() -> None: ... - +``` if you want to use a fractional GPU, you can use the ``partitioned`` method on the accelerator constant, e.g.: -.. code-block:: +```python from flytekit.extras.accelerators import A100 @@ -74,7 +74,7 @@ def my_task() -> None: ) def my_task() -> None: ... - +``` .. currentmodule:: flytekit.extras.accelerators .. 
autosummary:: diff --git a/flytekit/extras/sqlite3/task.py b/flytekit/extras/sqlite3/task.py index 51dcaccd61..866898fe8e 100644 --- a/flytekit/extras/sqlite3/task.py +++ b/flytekit/extras/sqlite3/task.py @@ -57,9 +57,8 @@ class SQLite3Task(PythonCustomizedContainerTask[SQLite3Config], SQLTask[SQLite3C """ Run client side SQLite3 queries that optionally return a FlyteSchema object. - .. note:: - - This is a pre-built container task. That is, your user container will not be used at task execution time. + > [!NOTE] + > This is a pre-built container task. That is, your user container will not be used at task execution time. Instead the image defined in this task definition will be used instead. .. literalinclude:: ../../../tests/flytekit/unit/extras/sqlite3/test_task.py diff --git a/flytekit/extras/tasks/shell.py b/flytekit/extras/tasks/shell.py index 32ae33fcc7..7c7a4e34ec 100644 --- a/flytekit/extras/tasks/shell.py +++ b/flytekit/extras/tasks/shell.py @@ -394,12 +394,12 @@ def __init__( template. The template itself will export the desired environment variables, and subsequently execute the desired "raw" script with the specified arguments. - .. note:: - This means that within your workflow, you can dynamically control the env variables, arguments, and even the + > [!NOTE] + > This means that within your workflow, you can dynamically control the env variables, arguments, and even the actual script you want to run. - .. note:: - The downside is that a dynamic workflow will be required. The "raw" script passed in at execution time must + > [!NOTE] + > The downside is that a dynamic workflow will be required. The "raw" script passed in at execution time must be at the specified location. 
These args are forwarded directly to the parent `ShellTask` constructor as behavior does not diverge diff --git a/flytekit/lazy_import/lazy_module.py b/flytekit/lazy_import/lazy_module.py index 3112682d9d..9bd23da8cf 100644 --- a/flytekit/lazy_import/lazy_module.py +++ b/flytekit/lazy_import/lazy_module.py @@ -30,10 +30,11 @@ def is_imported(module_name): def lazy_module(fullname): """ This function is used to lazily import modules. It is used in the following way: - .. code-block:: python + ```python from flytekit.lazy_import import lazy_module sklearn = lazy_module("sklearn") sklearn.svm.SVC() + ``` :param Text fullname: The full name of the module to import """ if fullname in sys.modules: diff --git a/flytekit/remote/__init__.py b/flytekit/remote/__init__.py index dd92a813f2..c4b30b202c 100644 --- a/flytekit/remote/__init__.py +++ b/flytekit/remote/__init__.py @@ -1,88 +1,70 @@ """ -===================== -Remote Access -===================== +# Remote Access -.. currentmodule:: flytekit.remote +This module provides utilities for performing operations on tasks, workflows, launchplans, and executions. For example, the following code fetches and executes a workflow: -This module provides utilities for performing operations on tasks, workflows, launchplans, and executions, for example, -the following code fetches and executes a workflow: +```python +# create a remote object from flyte config and environment variables +FlyteRemote(config=Config.auto()) +FlyteRemote(config=Config.auto(config_file=....)) +FlyteRemote(config=Config(....)) +``` -.. code-block:: python +# Or if you need to specify a custom cert chain +# (options and compression are also respected keyword arguments) +FlyteRemote(private_key=your_private_key_bytes, root_certificates=..., certificate_chain=...) 
- # create a remote object from flyte config and environment variables - FlyteRemote(config=Config.auto()) - FlyteRemote(config=Config.auto(config_file=....)) - FlyteRemote(config=Config(....)) +# fetch a workflow from the flyte backend +remote = FlyteRemote(...) +flyte_workflow = remote.fetch_workflow(name="my_workflow", version="v1") - # Or if you need to specify a custom cert chain - # (options and compression are also respected keyword arguments) - FlyteRemote(private_key=your_private_key_bytes, root_certificates=..., certificate_chain=...) +# execute the workflow, wait=True will return the execution object after it's completed +workflow_execution = remote.execute(flyte_workflow, inputs={"a": 1, "b": 10}, wait=True) - # fetch a workflow from the flyte backend - remote = FlyteRemote(...) - flyte_workflow = remote.fetch_workflow(name="my_workflow", version="v1") +# inspect the execution's outputs +print(workflow_execution.outputs) +``` - # execute the workflow, wait=True will return the execution object after it's completed - workflow_execution = remote.execute(flyte_workflow, inputs={"a": 1, "b": 10}, wait=True) +## Entrypoint - # inspect the execution's outputs - print(workflow_execution.outputs) +### FlyteRemote +The main class for interacting with a Flyte backend. -.. _remote-entrypoint: +### Options +Configuration options for the FlyteRemote client. -Entrypoint -========== +## Entities -.. autosummary:: - :template: custom.rst - :toctree: generated/ - :nosignatures: +### FlyteTask +Represents a registered Flyte task. - ~remote.FlyteRemote - ~remote.Options +### FlyteWorkflow +Represents a registered Flyte workflow. -.. _remote-flyte-entities: +### FlyteLaunchPlan +Represents a registered Flyte launch plan. -Entities -======== +## Entity Components -.. autosummary:: - :template: custom.rst - :toctree: generated/ - :nosignatures: +### FlyteNode +Base class for nodes in a Flyte workflow. 
- ~entities.FlyteTask - ~entities.FlyteWorkflow - ~entities.FlyteLaunchPlan +### FlyteTaskNode +Represents a task node in a Flyte workflow. -.. _remote-flyte-entity-components: +### FlyteWorkflowNode +Represents a subworkflow node in a Flyte workflow. -Entity Components -================= +## Execution Objects -.. autosummary:: - :template: custom.rst - :toctree: generated/ - :nosignatures: +### FlyteWorkflowExecution +Represents an execution of a Flyte workflow. - ~entities.FlyteNode - ~entities.FlyteTaskNode - ~entities.FlyteWorkflowNode +### FlyteTaskExecution +Represents an execution of a Flyte task. -.. _remote-flyte-execution-objects: - -Execution Objects -================= - -.. autosummary:: - :template: custom.rst - :toctree: generated/ - :nosignatures: - - ~executions.FlyteWorkflowExecution - ~executions.FlyteTaskExecution - ~executions.FlyteNodeExecution +### FlyteNodeExecution +Represents an execution of a node within a workflow. """ diff --git a/flytekit/remote/backfill.py b/flytekit/remote/backfill.py index 166f2e4745..639818e9e0 100644 --- a/flytekit/remote/backfill.py +++ b/flytekit/remote/backfill.py @@ -24,22 +24,22 @@ def create_backfill_workflow( the Backfill plan is generated as (start_date - exclusive, end_date inclusive) - .. code-block:: python + ```python :caption: Correct usage for dates example lp = Launchplan.get_or_create(...) start_date = datetime.datetime(2023, 1, 1) end_date = start_date + datetime.timedelta(days=10) wf = create_backfill_workflow(start_date, end_date, for_lp=lp) + ``` - - .. 
code-block:: python + ```python :caption: Incorrect date example wf = create_backfill_workflow(end_date, start_date, for_lp=lp) # end_date is before start_date # OR wf = create_backfill_workflow(start_date, start_date, for_lp=lp) # start and end date are same - + ``` :param start_date: datetime generate a backfill starting at this datetime (exclusive) :param end_date: datetime generate a backfill ending at this datetime (inclusive) diff --git a/flytekit/remote/remote.py b/flytekit/remote/remote.py index 6c2b065792..4d9cf34cd7 100644 --- a/flytekit/remote/remote.py +++ b/flytekit/remote/remote.py @@ -1714,9 +1714,8 @@ def execute( :param serialization_settings: Optionally provide serialization settings, in case the entity being run needs to first be registered. If not provided, a default will be used. - .. note: - - The ``name`` and ``version`` arguments do not apply to ``FlyteTask``, ``FlyteLaunchPlan``, and + > [!NOTE] + > The ``name`` and ``version`` arguments do not apply to ``FlyteTask``, ``FlyteLaunchPlan``, and ``FlyteWorkflow`` entity inputs. These values are determined by referencing the entity identifier values. """ if entity.python_interface: diff --git a/flytekit/types/directory/types.py b/flytekit/types/directory/types.py index 699278b0b6..5d75872ee0 100644 --- a/flytekit/types/directory/types.py +++ b/flytekit/types/directory/types.py @@ -43,9 +43,8 @@ def noop(): ... class FlyteDirectory(SerializableType, DataClassJsonMixin, os.PathLike, typing.Generic[T]): path: PathType = field(default=None, metadata=config(mm_field=fields.String())) # type: ignore """ - .. warning:: - - This class should not be used on very large datasets, as merely listing the dataset will cause + > [!WARNING] + > This class should not be used on very large datasets, as merely listing the dataset will cause the entire dataset to be downloaded. Listing on S3 and other backend object stores is not consistent and we should not need data to be downloaded to list. 
@@ -345,7 +344,7 @@ def listdir(cls, directory: FlyteDirectory) -> typing.List[typing.Union[FlyteDir In addition, it will return a list of FlyteFile and FlyteDirectory objects that have ability to lazily download the contents of the file/folder. For example: - .. code-block:: python + ```python entity = FlyteDirectory.listdir(directory) for e in entity: @@ -356,6 +355,7 @@ def listdir(cls, directory: FlyteDirectory) -> typing.List[typing.Union[FlyteDir open(entity[0], "r") # This will download the file to the local disk. open(entity[0], "r") # flytekit will read data from the local disk if you open it again. + ``` """ final_path = directory.path diff --git a/flytekit/types/file/file.py b/flytekit/types/file/file.py index 4ba06c2008..07623d2404 100644 --- a/flytekit/types/file/file.py +++ b/flytekit/types/file/file.py @@ -399,7 +399,7 @@ def open( ): """Returns a streaming File handle - .. code-block:: python + ```python @task def copy_file(ff: FlyteFile) -> FlyteFile: @@ -408,6 +408,7 @@ def copy_file(ff: FlyteFile) -> FlyteFile: with new_file.open("wb") as w: w.write(r.read()) return new_file + ``` :param mode: Open mode. For example: 'r', 'w', 'rb', 'rt', 'wb', etc. 
:type mode: str From 85de871b24df8ac43cbae0ec5a73aa2cf06b320f Mon Sep 17 00:00:00 2001 From: chmod77 Date: Thu, 27 Mar 2025 20:48:13 +0300 Subject: [PATCH 03/20] chore: cleanup docstrings Signed-off-by: chmod77 Signed-off-by: Peeter Piegaze <1153481+ppiegaze@users.noreply.github.com> --- flytekit/__init__.py | 35 ++------ flytekit/clients/friendly.py | 11 ++- flytekit/core/data_persistence.py | 14 ---- flytekit/core/legacy_map_task.py | 6 +- flytekit/core/local_fsspec.py | 12 --- flytekit/core/python_auto_container.py | 8 +- .../core/python_customized_container_task.py | 8 +- flytekit/core/python_function_task.py | 17 ---- flytekit/core/resources.py | 2 +- flytekit/core/schedule.py | 4 +- flytekit/core/task.py | 4 +- flytekit/deck/__init__.py | 13 +-- flytekit/extend/__init__.py | 27 +----- flytekit/extras/accelerators.py | 82 ++++++------------- flytekit/remote/__init__.py | 63 +++++--------- flytekit/types/directory/__init__.py | 15 ++-- flytekit/types/directory/types.py | 8 +- flytekit/types/error/__init__.py | 13 --- flytekit/types/iterator/__init__.py | 10 --- flytekit/types/pickle/__init__.py | 9 -- flytekit/types/structured/__init__.py | 11 --- .../flytekitplugins/hive/task.py | 2 +- .../flytekitplugins/kfmpi/task.py | 8 +- .../flytekitplugins/kfpytorch/task.py | 8 +- .../flytekitplugins/kftensorflow/task.py | 4 +- .../remote/workflows/basic/basic_workflow.py | 2 +- 26 files changed, 101 insertions(+), 295 deletions(-) diff --git a/flytekit/__init__.py b/flytekit/__init__.py index eda6fcdc1b..3b771f12c2 100644 --- a/flytekit/__init__.py +++ b/flytekit/__init__.py @@ -1,11 +1,8 @@ """ -Core Flytekit ------------- - This package contains all of the most common abstractions you'll need to write Flyte workflows and extend Flytekit. -Basic Authoring --------------- +## Basic Authoring + These are the essentials needed to get started writing tasks and workflows. @@ -28,25 +25,23 @@ > This is useful for unit testing. 
-Branching and Conditionals -------------------------- +### Branching and Conditionals + Branches and conditionals can be expressed explicitly in Flyte. These conditions are evaluated in the flyte engine and hence should be used for control flow. "dynamic workflows" can be used to perform custom conditional logic not supported by flytekit. -- conditional -Customizing Tasks & Workflows ----------------------------- +### Customizing Tasks & Workflows - TaskMetadata - Wrapper object that allows users to specify Task - Resources - Things like CPUs/Memory, etc. - WorkflowFailurePolicy - Customizes what happens when a workflow fails. - PodTemplate - Custom PodTemplate for a task. -Dynamic and Nested Workflows ---------------------------- +#### Dynamic and Nested Workflows + See the Dynamic module for more information. - dynamic @@ -112,22 +107,6 @@ - LiteralType - BlobType -Task Utilities ------------- - -- HashMethod - -Artifacts --------- - -- Artifact - -Documentation ------------ - -- Description -- Documentation -- SourceCode """ import os diff --git a/flytekit/clients/friendly.py b/flytekit/clients/friendly.py index fe5c4ad750..9d1d46cf6e 100644 --- a/flytekit/clients/friendly.py +++ b/flytekit/clients/friendly.py @@ -42,9 +42,8 @@ class SynchronousFlyteClient(_RawSynchronousFlyteClient): first. Create a client by ```python - - SynchronousFlyteClient("your.domain:port", insecure=True) - # insecure should be True if your flyteadmin deployment doesn't have SSL enabled + SynchronousFlyteClient("your.domain:port", insecure=True) + # insecure should be True if your flyteadmin deployment doesn't have SSL enabled ``` """ @@ -69,9 +68,9 @@ def create_task(self, task_identifer, task_spec): retrieved via the client or viewed via the UI or command-line interfaces. > [!NOTE] - > Overwrites are not supported so any request for a given project, domain, name, and version that exists in - the database must match the existing definition exactly. 
Furthermore, as long as the request - remains identical, calling this method multiple times will result in success. + > Overwrites are not supported so any request for a given project, domain, name, and version that exists in + the database must match the existing definition exactly. Furthermore, as long as the request + remains identical, calling this method multiple times will result in success. :param flytekit.models.core.identifier.Identifier task_identifer: The identifier for this task. :param flytekit.models.task.TaskSpec task_spec: This is the actual definition of the task that diff --git a/flytekit/core/data_persistence.py b/flytekit/core/data_persistence.py index b7c9deda46..321851210e 100644 --- a/flytekit/core/data_persistence.py +++ b/flytekit/core/data_persistence.py @@ -1,21 +1,7 @@ """ -====================================== -:mod:`flytekit.core.data_persistence` -====================================== - -.. currentmodule:: flytekit.core.data_persistence - The Data persistence module is used by core flytekit and most of the core TypeTransformers to manage data fetch & store, between the durable backend store and the runtime environment. This is designed to be a pluggable system, with a default simple implementation that ships with the core. - -.. autosummary:: - :toctree: generated/ - :template: custom.rst - :nosignatures: - - FileAccessProvider - """ import asyncio diff --git a/flytekit/core/legacy_map_task.py b/flytekit/core/legacy_map_task.py index 80ab891e04..4214a92140 100644 --- a/flytekit/core/legacy_map_task.py +++ b/flytekit/core/legacy_map_task.py @@ -329,9 +329,9 @@ def map_task( There are two plugins to run maptasks that ship as part of flyteplugins: 1. K8s Array - 2. `AWS batch `_ + 2. [`AWS batch`](https://docs.flyte.org/en/latest/deployment/plugin_setup/aws/batch.html) - Enabling a plugin is controlled in the plugin configuration at `values-sandbox.yaml `_. 
+ Enabling a plugin is controlled in the plugin configuration at [`values-sandbox.yaml`](https://github.com/flyteorg/flyte/blob/10cee9f139824512b6c5be1667d321bdbc8835fa/charts/flyte/values-sandbox.yaml#L152-L162). **K8s Array** @@ -339,7 +339,7 @@ def map_task( **AWS batch** - Learn more about ``AWS batch`` setup configuration `here `_. + Learn more about ``AWS batch`` setup configuration [`here`](https://docs.flyte.org/en/latest/deployment/plugin_setup/aws/batch.html#deployment-plugin-setup-aws-array). A custom plugin can also be implemented to handle the task type. diff --git a/flytekit/core/local_fsspec.py b/flytekit/core/local_fsspec.py index 91fe93ad6f..2c517d4c30 100644 --- a/flytekit/core/local_fsspec.py +++ b/flytekit/core/local_fsspec.py @@ -1,17 +1,5 @@ """ -====================================== -:mod:`flytekit.core.local_fsspec` -====================================== -.. currentmodule:: flytekit.core.local_fsspec - - -.. autosummary:: - :toctree: generated/ - :template: custom.rst - :nosignatures: - - FlyteLocalFileSystem """ diff --git a/flytekit/core/python_auto_container.py b/flytekit/core/python_auto_container.py index aa0327299b..9e7fe224d8 100644 --- a/flytekit/core/python_auto_container.py +++ b/flytekit/core/python_auto_container.py @@ -74,10 +74,10 @@ def __init__( to provide secrets and if secrets are available in the configured secrets store. Possible options for secret stores are - - `Vault `__ - - `Confidant `__ - - `Kube secrets `__ - - `AWS Parameter store `__ + - [`Vault`](https://www.vaultproject.io/) + - [`Confidant`](https://lyft.github.io/confidant) + - [`Kube secrets`](https://kubernetes.io/docs/concepts/configuration/secret) + - [`AWS Parameter store`](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-parameter-store.html) :param pod_template: Custom PodTemplate for this task. :param pod_template_name: The name of the existing PodTemplate resource which will be used in this task. 
:param accelerator: The accelerator to use for this task. diff --git a/flytekit/core/python_customized_container_task.py b/flytekit/core/python_customized_container_task.py index ff014bd479..048197c268 100644 --- a/flytekit/core/python_customized_container_task.py +++ b/flytekit/core/python_customized_container_task.py @@ -84,10 +84,10 @@ def __init__( The key values will be available from runtime, if the backend is configured to provide secrets and if secrets are available in the configured secrets store. Possible options for secret stores are - - `Vault `__ - - `Confidant `__ - - `Kube secrets `__ - - `AWS Parameter store `__ + - [`Vault`](https://www.vaultproject.io) + - [`Confidant`](https://lyft.github.io/confidant) + - [`Kube secrets`](https://kubernetes.io/docs/concepts/configuration/secret) + - [`AWS Parameter store`](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-parameter-store.html) """ sec_ctx = None if secret_requests: diff --git a/flytekit/core/python_function_task.py b/flytekit/core/python_function_task.py index f6932a82a7..307bd31a81 100644 --- a/flytekit/core/python_function_task.py +++ b/flytekit/core/python_function_task.py @@ -1,20 +1,3 @@ -""" -========================================= -:mod:`flytekit.core.python_function_task` -========================================= - -.. currentmodule:: flytekit.core.python_function_task - -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ - - PythonFunctionTask - PythonInstanceTask - -""" - from __future__ import annotations import inspect diff --git a/flytekit/core/resources.py b/flytekit/core/resources.py index 859f3476a1..e77b2ad795 100644 --- a/flytekit/core/resources.py +++ b/flytekit/core/resources.py @@ -38,7 +38,7 @@ class Resources(DataClassJSONMixin): > Persistent storage is not currently supported on the Flyte backend. Please see the :std:ref:`User Guide ` for detailed examples. - Also refer to the `K8s conventions. 
`__ + Also refer to the [`K8s conventions.`](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-units-in-kubernetes) """ cpu: Optional[Union[str, int, float, list, tuple]] = None diff --git a/flytekit/core/schedule.py b/flytekit/core/schedule.py index ac42eeedbc..9f1350b916 100644 --- a/flytekit/core/schedule.py +++ b/flytekit/core/schedule.py @@ -23,7 +23,7 @@ def to_flyte_idl(self, *args, **kwargs) -> google_message.Message: ... class CronSchedule(_schedule_models.Schedule): """ Use this when you have a launch plan that you want to run on a cron expression. - This uses standard `cron format `__ + This uses standard [`cron format`](https://docs.flyte.org/en/latest/concepts/schedules.html#cron-expression-table) in case where you are using default native scheduler using the schedule attribute. ``` @@ -69,7 +69,7 @@ def __init__( """ :param str cron_expression: This should be a cron expression in AWS style.Shouldn't be used in case of native scheduler. :param str schedule: This takes a cron alias (see ``_VALID_CRON_ALIASES``) or a croniter parseable schedule. - Only one of this or ``cron_expression`` can be set, not both. This uses standard `cron format `_ + Only one of this or ``cron_expression`` can be set, not both. This uses standard [`cron format`](https://docs.flyte.org/en/latest/concepts/schedules.html#cron-expression) and is supported by native scheduler :param str offset: :param str kickoff_time_input_arg: This is a convenient argument to use when your code needs to know what time diff --git a/flytekit/core/task.py b/flytekit/core/task.py index 31508b7bb8..8837fdc5cb 100644 --- a/flytekit/core/task.py +++ b/flytekit/core/task.py @@ -606,7 +606,7 @@ async def eager_workflow(x: int) -> int: print(f"Result: {result}") # "Result: 4" ``` Unlike :py:func:`dynamic workflows `, eager workflows are not compiled into a workflow spec, but - uses python's `async `__ capabilities to execute flyte entities. 
+ uses python's [`async`](https://docs.python.org/3/library/asyncio.html) capabilities to execute flyte entities. > [!NOTE] > Eager workflows only support `@task`, `@workflow`, and `@eager` entities. Conditionals are not supported, use a @@ -632,7 +632,7 @@ async def eager_workflow(x: int) -> int: return await double(one) ``` Where ``config.yaml`` contains is a flytectl-compatible config file. - For more details, see `here `__. + For more details, see [`here`](https://docs.flyte.org/en/latest/flytectl/overview.html#configuration). When using a sandbox cluster started with ``flytectl demo start``, however, the ``client_secret_group`` and ``client_secret_key`` are not needed, : diff --git a/flytekit/deck/__init__.py b/flytekit/deck/__init__.py index 58da56cf64..953ca7d4a2 100644 --- a/flytekit/deck/__init__.py +++ b/flytekit/deck/__init__.py @@ -1,16 +1,11 @@ """ -========== -Flyte Deck -========== - -.. currentmodule:: flytekit.deck +# Flyte Deck Contains deck renderers provided by flytekit. -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ + +# This module provides the following classes: + Deck TopFrameRenderer diff --git a/flytekit/extend/__init__.py b/flytekit/extend/__init__.py index 73ac51e0ab..0dd96985ce 100644 --- a/flytekit/extend/__init__.py +++ b/flytekit/extend/__init__.py @@ -1,33 +1,10 @@ """ -================== -Extending Flytekit -================== -.. currentmodule:: flytekit.extend +## Extending Flytekit -This package contains things that are useful when extending Flytekit. -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ +This package contains things that are useful when extending Flytekit. 
- get_serializable - context_manager - IgnoreOutputs - ExecutionState - Image - ImageConfig - Interface - Promise - TaskPlugins - DictTransformer - T - TypeEngine - TypeTransformer - PythonCustomizedContainerTask - ExecutableTemplateShimTask - ShimTaskExecutor """ from flytekit.configuration import Image, ImageConfig, SerializationSettings diff --git a/flytekit/extras/accelerators.py b/flytekit/extras/accelerators.py index d2aff6b6d6..d6eb1ead7c 100644 --- a/flytekit/extras/accelerators.py +++ b/flytekit/extras/accelerators.py @@ -1,8 +1,7 @@ """ -Specifying Accelerators -========================== +## Specifying Accelerators -.. tags:: MachineLearning, Advanced, Hardware +tags: MachineLearning, Advanced, Hardware Flyte allows you to specify `gpu` resources for a given task. However, in some cases, you may want to use a different accelerator type, such as TPU, specific variations of GPUs, or fractional GPUs. You can configure the Flyte backend to @@ -22,29 +21,16 @@ def my_task() -> None: ... ``` -Base Classes ------------- -These classes can be used to create custom accelerator type constants. For example, you can create a TPU accelerator. - - - -.. currentmodule:: flytekit.extras.accelerators +### Base Classes -.. autosummary:: - :template: custom.rst - :toctree: generated/ - :nosignatures: - - BaseAccelerator - GPUAccelerator - MultiInstanceGPUAccelerator +These classes can be used to create custom accelerator type constants. For example, you can create a TPU accelerator. But, often, you may want to use a well known accelerator type, and to simplify this, flytekit provides a set of predefined accelerator constants, as described in the next section. -Predefined Accelerator Constants --------------------------------- +### Predefined Accelerator Constants + The `flytekit.extras.accelerators` module provides some constants for known accelerators, listed below, but this is not a complete list. 
If you know the name of the accelerator, you can pass the string name to the task decorator directly. @@ -75,22 +61,6 @@ def my_task() -> None: def my_task() -> None: ... ``` -.. currentmodule:: flytekit.extras.accelerators - -.. autosummary:: - :toctree: generated/ - :nosignatures: - - A10G - L4 - K80 - M60 - P4 - P100 - T4 - V100 - A100 - A100_80GB """ @@ -128,39 +98,39 @@ def to_flyte_idl(self) -> tasks_pb2.GPUAccelerator: #: use this constant to specify that the task should run on an -#: `NVIDIA A10 Tensor Core GPU `_ +#: `NVIDIA A10 Tensor Core GPU https://www.nvidia.com/en-us/data-center/products/a10-gpu`_ A10G = GPUAccelerator("nvidia-a10g") #: use this constant to specify that the task should run on an -#: `NVIDIA L4 Tensor Core GPU `_ +#: `NVIDIA L4 Tensor Core GPU https://www.nvidia.com/en-us/data-center/l4 L4 = GPUAccelerator("nvidia-l4") #: use this constant to specify that the task should run on an -#: `NVIDIA L4 Tensor Core GPU `_ +#: `NVIDIA L4 Tensor Core GPU https://www.nvidia.com/en-us/data-center/l4 L4_VWS = GPUAccelerator("nvidia-l4-vws") #: use this constant to specify that the task should run on an -#: `NVIDIA Tesla K80 GPU `_ +#: `NVIDIA Tesla K80 GPU https://www.nvidia.com/en-gb/data-center/tesla-k80 K80 = GPUAccelerator("nvidia-tesla-k80") #: use this constant to specify that the task should run on an -#: `NVIDIA Tesla M60 GPU `_ +#: `NVIDIA Tesla M60 GPU https://images.nvidia.com/content/tesla/pdf/188417-Tesla-M60-DS-A4-fnl-Web.pdf M60 = GPUAccelerator("nvidia-tesla-m60") #: use this constant to specify that the task should run on an -#: `NVIDIA Tesla P4 GPU `_ +#: `NVIDIA Tesla P4 GPU https://images.nvidia.com/content/pdf/tesla/184457-Tesla-P4-Datasheet-NV-Final-Letter-Web.pdf P4 = GPUAccelerator("nvidia-tesla-p4") #: use this constant to specify that the task should run on an -#: `NVIDIA Tesla P100 GPU `_ +#: `NVIDIA Tesla P100 GPU https://images.nvidia.com/content/tesla/pdf/nvidia-tesla-p100-datasheet.pdf P100 = 
GPUAccelerator("nvidia-tesla-p100") #: use this constant to specify that the task should run on an -#: `NVIDIA Tesla T4 GPU `_ +#: `NVIDIA Tesla T4 GPU https://www.nvidia.com/en-us/data-center/tesla-t4 T4 = GPUAccelerator("nvidia-tesla-t4") #: use this constant to specify that the task should run on an -#: `NVIDIA Tesla V100 GPU `_ +#: `NVIDIA Tesla V100 GPU https://images.nvidia.com/content/technologies/volta/pdf/tesla-volta-v100-datasheet-letter-fnl-web.pdf V100 = GPUAccelerator("nvidia-tesla-v100") @@ -204,10 +174,10 @@ class _A100_Base(MultiInstanceGPUAccelerator): class _A100(_A100_Base): """ - Class that represents an `NVIDIA A100 GPU `_. It is possible + Class that represents an `NVIDIA A100 GPU https://www.nvidia.com/en-us/data-center/a100. It is possible to specify a partition of an A100 GPU by using the provided partitions on the class. For example, to specify a 10GB partition, use ``A100.partition_2g_10gb``. - Refer to `Partitioned GPUs `_ + Refer to `Partitioned GPUs https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#partitioning """ partition_1g_5gb = _A100_Base.partitioned("1g.5gb") @@ -233,7 +203,7 @@ class _A100(_A100_Base): #: Use this constant to specify that the task should run on an entire -#: `NVIDIA A100 GPU `_. Fractional partitions are also available. +#: `NVIDIA A100 GPU https://www.nvidia.com/en-us/data-center/a100. Fractional partitions are also available. #: #: Use pre-defined partitions (as instance attributes). For example, to specify a 10GB partition, use #: ``A100.partition_2g_10gb``. @@ -250,7 +220,7 @@ class _A100_80GB_Base(MultiInstanceGPUAccelerator): class _A100_80GB(_A100_80GB_Base): """ - Partitions of an `NVIDIA A100 80GB GPU `_. + Partitions of an [`NVIDIA A100 80GB GPU`](https://www.nvidia.com/en-us/data-center/a100). 
""" partition_1g_10gb = _A100_80GB_Base.partitioned("1g.10gb") @@ -276,7 +246,7 @@ class _A100_80GB(_A100_80GB_Base): #: use this constant to specify that the task should run on an entire -#: `NVIDIA A100 80GB GPU `_. Fractional partitions are also available. +#: `NVIDIA A100 80GB GPU https://www.nvidia.com/en-us/data-center/a100. Fractional partitions are also available. #: #: Use pre-defined partitions (as instance attributes). For example, to specify a 10GB partition, use #: ``A100.partition_2g_10gb``. @@ -293,7 +263,7 @@ class _V5E_Base(MultiInstanceGPUAccelerator): class _V5E(_V5E_Base): """ - Slices of a `Google Cloud TPU v5e `_. + Slices of a [`Google Cloud TPU v5e](https://cloud.google.com/tpu/docs/v5e). """ slice_1x1 = _V5E_Base.partitioned("1x1") @@ -331,7 +301,7 @@ class _V5E(_V5E_Base): #: use this constant to specify that the task should run on V5E TPU. -#: `Google V5E Cloud TPU `_. +#: `Google V5E Cloud TPU https://cloud.google.com/tpu/docs/v5e>`_. #: #: Use pre-defined slices (as instance attributes). For example, to specify a 2x4 slice, use #: ``V5E.slice_2x4``. @@ -348,7 +318,7 @@ class _V5P_Base(MultiInstanceGPUAccelerator): class _V5P(_V5P_Base): """ - Slices of a `Google Cloud TPU v5p `_. + Slices of a [`Google Cloud TPU v5p`](https://cloud.google.com/tpu/docs/v5p). """ slice_2x2x1 = _V5P_Base.partitioned("2x2x1") @@ -408,7 +378,7 @@ class _V5P(_V5P_Base): #: Use this constant to specify that the task should run on V5P TPU. -#: `Google V5P Cloud TPU `_. +#: `Google V5P Cloud TPU https://cloud.google.com/tpu/docs/v5p. #: #: Use pre-defined slices (as instance attributes). For example, to specify a 2x4x4 slice, use #: ``V5P.slice_2x4x4``. @@ -425,7 +395,7 @@ class _V6E_Base(MultiInstanceGPUAccelerator): class _V6E(_V6E_Base): """ - Slices of a `Google Cloud TPU v6e `_. + Slices of a [`Google Cloud TPU v6e`](https://cloud.google.com/tpu/docs/v6e). 
""" slice_1x1 = _V6E_Base.partitioned("1x1") @@ -470,7 +440,7 @@ class _V6E(_V6E_Base): #: Use this constant to specify that the task should run on V6E TPU. -#: `Google V6E Cloud TPU `_. +#: `Google V6E Cloud TPU https://cloud.google.com/tpu/docs/v6e. #: #: Use pre-defined slices (as instance attributes). For example, to specify a 2x4 slice, use #: ``V6E.slice_2x4``. diff --git a/flytekit/remote/__init__.py b/flytekit/remote/__init__.py index c4b30b202c..0b2ac0c873 100644 --- a/flytekit/remote/__init__.py +++ b/flytekit/remote/__init__.py @@ -9,12 +9,13 @@ FlyteRemote(config=Config.auto(config_file=....)) FlyteRemote(config=Config(....)) ``` - -# Or if you need to specify a custom cert chain -# (options and compression are also respected keyword arguments) +Or if you need to specify a custom cert chain +(options and compression are also respected keyword arguments) +```python FlyteRemote(private_key=your_private_key_bytes, root_certificates=..., certificate_chain=...) -# fetch a workflow from the flyte backend + +### fetch a workflow from the flyte backend remote = FlyteRemote(...) flyte_workflow = remote.fetch_workflow(name="my_workflow", version="v1") @@ -27,45 +28,21 @@ ## Entrypoint -### FlyteRemote -The main class for interacting with a Flyte backend. - -### Options -Configuration options for the FlyteRemote client. - -## Entities - -### FlyteTask -Represents a registered Flyte task. - -### FlyteWorkflow -Represents a registered Flyte workflow. - -### FlyteLaunchPlan -Represents a registered Flyte launch plan. - -## Entity Components - -### FlyteNode -Base class for nodes in a Flyte workflow. - -### FlyteTaskNode -Represents a task node in a Flyte workflow. - -### FlyteWorkflowNode -Represents a subworkflow node in a Flyte workflow. - -## Execution Objects - -### FlyteWorkflowExecution -Represents an execution of a Flyte workflow. - -### FlyteTaskExecution -Represents an execution of a Flyte task. 
- -### FlyteNodeExecution -Represents an execution of a node within a workflow. - +| Class | | Description | +|-------|-------------|-------------| +| {{< py_class_ref remote.FlyteRemote >}} | {{< py_class_docsum remote.FlyteRemote >}} | The main class for interacting with a Flyte backend. | +| {{< py_class_ref remote.Config >}} | {{< py_class_docsum remote.Config >}} | Configuration options for the FlyteRemote client. | +| {{< py_class_ref remote.Options >}} | {{< py_class_docsum remote.Options >}} | Configuration options for the FlyteRemote client. | +| {{< py_class_ref remote.FlyteTask >}} | {{< py_class_docsum remote.FlyteTask >}} | Represents a registered Flyte task. | +| {{< py_class_ref remote.FlyteWorkflow >}} | {{< py_class_docsum remote.FlyteWorkflow >}} | Represents a registered Flyte workflow. | +| {{< py_class_ref remote.FlyteLaunchPlan >}} | {{< py_class_docsum remote.FlyteLaunchPlan >}} | Represents a registered Flyte launch plan. | +| {{< py_class_ref remote.FlyteNode >}} | {{< py_class_docsum remote.FlyteNode >}} | Base class for nodes in a Flyte workflow. | +| {{< py_class_ref remote.FlyteTaskNode >}} | {{< py_class_docsum remote.FlyteTaskNode >}} | Represents a task node in a Flyte workflow. | +| {{< py_class_ref remote.FlyteWorkflowNode >}} | {{< py_class_docsum remote.FlyteWorkflowNode >}} | Represents a subworkflow node in a Flyte workflow. | +| {{< py_class_ref remote.FlyteWorkflowExecution >}} | {{< py_class_docsum remote.FlyteWorkflowExecution >}} | Represents an execution of a Flyte workflow. | +| {{< py_class_ref remote.FlyteTaskExecution >}} | {{< py_class_docsum remote.FlyteTaskExecution >}} | Represents an execution of a Flyte task. | +| {{< py_class_ref remote.FlyteNodeExecution >}} | {{< py_class_docsum remote.FlyteNodeExecution >}} | Represents an execution of a node within a workflow. | +| {{< py_class_ref remote.FlyteBranchNode >}} | {{< py_class_docsum remote.FlyteBranchNode >}} | Represents a branch node in a Flyte workflow. 
| """ from flytekit.remote.entities import ( diff --git a/flytekit/types/directory/__init__.py b/flytekit/types/directory/__init__.py index 83bb0c8fa8..871aad8b2f 100644 --- a/flytekit/types/directory/__init__.py +++ b/flytekit/types/directory/__init__.py @@ -1,17 +1,12 @@ """ -Flytekit Directory Type -========================================================== -.. currentmodule:: flytekit.types.directory - Similar to :py:class:`flytekit.types.file.FlyteFile` there are some 'preformatted' directory types. -.. autosummary:: - :toctree: generated/ - :template: file_types.rst +| Class | Description | +|-------| ---- | +| {{< py_class_ref FlyteDirectory >}} | {{< py_class_docsum FlyteDirectory >}} | +| {{< py_class_ref TensorboardLogs >}} | {{< py_class_docsum TensorboardLogs >}} | +| {{< py_class_ref TFRecordsDirectory >}} | {{< py_class_docsum TFRecordsDirectory >}} | - FlyteDirectory - TensorboardLogs - TFRecordsDirectory """ import typing diff --git a/flytekit/types/directory/types.py b/flytekit/types/directory/types.py index 5d75872ee0..79406b0870 100644 --- a/flytekit/types/directory/types.py +++ b/flytekit/types/directory/types.py @@ -449,9 +449,8 @@ class FlyteDirToMultipartBlobTransformer(AsyncTypeTransformer[FlyteDirectory]): This transformer handles conversion between the Python native FlyteDirectory class defined above, and the Flyte IDL literal/type of Multipart Blob. Please see the FlyteDirectory comments for additional information. - .. caution: - - The transformer will not check if the given path is actually a directory. This is because the path could be + > [!CAUTION] caution: + > The transformer will not check if the given path is actually a directory. This is because the path could be a remote reference. 
""" @@ -470,7 +469,8 @@ def _blob_type(format: str) -> _core_types.BlobType: def assert_type(self, t: typing.Type[FlyteDirectory], v: typing.Union[FlyteDirectory, os.PathLike, str]): if isinstance(v, FlyteDirectory) or isinstance(v, str) or isinstance(v, os.PathLike): """ - NOTE: we do not do a isdir check because the given path could be remote reference + >[!NOTE] + > we do not do a isdir check because the given path could be remote reference """ return raise TypeError( diff --git a/flytekit/types/error/__init__.py b/flytekit/types/error/__init__.py index 6714e88844..7a60832694 100644 --- a/flytekit/types/error/__init__.py +++ b/flytekit/types/error/__init__.py @@ -1,14 +1 @@ -""" -Flytekit Error Type -========================================================== -.. currentmodule:: flytekit.types.error - -.. autosummary:: - :nosignatures: - :template: custom.rst - :toctree: generated/ - - FlyteError -""" - from .error import FlyteError diff --git a/flytekit/types/iterator/__init__.py b/flytekit/types/iterator/__init__.py index 3c1911394f..6c550ed4bf 100644 --- a/flytekit/types/iterator/__init__.py +++ b/flytekit/types/iterator/__init__.py @@ -1,15 +1,5 @@ """ -Flytekit Iterator Type -====================== -.. currentmodule:: flytekit.types.iterator - -.. autosummary:: - :nosignatures: - :toctree: generated/ - - FlyteIterator - JSON """ from .iterator import FlyteIterator diff --git a/flytekit/types/pickle/__init__.py b/flytekit/types/pickle/__init__.py index 59833bdc84..a4c22d9a8e 100644 --- a/flytekit/types/pickle/__init__.py +++ b/flytekit/types/pickle/__init__.py @@ -1,13 +1,4 @@ """ -Flytekit Pickle Type -========================================================== -.. currentmodule:: flytekit.types.pickle - -.. 
autosummary:: - :template: custom.rst - :toctree: generated/ - - FlytePickle """ from .pickle import FlytePickle diff --git a/flytekit/types/structured/__init__.py b/flytekit/types/structured/__init__.py index 16c1812fc6..bc8bbbc3c6 100644 --- a/flytekit/types/structured/__init__.py +++ b/flytekit/types/structured/__init__.py @@ -1,15 +1,4 @@ """ -Flytekit StructuredDataset -========================================================== -.. currentmodule:: flytekit.types.structured - -.. autosummary:: - :template: custom.rst - :toctree: generated/ - - StructuredDataset - StructuredDatasetDecoder - StructuredDatasetEncoder """ import functools diff --git a/plugins/flytekit-hive/flytekitplugins/hive/task.py b/plugins/flytekit-hive/flytekitplugins/hive/task.py index 76835a8d77..4b5d9a9451 100644 --- a/plugins/flytekit-hive/flytekitplugins/hive/task.py +++ b/plugins/flytekit-hive/flytekitplugins/hive/task.py @@ -124,7 +124,7 @@ def __init__( Args: select_query: Singular query that returns a Tabular dataset stage_query: optional query that should be executed before the actual ``select_query``. This can usually - be used for setting memory or the an alternate execution engine like `tez `__ + be used for setting memory or the an alternate execution engine like [`tez`](https://tez.apache.org) """ query_template = HiveSelectTask._HIVE_QUERY_FORMATTER.format( stage_query_str=stage_query or "", select_query_str=select_query.strip().strip(";") diff --git a/plugins/flytekit-kf-mpi/flytekitplugins/kfmpi/task.py b/plugins/flytekit-kf-mpi/flytekitplugins/kfmpi/task.py index a6a6ef3647..1eedd31f29 100644 --- a/plugins/flytekit-kf-mpi/flytekitplugins/kfmpi/task.py +++ b/plugins/flytekit-kf-mpi/flytekitplugins/kfmpi/task.py @@ -1,6 +1,6 @@ """ This Plugin adds the capability of running distributed MPI training to Flyte using backend plugins, natively on -Kubernetes. It leverages `MPI Job `_ Plugin from kubeflow. +Kubernetes. 
It leverages [`MPI Job`](https://github.com/kubeflow/mpi-operator) Plugin from kubeflow. """ from dataclasses import dataclass, field @@ -91,7 +91,7 @@ class Launcher: @dataclass class MPIJob(object): """ - Configuration for an executable `MPI Job `_. Use this + Configuration for an executable [`MPI Job`](https://github.com/kubeflow/mpi-operator). Use this to run distributed training on k8s with MPI Args: @@ -222,8 +222,8 @@ def get_custom(self, settings: SerializationSettings) -> Dict[str, Any]: @dataclass class HorovodJob(object): """ - Configuration for an executable `Horovod Job using MPI operator`_. Use this - to run distributed training on k8s with MPI. For more info, check out Running Horovod`_. + Configuration for an executable [`Horovod Job using MPI operator`](https://github.com/kubeflow/mpi-operator). Use this + to run distributed training on k8s with MPI. For more info, check out [`Running Horovod`](https://horovod.readthedocs.io/en/stable/summary_include.html#running-horovod). Args: worker: Worker configuration for the job. diff --git a/plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py b/plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py index 9a77d6326e..62e4e0ccda 100644 --- a/plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py +++ b/plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py @@ -1,6 +1,6 @@ """ This Plugin adds the capability of running distributed pytorch training to Flyte using backend plugins, natively on -Kubernetes. It leverages `Pytorch Job `_ Plugin from kubeflow. +Kubernetes. It leverages [`Pytorch Job`](https://github.com/kubeflow/pytorch-operator) Plugin from kubeflow. """ import os @@ -95,7 +95,7 @@ class Master: @dataclass class PyTorch(object): """ - Configuration for an executable `PyTorch Job `_. Use this + Configuration for an executable [`PyTorch Job`](https://github.com/kubeflow/pytorch-operator). Use this to run distributed PyTorch training on Kubernetes. 
Please notice, in most cases, you should not worry about the configuration of the master and worker groups. The default configuration should work. The only field you should change is the number of workers. Both replicas will use the same image, and the same @@ -125,12 +125,12 @@ class PyTorch(object): @dataclass class Elastic(object): """ - Configuration for `torch elastic training `_. + Configuration for [`torch elastic training`](https://pytorch.org/docs/stable/elastic/run.html). Use this to run single- or multi-node distributed pytorch elastic training on k8s. Single-node elastic training is executed in a k8s pod when `nnodes` is set to 1. - Multi-node training is executed otherwise using a `Pytorch Job `_. + Multi-node training is executed otherwise using a [`Pytorch Job`](https://github.com/kubeflow/training-operator). Like `torchrun`, this plugin sets the environment variable `OMP_NUM_THREADS` to 1 if it is not set. Please see https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html for potential performance improvements. diff --git a/plugins/flytekit-kf-tensorflow/flytekitplugins/kftensorflow/task.py b/plugins/flytekit-kf-tensorflow/flytekitplugins/kftensorflow/task.py index 62cd482416..60cd0b8472 100644 --- a/plugins/flytekit-kf-tensorflow/flytekitplugins/kftensorflow/task.py +++ b/plugins/flytekit-kf-tensorflow/flytekitplugins/kftensorflow/task.py @@ -1,6 +1,6 @@ """ This Plugin adds the capability of running distributed tensorflow training to Flyte using backend plugins, natively on -Kubernetes. It leverages `TF Job `_ Plugin from kubeflow. +Kubernetes. It leverages [`TF Job`](https://github.com/kubeflow/tf-operator) Plugin from kubeflow. """ from dataclasses import dataclass, field @@ -98,7 +98,7 @@ class Evaluator: @dataclass class TfJob: """ - Configuration for an executable `TensorFlow Job `_. Use this + Configuration for an executable [`TensorFlow Job`](https://github.com/kubeflow/tf-operator). 
Use this to run distributed TensorFlow training on Kubernetes. Args: diff --git a/tests/flytekit/integration/remote/workflows/basic/basic_workflow.py b/tests/flytekit/integration/remote/workflows/basic/basic_workflow.py index d63f96336b..2177c4f940 100644 --- a/tests/flytekit/integration/remote/workflows/basic/basic_workflow.py +++ b/tests/flytekit/integration/remote/workflows/basic/basic_workflow.py @@ -12,7 +12,7 @@ of ``t2`` in the workflow below. As such, the body of workflows is run at "registration" time. Please refer to the registration docs for additional information as well since it is actually a two-step process. -Take a look at the conceptual `discussion `__ +Take a look at the conceptual [`discussion`](https://lyft.github.io/flyte/user/concepts/workflows_nodes.html#workflows) behind workflows for additional information. """ From d9519cef061046d3273699b8af62e77229d4b25a Mon Sep 17 00:00:00 2001 From: chmod77 Date: Wed, 9 Apr 2025 18:15:30 +0300 Subject: [PATCH 04/20] chore: fix merge conflicts Signed-off-by: chmod77 Signed-off-by: Peeter Piegaze <1153481+ppiegaze@users.noreply.github.com> --- .../flytekitplugins/geopandas/gdf_transformers.py | 11 ++++++----- .../flytekit-geopandas/tests/test_geopandas_plugin.py | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/plugins/flytekit-geopandas/flytekitplugins/geopandas/gdf_transformers.py b/plugins/flytekit-geopandas/flytekitplugins/geopandas/gdf_transformers.py index c6be9a54b6..34c7ef8495 100644 --- a/plugins/flytekit-geopandas/flytekitplugins/geopandas/gdf_transformers.py +++ b/plugins/flytekit-geopandas/flytekitplugins/geopandas/gdf_transformers.py @@ -1,4 +1,3 @@ -import os import typing from pathlib import Path @@ -40,10 +39,12 @@ def encode( structured_dataset: StructuredDataset, structured_dataset_type: StructuredDatasetType, ) -> literals.StructuredDataset: - dir = ctx.file_access.get_random_remote_directory() - if not ctx.file_access.is_remote(dir): - Path(dir).mkdir(parents=True, 
exist_ok=True) - uri = os.path.join(str(dir), "data.parquet") + uri = typing.cast(str, structured_dataset.uri) or ctx.file_access.join( + ctx.file_access.raw_output_prefix, ctx.file_access.get_random_string() + ) + if not ctx.file_access.is_remote(uri): + Path(uri).mkdir(parents=True, exist_ok=True) + uri = str(Path(uri) / "data.parquet") df = typing.cast(gpd.GeoDataFrame, structured_dataset.dataframe) df.to_parquet(uri) structured_dataset_type.format = PARQUET diff --git a/plugins/flytekit-geopandas/tests/test_geopandas_plugin.py b/plugins/flytekit-geopandas/tests/test_geopandas_plugin.py index 683f98fba3..fe8d4cd181 100644 --- a/plugins/flytekit-geopandas/tests/test_geopandas_plugin.py +++ b/plugins/flytekit-geopandas/tests/test_geopandas_plugin.py @@ -6,6 +6,7 @@ from flytekit import task from flytekit.types.structured.structured_dataset import StructuredDataset +from pyproj import CRS import numpy as np From f2723b3f9f2b88febfe27c7a230c9dbcee822550 Mon Sep 17 00:00:00 2001 From: chmod77 Date: Wed, 9 Apr 2025 18:17:14 +0300 Subject: [PATCH 05/20] chore: fix merge conflicts Signed-off-by: chmod77 Signed-off-by: Peeter Piegaze <1153481+ppiegaze@users.noreply.github.com> --- .../flytekitplugins/slurm/function/connector.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/plugins/flytekit-slurm/flytekitplugins/slurm/function/connector.py b/plugins/flytekit-slurm/flytekitplugins/slurm/function/connector.py index d2f24a2af9..ba36aa4a1b 100644 --- a/plugins/flytekit-slurm/flytekitplugins/slurm/function/connector.py +++ b/plugins/flytekit-slurm/flytekitplugins/slurm/function/connector.py @@ -7,7 +7,6 @@ from asyncssh import SSHClientConnection from asyncssh.sftp import SFTPNoSuchFile -from flytekit import logger from flytekit.extend.backend.base_connector import AsyncConnectorBase, ConnectorRegistry, Resource, ResourceMeta from flytekit.extend.backend.utils import convert_to_flyte_phase from flytekit.models.literals import LiteralMap @@ -83,8 +82,7 @@ 
async def get(self, resource_meta: SlurmJobMetadata, **kwargs) -> Resource: conn = await get_ssh_conn( ssh_config=resource_meta.ssh_config, slurm_cluster_to_ssh_conn=self.slurm_cluster_to_ssh_conn ) - job_res = await conn.run(f"scontrol --json show job {resource_meta.job_id}", check=True) - job_info = json.loads(job_res.stdout)["jobs"][0] + job_res = await conn.run(f"scontrol show job {resource_meta.job_id}", check=True) # Determine the current flyte phase from Slurm job state job_state = job_info["job_state"][0].strip().lower() From 895ab05661693ea33303054b86a2d1cb7bf33a8a Mon Sep 17 00:00:00 2001 From: chmod77 Date: Wed, 9 Apr 2025 18:17:43 +0300 Subject: [PATCH 06/20] chore: fix merge conflicts Signed-off-by: chmod77 Signed-off-by: Peeter Piegaze <1153481+ppiegaze@users.noreply.github.com> --- .../flytekit-slurm/flytekitplugins/slurm/script/connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/flytekit-slurm/flytekitplugins/slurm/script/connector.py b/plugins/flytekit-slurm/flytekitplugins/slurm/script/connector.py index ac7dd9caa5..cbdd422ab9 100644 --- a/plugins/flytekit-slurm/flytekitplugins/slurm/script/connector.py +++ b/plugins/flytekit-slurm/flytekitplugins/slurm/script/connector.py @@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Tuple, Type from asyncssh import SSHClientConnection -from asyncssh.sftp import SFTPError, SFTPNoSuchFile +from asyncssh.sftp import SFTPError import flytekit from flytekit.core.type_engine import TypeEngine From bfad594cf57bf2ba2d602ede8a76b2d69ca65271 Mon Sep 17 00:00:00 2001 From: chmod77 Date: Fri, 4 Apr 2025 01:11:47 +0300 Subject: [PATCH 07/20] chore: further cleanup Signed-off-by: chmod77 Signed-off-by: Peeter Piegaze <1153481+ppiegaze@users.noreply.github.com> --- flytekit/__init__.py | 48 +++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/flytekit/__init__.py b/flytekit/__init__.py index 3b771f12c2..d772ba6759 100644 
--- a/flytekit/__init__.py +++ b/flytekit/__init__.py @@ -32,7 +32,6 @@ in the flyte engine and hence should be used for control flow. "dynamic workflows" can be used to perform custom conditional logic not supported by flytekit. - ### Customizing Tasks & Workflows - TaskMetadata - Wrapper object that allows users to specify Task @@ -44,30 +43,25 @@ See the Dynamic module for more information. -- dynamic -Signaling --------- +##### Signaling - approve - sleep - wait_for_input Scheduling ----------- - CronSchedule - FixedRate -Notifications ------------ +##### Notifications - Email - PagerDuty - Slack -Reference Entities ------------------ +##### Reference Entities - get_reference_entity - LaunchPlanReference @@ -77,8 +71,7 @@ - reference_workflow - reference_launch_plan -Core Task Types -------------- +##### Core Task Types - SQLTask - ContainerTask @@ -86,15 +79,13 @@ - PythonInstanceTask - LaunchPlan -Secrets and SecurityContext --------------------------- +##### Secrets and SecurityContext - Secret - SecurityContext -Common Flyte IDL Objects ------------------------ +##### Common Flyte IDL Objects - AuthRole - Labels @@ -181,7 +172,7 @@ def current_context() -> ExecutionParameters: ```python - flytekit.current_context().logging.info(...) + flytekit.current_context().logging.info(...) ``` Available params are documented in :py:class:`flytekit.core.context_manager.ExecutionParams`. @@ -203,24 +194,31 @@ def load_implicit_plugins(): entrypoint specification to their setup.py. The following example shows how we can autoload a module called fsspec (whose init files contains the necessary plugin registration step) - ``` + + > [!NOTE] + > The group is always ``flytekit.plugins`` - # note the group is always ``flytekit.plugins`` - setup( + + ```python + setup( ... entry_points={'flytekit.plugins': 'fsspec=flytekitplugins.fsspec'}, ... 
- ) + ) ``` This works as long as the fsspec module has + > [!NOTE] + > For data persistence plugins: + + ```python + DataPersistencePlugins.register_plugin(f"{k}://", FSSpecPersistence, force=True) ``` + OR for type plugins: - # For data persistence plugins - DataPersistencePlugins.register_plugin(f"{k}://", FSSpecPersistence, force=True) - # OR for type plugins - TypeEngine.register(PanderaTransformer()) - # etc + ```python + TypeEngine.register(PanderaTransformer()) + # etc ``` """ discovered_plugins = entry_points(group="flytekit.plugins") From 1f8ec31b22741b9e530fc163169aaac02519a3df Mon Sep 17 00:00:00 2001 From: chmod77 Date: Fri, 4 Apr 2025 09:48:26 +0300 Subject: [PATCH 08/20] chore: further cleanup Signed-off-by: chmod77 Signed-off-by: Peeter Piegaze <1153481+ppiegaze@users.noreply.github.com> --- flytekit/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flytekit/__init__.py b/flytekit/__init__.py index d772ba6759..5dbb82b620 100644 --- a/flytekit/__init__.py +++ b/flytekit/__init__.py @@ -171,8 +171,7 @@ def current_context() -> ExecutionParameters: Usage ```python - - flytekit.current_context().logging.info(...) + flytekit.current_context().logging.info(...) ``` Available params are documented in :py:class:`flytekit.core.context_manager.ExecutionParams`. 
From 7a2320c4e4b19cad81ba298e3d150f4508c25ed9 Mon Sep 17 00:00:00 2001 From: chmod77 Date: Fri, 4 Apr 2025 10:17:55 +0300 Subject: [PATCH 09/20] chore: further docstring cleanup Signed-off-by: chmod77 Signed-off-by: Peeter Piegaze <1153481+ppiegaze@users.noreply.github.com> --- flytekit/__init__.py | 2 +- flytekit/clients/auth_helper.py | 15 +-- flytekit/clients/friendly.py | 4 +- flytekit/clients/raw.py | 7 +- flytekit/configuration/__init__.py | 15 ++- flytekit/core/annotation.py | 7 +- flytekit/core/array_node_map_task.py | 11 +- flytekit/core/base_task.py | 3 +- flytekit/core/checkpointer.py | 5 +- flytekit/core/condition.py | 38 ++++--- flytekit/core/context_manager.py | 31 +++--- flytekit/core/dynamic_workflow_task.py | 22 ++-- flytekit/core/launch_plan.py | 31 +----- flytekit/core/legacy_map_task.py | 13 ++- flytekit/core/notification.py | 15 ++- flytekit/core/python_function_task.py | 14 ++- flytekit/core/resources.py | 13 ++- flytekit/core/schedule.py | 5 +- flytekit/core/task.py | 135 ++++++++++++------------- flytekit/core/testing.py | 17 ++-- flytekit/core/tracker.py | 42 ++++---- flytekit/core/type_engine.py | 34 +++---- flytekit/deck/deck.py | 37 ++++--- flytekit/exceptions/eager.py | 39 ++++--- flytekit/extras/accelerators.py | 47 ++++----- flytekit/extras/webhook/task.py | 46 ++++----- flytekit/lazy_import/lazy_module.py | 6 +- flytekit/remote/backfill.py | 23 +++-- flytekit/types/directory/types.py | 19 ++-- flytekit/types/file/file.py | 15 ++- 30 files changed, 325 insertions(+), 386 deletions(-) diff --git a/flytekit/__init__.py b/flytekit/__init__.py index 5dbb82b620..0c1b9e41e3 100644 --- a/flytekit/__init__.py +++ b/flytekit/__init__.py @@ -187,7 +187,7 @@ def new_context() -> Generator[FlyteContext, None, None]: def load_implicit_plugins(): """ This method allows loading all plugins that have the entrypoint specification. This uses the plugin loading - behavior as explained `here <>`_. + behavior. 
This is an opt in system and plugins that have an implicit loading requirement should add the implicit loading entrypoint specification to their setup.py. The following example shows how we can autoload a module called fsspec diff --git a/flytekit/clients/auth_helper.py b/flytekit/clients/auth_helper.py index 213edd92e6..c62b1411f8 100644 --- a/flytekit/clients/auth_helper.py +++ b/flytekit/clients/auth_helper.py @@ -181,19 +181,20 @@ def get_channel(cfg: PlatformConfig, **kwargs) -> grpc.Channel: It is possible to pass additional options to the underlying channel. Examples for various options are as below ```python - - get_channel(cfg=PlatformConfig(...)) + get_channel(cfg=PlatformConfig(...)) ``` - ```python - :caption: Additional options to insecure / secure channel. Example `options` and `compression` refer to grpc guide - - get_channel(cfg=PlatformConfig(...), options=..., compression=...) + > [!NOTE] + > Additional options to insecure / secure channel. Example `options` and `compression` refer to grpc guide + ```python + get_channel(cfg=PlatformConfig(...), options=..., compression=...) ``` - :caption: Create secure channel with custom `grpc.ssl_channel_credentials` + > [!NOTE] + > Create secure channel with custom `grpc.ssl_channel_credentials` + ```python get_channel(cfg=PlatformConfig(insecure=False,...), credentials=...) ``` diff --git a/flytekit/clients/friendly.py b/flytekit/clients/friendly.py index 9d1d46cf6e..68c9d703cc 100644 --- a/flytekit/clients/friendly.py +++ b/flytekit/clients/friendly.py @@ -42,8 +42,8 @@ class SynchronousFlyteClient(_RawSynchronousFlyteClient): first. 
Create a client by ```python - SynchronousFlyteClient("your.domain:port", insecure=True) - # insecure should be True if your flyteadmin deployment doesn't have SSL enabled + SynchronousFlyteClient("your.domain:port", insecure=True) + # insecure should be True if your flyteadmin deployment doesn't have SSL enabled ``` """ diff --git a/flytekit/clients/raw.py b/flytekit/clients/raw.py index 4d9a7031a1..3216fe0c0e 100644 --- a/flytekit/clients/raw.py +++ b/flytekit/clients/raw.py @@ -29,10 +29,9 @@ class RawSynchronousFlyteClient(object): be explicit as opposed to inferred from the environment or a configuration file. To create a client, ```python - - from flytekit.configuration import PlatformConfig - RawSynchronousFlyteClient(PlatformConfig(endpoint="a.b.com", insecure=True)) # or - SynchronousFlyteClient(PlatformConfig(endpoint="a.b.com", insecure=True)) + from flytekit.configuration import PlatformConfig + RawSynchronousFlyteClient(PlatformConfig(endpoint="a.b.com", insecure=True)) # or + SynchronousFlyteClient(PlatformConfig(endpoint="a.b.com", insecure=True)) ``` """ diff --git a/flytekit/configuration/__init__.py b/flytekit/configuration/__init__.py index dc27c8f434..96faa3cc18 100644 --- a/flytekit/configuration/__init__.py +++ b/flytekit/configuration/__init__.py @@ -338,14 +338,13 @@ def from_images(cls, default_image: str, m: typing.Optional[typing.Dict[str, str your workflow uses multiple images ```python - - ImageConfig.from_dict( - "ghcr.io/flyteorg/flytecookbook:v1.0.0", - { - "spark": "ghcr.io/flyteorg/myspark:...", - "other": "...", - } - ) + ImageConfig.from_dict( + "ghcr.io/flyteorg/flytecookbook:v1.0.0", + { + "spark": "ghcr.io/flyteorg/myspark:...", + "other": "...", + } + ) ``` :return: diff --git a/flytekit/core/annotation.py b/flytekit/core/annotation.py index 769855f614..ce0cb4693d 100644 --- a/flytekit/core/annotation.py +++ b/flytekit/core/annotation.py @@ -15,10 +15,9 @@ class FlyteAnnotation: For a task definition: ```python - - @task - 
def x(a: typing.Annotated[int, FlyteAnnotation({"foo": {"bar": 1}})]): - return + @task + def x(a: typing.Annotated[int, FlyteAnnotation({"foo": {"bar": 1}})]): + return ``` """ diff --git a/flytekit/core/array_node_map_task.py b/flytekit/core/array_node_map_task.py index 5f3e8b9588..e67eaedf26 100644 --- a/flytekit/core/array_node_map_task.py +++ b/flytekit/core/array_node_map_task.py @@ -454,14 +454,13 @@ class ArrayNodeMapTaskResolver(tracker.TrackedInstance, TaskResolverMixin): But in cases in which `j` is bound to a fixed value by using `functools.partial` we need a way to ensure that the interface is not simply interpolated, but only the unbound inputs are interpolated. - ```python + ```python + def foo((i: int, j: str) -> str: + ... - def foo((i: int, j: str) -> str: - ... + mt = map_task(functools.partial(foo, j=10)) - mt = map_task(functools.partial(foo, j=10)) - - print(mt.interface) + print(mt.interface) ``` output: diff --git a/flytekit/core/base_task.py b/flytekit/core/base_task.py index e83cf33847..bf224bc508 100644 --- a/flytekit/core/base_task.py +++ b/flytekit/core/base_task.py @@ -102,8 +102,7 @@ def kwtypes(**kwargs) -> OrderedDict[str, Type]: This is a small helper function to convert the keyword arguments to an OrderedDict of types. 
```python - - kwtypes(a=int, b=str) + kwtypes(a=int, b=str) ``` """ d = collections.OrderedDict() diff --git a/flytekit/core/checkpointer.py b/flytekit/core/checkpointer.py index c43208a5c8..5f96faf0de 100644 --- a/flytekit/core/checkpointer.py +++ b/flytekit/core/checkpointer.py @@ -37,9 +37,8 @@ def save(self, cp: typing.Union[Path, str, io.BufferedReader]): Usage: If you have a io.BufferedReader then the following should work ```python - - with input_file.open(mode="rb") as b: - checkpointer.save(b) + with input_file.open(mode="rb") as b: + checkpointer.save(b) ``` """ raise NotImplementedError("Use one of the derived classes") diff --git a/flytekit/core/condition.py b/flytekit/core/condition.py index 0f754cd6c2..2ab390dff5 100644 --- a/flytekit/core/condition.py +++ b/flytekit/core/condition.py @@ -44,8 +44,7 @@ class ConditionalSection: Usage: ```python - - v = conditional("fractions").if_((my_input > 0.1) & (my_input < 1.0)).then(...)... + v = conditional("fractions").if_((my_input > 0.1) & (my_input < 1.0)).then(...)... ``` """ @@ -488,24 +487,23 @@ def conditional(name: str) -> ConditionalSection: Example of a condition usage. 
Note the nesting and the assignment to a LHS variable ```python - - v = ( - conditional("fractions") - .if_((my_input > 0.1) & (my_input < 1.0)) - .then( - conditional("inner_fractions") - .if_(my_input < 0.5) - .then(double(n=my_input)) - .elif_((my_input > 0.5) & (my_input < 0.7)) - .then(square(n=my_input)) - .else_() - .fail("Only <0.7 allowed") - ) - .elif_((my_input > 1.0) & (my_input < 10.0)) - .then(square(n=my_input)) - .else_() - .then(double(n=my_input)) - ) + v = ( + conditional("fractions") + .if_((my_input > 0.1) & (my_input < 1.0)) + .then( + conditional("inner_fractions") + .if_(my_input < 0.5) + .then(double(n=my_input)) + .elif_((my_input > 0.5) & (my_input < 0.7)) + .then(square(n=my_input)) + .else_() + .fail("Only <0.7 allowed") + ) + .elif_((my_input > 1.0) & (my_input < 10.0)) + .then(square(n=my_input)) + .else_() + .then(double(n=my_input)) + ) ``` """ ctx = FlyteContextManager.current_context() diff --git a/flytekit/core/context_manager.py b/flytekit/core/context_manager.py index 86eaa2fe45..e828f365c9 100644 --- a/flytekit/core/context_manager.py +++ b/flytekit/core/context_manager.py @@ -70,7 +70,7 @@ class ExecutionParameters(object): ```python - flytekit.current_context() + flytekit.current_context() ``` This object provides the following objections @@ -796,18 +796,16 @@ def get_deck(self) -> typing.Union[str, "IPython.core.display.HTML"]: # type:ig IPython.display and should be rendered in the notebook. ```python - - with flytekit.new_context() as ctx: - my_task(...) - ctx.get_deck() + with flytekit.new_context() as ctx: + my_task(...) 
+ ctx.get_deck() ``` OR if you wish to explicitly display ```python - - from IPython import display - display(ctx.get_deck()) + from IPython import display + display(ctx.get_deck()) ``` """ from flytekit.deck.deck import _get_deck @@ -920,15 +918,14 @@ class FlyteContextManager(object): Typical usage is ```python - - FlyteContextManager.initialize() - with FlyteContextManager.with_context(o) as ctx: - pass - - # If required - not recommended you can use - FlyteContextManager.push_context() - # but correspondingly a pop_context should be called - FlyteContextManager.pop_context() + FlyteContextManager.initialize() + with FlyteContextManager.with_context(o) as ctx: + pass + + # If required - not recommended you can use + FlyteContextManager.push_context() + # but correspondingly a pop_context should be called + FlyteContextManager.pop_context() ``` """ diff --git a/flytekit/core/dynamic_workflow_task.py b/flytekit/core/dynamic_workflow_task.py index 5ce9e41d4b..e939d87705 100644 --- a/flytekit/core/dynamic_workflow_task.py +++ b/flytekit/core/dynamic_workflow_task.py @@ -32,24 +32,22 @@ run as a :std:ref:`subworkflow `. Simple usage ```python - - @dynamic - def my_dynamic_subwf(a: int) -> (typing.List[str], int): - s = [] - for i in range(a): - s.append(t1(a=i)) - return s, 5 +@dynamic +def my_dynamic_subwf(a: int) -> (typing.List[str], int): + s = [] + for i in range(a): + s.append(t1(a=i)) + return s, 5 ``` Note in the code block that we call the Python ``range`` operator on the input. This is typically not allowed in a workflow but it is here. You can even express dependencies between tasks. ```python - - @dynamic - def my_dynamic_subwf(a: int, b: int) -> int: - x = t1(a=a) - return t2(b=b, x=x) +@dynamic +def my_dynamic_subwf(a: int, b: int) -> int: + x = t1(a=a) + return t2(b=b, x=x) ``` See the :std:ref:`cookbook ` for a longer discussion. 
diff --git a/flytekit/core/launch_plan.py b/flytekit/core/launch_plan.py index 5d973fde3a..7dd5ba6f86 100644 --- a/flytekit/core/launch_plan.py +++ b/flytekit/core/launch_plan.py @@ -26,26 +26,18 @@ class LaunchPlan(object): attributes set - no default values, fixed values, schedules, etc. Assuming you have the following workflow ```python - - @workflow - def wf(a: int, c: str) -> str: + @workflow + def wf(a: int, c: str) -> str: ... ``` Create the default launch plan with ```python - - LaunchPlan.get_or_create(workflow=my_wf) + LaunchPlan.get_or_create(workflow=my_wf) ``` If you specify additional parameters, you'll also have to give the launch plan a unique name. Default and fixed inputs can be expressed as Python native values like so: - .. literalinclude:: ../../../tests/flytekit/unit/core/test_launch_plan.py - :start-after: # fixed_and_default_start - :end-before: # fixed_and_default_end - :language: python - :dedent: 4 - Additionally, a launch plan can be configured to run on a schedule and emit notifications. @@ -53,23 +45,10 @@ def wf(a: int, c: str) -> str: To configure the remaining parameters, you'll need to import the relevant model objects as well. - .. literalinclude:: ../../../tests/flytekit/unit/core/test_launch_plan.py - :start-after: # schedule_start - :end-before: # schedule_end - :language: python - :dedent: 4 - ```python - - from flytekit.models.common import Annotations, AuthRole, Labels, RawOutputDataConfig + from flytekit.models.common import Annotations, AuthRole, Labels, RawOutputDataConfig ``` - Then use as follows - - .. 
literalinclude:: ../../../tests/flytekit/unit/core/test_launch_plan.py - :start-after: # auth_role_start - :end-before: # auth_role_end - :language: python - :dedent: 4 + Then use as follows: """ diff --git a/flytekit/core/legacy_map_task.py b/flytekit/core/legacy_map_task.py index 4214a92140..95b4635ea2 100644 --- a/flytekit/core/legacy_map_task.py +++ b/flytekit/core/legacy_map_task.py @@ -367,15 +367,14 @@ class MapTaskResolver(TrackedInstance, TaskResolverMixin): But in cases in which `j` is bound to a fixed value by using `functools.partial` we need a way to ensure that the interface is not simply interpolated, but only the unbound inputs are interpolated. - ```python + ```python + def foo((i: int, j: str) -> str: + ... - def foo((i: int, j: str) -> str: - ... + mt = map_task(functools.partial(foo, j=10)) - mt = map_task(functools.partial(foo, j=10)) - - print(mt.interface) - ``` + print(mt.interface) + ``` output: diff --git a/flytekit/core/notification.py b/flytekit/core/notification.py index ce31b4b78e..b8be6c492a 100644 --- a/flytekit/core/notification.py +++ b/flytekit/core/notification.py @@ -60,10 +60,9 @@ class PagerDuty(Notification): This notification should be used when sending emails to the PagerDuty service. ```python + from flytekit.models.core.execution import WorkflowExecutionPhase - from flytekit.models.core.execution import WorkflowExecutionPhase - - PagerDuty(phases=[WorkflowExecutionPhase.SUCCEEDED], recipients_email=["my-team@email.com"]) + PagerDuty(phases=[WorkflowExecutionPhase.SUCCEEDED], recipients_email=["my-team@email.com"]) ``` """ @@ -82,10 +81,9 @@ class Email(Notification): This notification should be used when sending regular emails to people. 
```python + from flytekit.models.core.execution import WorkflowExecutionPhase - from flytekit.models.core.execution import WorkflowExecutionPhase - - Email(phases=[WorkflowExecutionPhase.SUCCEEDED], recipients_email=["my-team@email.com"]) + Email(phases=[WorkflowExecutionPhase.SUCCEEDED], recipients_email=["my-team@email.com"]) ``` """ @@ -103,10 +101,9 @@ class Slack(Notification): This notification should be used when sending emails to the Slack. ```python + from flytekit.models.core.execution import WorkflowExecutionPhase - from flytekit.models.core.execution import WorkflowExecutionPhase - - Slack(phases=[WorkflowExecutionPhase.SUCCEEDED], recipients_email=["my-team@email.com"]) + Slack(phases=[WorkflowExecutionPhase.SUCCEEDED], recipients_email=["my-team@email.com"]) ``` """ diff --git a/flytekit/core/python_function_task.py b/flytekit/core/python_function_task.py index 307bd31a81..ae7116da96 100644 --- a/flytekit/core/python_function_task.py +++ b/flytekit/core/python_function_task.py @@ -65,11 +65,10 @@ class PythonInstanceTask(PythonAutoContainerTask[T], ABC): # type: ignore will invoke the right class automatically, by capturing the module name and variable in the module name. ```python + x = MyInstanceTask(name="x", .....) - x = MyInstanceTask(name="x", .....) - - # this can be invoked as - x(a=5) # depending on the interface of the defined task + # this can be invoked as + x(a=5) # depending on the interface of the defined task ``` """ @@ -97,10 +96,9 @@ class PythonFunctionTask(PythonAutoContainerTask[T]): # type: ignore It is advised this task is used using the @task decorator as follows ```python - - @task - def my_func(a: int) -> str: - ... + @task + def my_func(a: int) -> str: + ... ``` In the above code, the name of the function, the module, and the interface (inputs = int and outputs = str) will be auto detected. 
diff --git a/flytekit/core/resources.py b/flytekit/core/resources.py index e77b2ad795..36b9c256d0 100644 --- a/flytekit/core/resources.py +++ b/flytekit/core/resources.py @@ -19,14 +19,13 @@ class Resources(DataClassJSONMixin): This class is used to specify both resource requests and resource limits. ```python + Resources(cpu="1", mem="2048") # This is 1 CPU and 2 KB of memory + Resources(cpu="100m", mem="2Gi") # This is 1/10th of a CPU and 2 gigabytes of memory + Resources(cpu=0.5, mem=1024) # This is 500m CPU and 1 KB of memory - Resources(cpu="1", mem="2048") # This is 1 CPU and 2 KB of memory - Resources(cpu="100m", mem="2Gi") # This is 1/10th of a CPU and 2 gigabytes of memory - Resources(cpu=0.5, mem=1024) # This is 500m CPU and 1 KB of memory - - # For Kubernetes-based tasks, pods use ephemeral local storage for scratch space, caching, and for logs. - # This allocates 1Gi of such local storage. - Resources(ephemeral_storage="1Gi") + # For Kubernetes-based tasks, pods use ephemeral local storage for scratch space, caching, and for logs. + # This allocates 1Gi of such local storage. + Resources(ephemeral_storage="1Gi") ``` When used together with `@task(resources=)`, you a specific the request and limits with one object. When the value is set to a tuple or list, the first value is the request and the diff --git a/flytekit/core/schedule.py b/flytekit/core/schedule.py index 9f1350b916..ef92611b5d 100644 --- a/flytekit/core/schedule.py +++ b/flytekit/core/schedule.py @@ -162,10 +162,9 @@ class FixedRate(_schedule_models.Schedule): Use this class to schedule a fixed-rate interval for a launch plan. ```python + from datetime import timedelta - from datetime import timedelta - - FixedRate(duration=timedelta(minutes=10)) + FixedRate(duration=timedelta(minutes=10)) ``` See the :std:ref:`fixed rate intervals` chapter in the cookbook for additional usage examples. 
diff --git a/flytekit/core/task.py b/flytekit/core/task.py index 8837fdc5cb..433e330986 100644 --- a/flytekit/core/task.py +++ b/flytekit/core/task.py @@ -35,10 +35,9 @@ class TaskPlugins(object): Usage ```python - - TaskPlugins.register_pythontask_plugin(config_object_type, plugin_object_type) - # config_object_type is any class that will be passed to the plugin_object as task_config - # Plugin_object_type is a derivative of ``PythonFunctionTask`` + TaskPlugins.register_pythontask_plugin(config_object_type, plugin_object_type) + # config_object_type is any class that will be passed to the plugin_object as task_config + # Plugin_object_type is a derivative of ``PythonFunctionTask`` ``` Examples of available task plugins include different query-based plugins such as :py:class:`flytekitplugins.athena.task.AthenaTask` and :py:class:`flytekitplugins.hive.task.HiveTask`, kubeflow @@ -62,10 +61,9 @@ def register_pythontask_plugin(cls, plugin_config_type: type, plugin: Type[Pytho Use this method to register a new plugin into Flytekit. Usage :: ```python - - TaskPlugins.register_pythontask_plugin(config_object_type, plugin_object_type) - # config_object_type is any class that will be passed to the plugin_object as task_config - # Plugin_object_type is a derivative of ``PythonFunctionTask`` + TaskPlugins.register_pythontask_plugin(config_object_type, plugin_object_type) + # config_object_type is any class that will be passed to the plugin_object as task_config + # Plugin_object_type is a derivative of ``PythonFunctionTask`` ``` """ if plugin_config_type in cls._PYTHONFUNCTION_TASK_PLUGINS: @@ -233,19 +231,17 @@ def task( For a simple python task, ```python - - @task - def my_task(x: int, y: typing.Dict[str, str]) -> str: - ... + @task + def my_task(x: int, y: typing.Dict[str, str]) -> str: + ... ``` For specific task types ```python - - @task(task_config=Spark(), retries=3) - def my_task(x: int, y: typing.Dict[str, str]) -> str: - ... 
+ @task(task_config=Spark(), retries=3) + def my_task(x: int, y: typing.Dict[str, str]) -> str: + ... ``` Please see some cookbook :std:ref:`task examples ` for additional information. @@ -284,19 +280,18 @@ def my_task(x: int, y: typing.Dict[str, str]) -> str: and they vary from the default. ```python - - # Use default image name `fqn` and alter the tag to `tag-{{default.tag}}` tag of the default image - # with a prefix. In this case, it is assumed that the image like - # flytecookbook:tag-gitsha is published alongwith the default of flytecookbook:gitsha - @task(container_image='{{.images.default.fqn}}:tag-{{images.default.tag}}') - def foo(): - ... - - # Refer to configurations to configure fqns for other images besides default. In this case it will - # lookup for an image named xyz - @task(container_image='{{.images.xyz.fqn}}:{{images.default.tag}}') - def foo2(): - ... + # Use default image name `fqn` and alter the tag to `tag-{{default.tag}}` tag of the default image + # with a prefix. In this case, it is assumed that the image like + # flytecookbook:tag-gitsha is published alongwith the default of flytecookbook:gitsha + @task(container_image='{{.images.default.fqn}}:tag-{{images.default.tag}}') + def foo(): + ... + + # Refer to configurations to configure fqns for other images besides default. In this case it will + # lookup for an image named xyz + @task(container_image='{{.images.xyz.fqn}}:{{images.default.tag}}') + def foo2(): + ... ``` :param environment: Environment variables that should be added for this tasks execution :param requests: Specify compute resource requests for your task. For Pod-plugin tasks, these values will apply only @@ -325,19 +320,18 @@ def foo2(): before they can be run. Tasks and workflows do not have this requirement. ```python + @workflow + def workflow0(): + ... - @workflow - def workflow0(): - ... 
- - launchplan0 = LaunchPlan.get_or_create(workflow0) + launchplan0 = LaunchPlan.get_or_create(workflow0) - # Specify node_dependency_hints so that launchplan0 will be registered on flyteadmin, despite this being a - # dynamic task. - @dynamic(node_dependency_hints=[launchplan0]) - def launch_dynamically(): - # To run a sub-launchplan it must have previously been registered on flyteadmin. - return [launchplan0]*10 + # Specify node_dependency_hints so that launchplan0 will be registered on flyteadmin, despite this being a + # dynamic task. + @dynamic(node_dependency_hints=[launchplan0]) + def launch_dynamically(): + # To run a sub-launchplan it must have previously been registered on flyteadmin. + return [launchplan0]*10 ``` :param task_resolver: Provide a custom task resolver. :param disable_deck: (deprecated) If true, this task will not output deck html file @@ -582,28 +576,27 @@ def eager( For example: ```python + from flytekit import task, eager - from flytekit import task, eager + @task + def add_one(x: int) -> int: + return x + 1 - @task - def add_one(x: int) -> int: - return x + 1 + @task + def double(x: int) -> int: + return x * 2 - @task - def double(x: int) -> int: - return x * 2 + @eager + async def eager_workflow(x: int) -> int: + out = add_one(x=x) + return double(x=out) - @eager - async def eager_workflow(x: int) -> int: - out = add_one(x=x) - return double(x=out) + # run locally with asyncio + if __name__ == "__main__": + import asyncio - # run locally with asyncio - if __name__ == "__main__": - import asyncio - - result = asyncio.run(eager_workflow(x=1)) - print(f"Result: {result}") # "Result: 4" + result = asyncio.run(eager_workflow(x=1)) + print(f"Result: {result}") # "Result: 4" ``` Unlike :py:func:`dynamic workflows `, eager workflows are not compiled into a workflow spec, but uses python's [`async`](https://docs.python.org/3/library/asyncio.html) capabilities to execute flyte entities. 
@@ -618,18 +611,17 @@ async def eager_workflow(x: int) -> int: configured via :py:class:`~flytekit.configuration.PlatformConfig`. ```python + from flytekit.remote import FlyteRemote + from flytekit.configuration import Config - from flytekit.remote import FlyteRemote - from flytekit.configuration import Config - - @eager( - remote=FlyteRemote(config=Config.auto(config_file="config.yaml")), - client_secret_group="my_client_secret_group", - client_secret_key="my_client_secret_key", - ) - async def eager_workflow(x: int) -> int: - out = await add_one(x) - return await double(one) + @eager( + remote=FlyteRemote(config=Config.auto(config_file="config.yaml")), + client_secret_group="my_client_secret_group", + client_secret_key="my_client_secret_key", + ) + async def eager_workflow(x: int) -> int: + out = await add_one(x) + return await double(one) ``` Where ``config.yaml`` contains is a flytectl-compatible config file. For more details, see [`here`](https://docs.flyte.org/en/latest/flytectl/overview.html#configuration). @@ -638,10 +630,9 @@ async def eager_workflow(x: int) -> int: and ``client_secret_key`` are not needed, : ```python - - @eager - async def eager_workflow(x: int) -> int: - ... + @eager + async def eager_workflow(x: int) -> int: + ... 
``` """ diff --git a/flytekit/core/testing.py b/flytekit/core/testing.py index 3ac703563f..505cf6d99b 100644 --- a/flytekit/core/testing.py +++ b/flytekit/core/testing.py @@ -21,15 +21,14 @@ def task_mock(t: PythonTask) -> typing.Generator[MagicMock, None, None]: Usage: ```python - - @task - def t1(i: int) -> int: - pass - - with task_mock(t1) as m: - m.side_effect = lambda x: x - t1(10) - # The mock is valid only within this context + @task + def t1(i: int) -> int: + pass + + with task_mock(t1) as m: + m.side_effect = lambda x: x + t1(10) + # The mock is valid only within this context ``` """ diff --git a/flytekit/core/tracker.py b/flytekit/core/tracker.py index d008d85f78..19143b1dd6 100644 --- a/flytekit/core/tracker.py +++ b/flytekit/core/tracker.py @@ -179,11 +179,10 @@ def isnested(func: Callable) -> bool: This would essentially be any function with a `..` (defined within a function) e.g. ```python - - def foo(): - def foo_inner(): - pass + def foo(): + def foo_inner(): pass + pass ``` In the above example `foo_inner` is the local function or a nested function. @@ -196,32 +195,31 @@ def is_functools_wrapped_module_level(func: Callable) -> bool: """Returns true if the function is a functools.wraps-updated function that is defined in the module-level scope. ```python + import functools - import functools - - def decorator(fn): - @functools.wraps(fn) - def wrapper(*args, **kwargs): - return fn(*arks, **kwargs) + def decorator(fn): + @functools.wraps(fn) + def wrapper(*args, **kwargs): + return fn(*arks, **kwargs) - return wrapper + return wrapper - @decorator - def foo(): - ... + @decorator + def foo(): + ... 
- def define_inner_wrapped_fn(): + def define_inner_wrapped_fn(): - @decorator - def foo_inner(*args, **kwargs): - return fn(*arks, **kwargs) + @decorator + def foo_inner(*args, **kwargs): + return fn(*arks, **kwargs) - return foo_inner + return foo_inner - bar = define_inner_wrapped_fn() + bar = define_inner_wrapped_fn() - is_functools_wrapped_module_level(foo) # True - is_functools_wrapped_module_level(bar) # False + is_functools_wrapped_module_level(foo) # True + is_functools_wrapped_module_level(bar) # False ``` In this case, applying this function to ``foo`` returns true because ``foo`` was defined in the module-level scope. diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index a2285b8998..2071e3f413 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -476,30 +476,28 @@ class DataclassTransformer(TypeTransformer[object]): Example ```python + @dataclass + class Test(DataClassJsonMixin): + a: int + b: str - @dataclass - class Test(DataClassJsonMixin): - a: int - b: str - - from marshmallow_jsonschema import JSONSchema - t = Test(a=10,b="e") - JSONSchema().dump(t.schema()) + from marshmallow_jsonschema import JSONSchema + t = Test(a=10,b="e") + JSONSchema().dump(t.schema()) ``` Output will look like ```python - - {'$schema': 'http://json-schema.org/draft-07/schema#', - 'definitions': {'TestSchema': {'properties': {'a': {'title': 'a', - 'type': 'number', - 'format': 'integer'}, - 'b': {'title': 'b', 'type': 'string'}}, - 'type': 'object', - 'additionalProperties': False}}, - '$ref': '#/definitions/TestSchema'} - ``` + {'$schema': 'http://json-schema.org/draft-07/schema#', + 'definitions': {'TestSchema': {'properties': {'a': {'title': 'a', + 'type': 'number', + 'format': 'integer'}, + 'b': {'title': 'b', 'type': 'string'}}, + 'type': 'object', + 'additionalProperties': False}}, + '$ref': '#/definitions/TestSchema'} +``` > [!NOTE] > The schema support is experimental and is useful for auto-completing in the UI/CLI 
diff --git a/flytekit/deck/deck.py b/flytekit/deck/deck.py index f9a45f0472..5df4b2fdc3 100644 --- a/flytekit/deck/deck.py +++ b/flytekit/deck/deck.py @@ -42,25 +42,24 @@ class Deck: their data with custom renderers. ```python - - iris_df = px.data.iris() - - @task() - def t1() -> str: - md_text = '#Hello Flyte##Hello Flyte###Hello Flyte' - m = MarkdownRenderer() - s = BoxRenderer("sepal_length") - deck = flytekit.Deck("demo", s.to_html(iris_df)) - deck.append(m.to_html(md_text)) - default_deck = flytekit.current_context().default_deck - default_deck.append(m.to_html(md_text)) - return md_text - - - # Use Annotated to override default renderer - @task() - def t2() -> Annotated[pd.DataFrame, TopFrameRenderer(10)]: - return iris_df + iris_df = px.data.iris() + + @task() + def t1() -> str: + md_text = '#Hello Flyte##Hello Flyte###Hello Flyte' + m = MarkdownRenderer() + s = BoxRenderer("sepal_length") + deck = flytekit.Deck("demo", s.to_html(iris_df)) + deck.append(m.to_html(md_text)) + default_deck = flytekit.current_context().default_deck + default_deck.append(m.to_html(md_text)) + return md_text + + + # Use Annotated to override default renderer + @task() + def t2() -> Annotated[pd.DataFrame, TopFrameRenderer(10)]: + return iris_df ``` """ diff --git a/flytekit/exceptions/eager.py b/flytekit/exceptions/eager.py index 5b599aee7a..7f96849cec 100644 --- a/flytekit/exceptions/eager.py +++ b/flytekit/exceptions/eager.py @@ -5,28 +5,27 @@ class EagerException(Exception): catch exceptions that are raised by tasks or subworkflows. 
```python + from flytekit import task + from flytekit.exceptions.eager import EagerException - from flytekit import task - from flytekit.exceptions.eager import EagerException + @task + def add_one(x: int) -> int: + if x < 0: + raise ValueError("x must be positive") + return x + 1 - @task - def add_one(x: int) -> int: - if x < 0: - raise ValueError("x must be positive") - return x + 1 + @task + def double(x: int) -> int: + return x * 2 - @task - def double(x: int) -> int: - return x * 2 - - @eager - async def eager_workflow(x: int) -> int: - try: - out = await add_one(x=x) - except EagerException: - # The ValueError error is caught - # and raised as an EagerException - raise - return await double(x=out) + @eager + async def eager_workflow(x: int) -> int: + try: + out = await add_one(x=x) + except EagerException: + # The ValueError error is caught + # and raised as an EagerException + raise + return await double(x=out) ``` """ diff --git a/flytekit/extras/accelerators.py b/flytekit/extras/accelerators.py index d6eb1ead7c..62b638e0ff 100644 --- a/flytekit/extras/accelerators.py +++ b/flytekit/extras/accelerators.py @@ -12,13 +12,12 @@ If you want to use a specific GPU device, you can pass the device name directly to the task decorator, e.g.: ```python - - @task( - limits=Resources(gpu="1"), - accelerator=GPUAccelerator("nvidia-tesla-v100"), - ) - def my_task() -> None: - ... +@task( + limits=Resources(gpu="1"), + accelerator=GPUAccelerator("nvidia-tesla-v100"), +) +def my_task() -> None: + ... ``` ### Base Classes @@ -38,28 +37,26 @@ def my_task() -> None: If using the constants, you can import them directly from the module, e.g.: ```python - - from flytekit.extras.accelerators import T4 - - @task( - limits=Resources(gpu="1"), - accelerator=T4, - ) - def my_task() -> None: - ... +from flytekit.extras.accelerators import T4 + +@task( + limits=Resources(gpu="1"), + accelerator=T4, +) +def my_task() -> None: + ... 
``` if you want to use a fractional GPU, you can use the ``partitioned`` method on the accelerator constant, e.g.: ```python - - from flytekit.extras.accelerators import A100 - - @task( - limits=Resources(gpu="1"), - accelerator=A100.partition_2g_10gb, - ) - def my_task() -> None: - ... +from flytekit.extras.accelerators import A100 + +@task( + limits=Resources(gpu="1"), + accelerator=A100.partition_2g_10gb, +) +def my_task() -> None: + ... ``` """ diff --git a/flytekit/extras/webhook/task.py b/flytekit/extras/webhook/task.py index ff11534256..cbf8cce151 100644 --- a/flytekit/extras/webhook/task.py +++ b/flytekit/extras/webhook/task.py @@ -23,35 +23,35 @@ class WebhookTask(SyncConnectorExecutorMixin, PythonTask): url="http://localhost:8000/", method=http.HTTPMethod.GET, headers={"Content-Type": "application/json"}, - ) - - get_with_params = WebhookTask( - name="get-with-params", - url="http://localhost:8000/items/{inputs.item_id}", - method=http.HTTPMethod.GET, - headers={"Content-Type": "application/json"}, - dynamic_inputs={"s": str, "item_id": int}, - show_data=True, - show_url=True, - description="Test Webhook Task", - data={"q": "{inputs.s}"}, - ) - + ) - @fk.workflow - def wf(s: str) -> (dict, dict, dict): - v = hello(s=s) - w = WebhookTask( - name="invoke-slack", - url="https://hooks.slack.com/services/xyz/zaa/aaa", + get_with_params = WebhookTask( + name="get-with-params", + url="http://localhost:8000/items/{inputs.item_id}", + method=http.HTTPMethod.GET, headers={"Content-Type": "application/json"}, - data={"text": "{inputs.s}"}, + dynamic_inputs={"s": str, "item_id": int}, show_data=True, show_url=True, description="Test Webhook Task", - dynamic_inputs={"s": str}, + data={"q": "{inputs.s}"}, ) - return simple_get(), get_with_params(s=v, item_id=10), w(s=v) + + + @fk.workflow + def wf(s: str) -> (dict, dict, dict): + v = hello(s=s) + w = WebhookTask( + name="invoke-slack", + url="https://hooks.slack.com/services/xyz/zaa/aaa", + headers={"Content-Type": 
"application/json"}, + data={"text": "{inputs.s}"}, + show_data=True, + show_url=True, + description="Test Webhook Task", + dynamic_inputs={"s": str}, + ) + return simple_get(), get_with_params(s=v, item_id=10), w(s=v) ``` All the parameters can be formatted using python format strings. The following parameters are available for diff --git a/flytekit/lazy_import/lazy_module.py b/flytekit/lazy_import/lazy_module.py index 9bd23da8cf..f92bf11d9b 100644 --- a/flytekit/lazy_import/lazy_module.py +++ b/flytekit/lazy_import/lazy_module.py @@ -31,9 +31,9 @@ def lazy_module(fullname): """ This function is used to lazily import modules. It is used in the following way: ```python - from flytekit.lazy_import import lazy_module - sklearn = lazy_module("sklearn") - sklearn.svm.SVC() + from flytekit.lazy_import import lazy_module + sklearn = lazy_module("sklearn") + sklearn.svm.SVC() ``` :param Text fullname: The full name of the module to import """ diff --git a/flytekit/remote/backfill.py b/flytekit/remote/backfill.py index 639818e9e0..73052ccbd6 100644 --- a/flytekit/remote/backfill.py +++ b/flytekit/remote/backfill.py @@ -24,21 +24,22 @@ def create_backfill_workflow( the Backfill plan is generated as (start_date - exclusive, end_date inclusive) - ```python - :caption: Correct usage for dates example + > [!NOTE] + > Correct usage for dates example - lp = Launchplan.get_or_create(...) - start_date = datetime.datetime(2023, 1, 1) - end_date = start_date + datetime.timedelta(days=10) - wf = create_backfill_workflow(start_date, end_date, for_lp=lp) + ```python + lp = Launchplan.get_or_create(...) 
+ start_date = datetime.datetime(2023, 1, 1) + end_date = start_date + datetime.timedelta(days=10) + wf = create_backfill_workflow(start_date, end_date, for_lp=lp) ``` + > [!WARNING] + > Incorrect date example ```python - :caption: Incorrect date example - - wf = create_backfill_workflow(end_date, start_date, for_lp=lp) # end_date is before start_date - # OR - wf = create_backfill_workflow(start_date, start_date, for_lp=lp) # start and end date are same + wf = create_backfill_workflow(end_date, start_date, for_lp=lp) # end_date is before start_date + # OR + wf = create_backfill_workflow(start_date, start_date, for_lp=lp) # start and end date are same ``` :param start_date: datetime generate a backfill starting at this datetime (exclusive) diff --git a/flytekit/types/directory/types.py b/flytekit/types/directory/types.py index 79406b0870..df30f10f3c 100644 --- a/flytekit/types/directory/types.py +++ b/flytekit/types/directory/types.py @@ -345,16 +345,15 @@ def listdir(cls, directory: FlyteDirectory) -> typing.List[typing.Union[FlyteDir contents of the file/folder. For example: ```python - - entity = FlyteDirectory.listdir(directory) - for e in entity: - print("s3 object:", e.remote_source) - # s3 object: s3://test-flytedir/file1.txt - # s3 object: s3://test-flytedir/file2.txt - # s3 object: s3://test-flytedir/sub_dir - - open(entity[0], "r") # This will download the file to the local disk. - open(entity[0], "r") # flytekit will read data from the local disk if you open it again. + entity = FlyteDirectory.listdir(directory) + for e in entity: + print("s3 object:", e.remote_source) + # s3 object: s3://test-flytedir/file1.txt + # s3 object: s3://test-flytedir/file2.txt + # s3 object: s3://test-flytedir/sub_dir + + open(entity[0], "r") # This will download the file to the local disk. + open(entity[0], "r") # flytekit will read data from the local disk if you open it again. 
``` """ diff --git a/flytekit/types/file/file.py b/flytekit/types/file/file.py index 07623d2404..67ceb24dfe 100644 --- a/flytekit/types/file/file.py +++ b/flytekit/types/file/file.py @@ -400,14 +400,13 @@ def open( """Returns a streaming File handle ```python - - @task - def copy_file(ff: FlyteFile) -> FlyteFile: - new_file = FlyteFile.new_remote_file() - with ff.open("rb", cache_type="readahead") as r: - with new_file.open("wb") as w: - w.write(r.read()) - return new_file + @task + def copy_file(ff: FlyteFile) -> FlyteFile: + new_file = FlyteFile.new_remote_file() + with ff.open("rb", cache_type="readahead") as r: + with new_file.open("wb") as w: + w.write(r.read()) + return new_file ``` :param mode: Open mode. For example: 'r', 'w', 'rb', 'rt', 'wb', etc. From 7bd911ca08bffed67814cee2bd54ffe7ae6d0031 Mon Sep 17 00:00:00 2001 From: chmod77 Date: Sun, 6 Apr 2025 02:01:21 +0300 Subject: [PATCH 10/20] chore: further docstring cleanup Signed-off-by: chmod77 Signed-off-by: Peeter Piegaze <1153481+ppiegaze@users.noreply.github.com> --- flytekit/clients/friendly.py | 2 +- flytekit/clis/sdk_in_container/serialize.py | 22 +- flytekit/core/legacy_map_task.py | 22 +- flytekit/core/reference.py | 13 + flytekit/core/task.py | 19 +- flytekit/core/workflow.py | 566 +++++++++++++++++++- flytekit/extras/sqlite3/task.py | 15 + 7 files changed, 643 insertions(+), 16 deletions(-) diff --git a/flytekit/clients/friendly.py b/flytekit/clients/friendly.py index 68c9d703cc..062449b490 100644 --- a/flytekit/clients/friendly.py +++ b/flytekit/clients/friendly.py @@ -38,7 +38,7 @@ class SynchronousFlyteClient(_RawSynchronousFlyteClient): """ This is a low-level client that users can use to make direct gRPC service calls to the control plane. See the :std:doc:`service spec `. 
This is more user-friendly interface than the - :py:class:`raw client ` so users should try to use this class + {{< py_class_ref flytekit.clients.raw.RawSynchronousFlyteClient >}} so users should try to use this class first. Create a client by ```python diff --git a/flytekit/clis/sdk_in_container/serialize.py b/flytekit/clis/sdk_in_container/serialize.py index 85a089e7e1..7329a99ad7 100644 --- a/flytekit/clis/sdk_in_container/serialize.py +++ b/flytekit/clis/sdk_in_container/serialize.py @@ -37,15 +37,19 @@ def serialize_all( env: typing.Optional[typing.Dict[str, str]] = None, ): """ - This function will write to the folder specified the following protobuf types :: - flyteidl.admin.launch_plan_pb2.LaunchPlan - flyteidl.admin.workflow_pb2.WorkflowSpec - flyteidl.admin.task_pb2.TaskSpec - - These can be inspected by calling (in the launch plan case) :: - flyte-cli parse-proto -f filename.pb -p flyteidl.admin.launch_plan_pb2.LaunchPlan - - See :py:class:`flytekit.models.core.identifier.ResourceType` to match the trailing index in the file name with the + This function will write to the folder specified the following protobuf types + ``` + flyteidl.admin.launch_plan_pb2.LaunchPlan + flyteidl.admin.workflow_pb2.WorkflowSpec + flyteidl.admin.task_pb2.TaskSpec + ``` + + These can be inspected by calling (in the launch plan case) + ```bash + flyte-cli parse-proto -f filename.pb -p flyteidl.admin.launch_plan_pb2.LaunchPlan + ``` + + See {{< py_class_ref flytekit.models.core.identifier.ResourceType >}} to match the trailing index in the file name with the entity type. :param pkgs: Dot-delimited Python packages/subpackages to look into for serialization. :param local_source_root: Where to start looking for the code. 
diff --git a/flytekit/core/legacy_map_task.py b/flytekit/core/legacy_map_task.py index 95b4635ea2..1e12ba9049 100644 --- a/flytekit/core/legacy_map_task.py +++ b/flytekit/core/legacy_map_task.py @@ -308,20 +308,36 @@ def map_task( ): """ Use a map task for parallelizable tasks that run across a list of an input type. A map task can be composed of - any individual :py:class:`flytekit.PythonFunctionTask`. + any individual {{< py_class_ref flytekit.PythonFunctionTask >}}. - Invoke a map task with arguments using the :py:class:`list` version of the expected input. + Invoke a map task with arguments using {{}} version of the expected input. Usage: + + ```python + @task + def my_mappable_task(a: int) -> typing.Optional[str]: + return str(a) + + @workflow + def my_wf(x: typing.List[int]) -> typing.List[typing.Optional[str]]: + return map_task( + my_mappable_task, + metadata=TaskMetadata(retries=1), + concurrency=10, + min_success_ratio=0.75, + )(a=x).with_overrides(requests=Resources(cpu="10M")) + ``` At run time, the underlying map task will be run for every value in the input collection. Attributes - such as :py:class:`flytekit.TaskMetadata` and ``with_overrides`` are applied to individual instances + such as {{< py_class_ref flytekit.TaskMetadata >}} and ``with_overrides`` are applied to individual instances of the mapped task. **Map Task Plugins** diff --git a/flytekit/core/reference.py b/flytekit/core/reference.py index 6a88549c43..a30d53e01a 100644 --- a/flytekit/core/reference.py +++ b/flytekit/core/reference.py @@ -24,11 +24,24 @@ def get_reference_entity( This function is the general form of the two aforementioned functions. It's better for programmatic usage, as the interface is passed in as arguments instead of analyzed from type annotations. 
+ + ```python + ref_entity = get_reference_entity( + _identifier_model.ResourceType.WORKFLOW, + "project", + "dev", + "my.other.workflow", + "abc123", + inputs=kwtypes(a=str, b=int), + outputs={}, + ) + ``` :param resource_type: This is the type of entity it is. Must be one of :py:class:`flytekit.models.core.identifier.ResourceType` diff --git a/flytekit/core/task.py b/flytekit/core/task.py index 433e330986..a3d2d2c235 100644 --- a/flytekit/core/task.py +++ b/flytekit/core/task.py @@ -489,10 +489,25 @@ def reference_task( If at registration time the interface provided causes an issue with compilation, an error will be returned. Example: - + + ```python + @reference_task( + project="flytesnacks", + domain="development", + name="recipes.aaa.simple.join_strings", + version="553018f39e519bdb2597b652639c30ce16b99c79", + ) + def ref_t1(a: typing.List[str]) -> str: + ''' + The empty function acts as a convenient skeleton to make it intuitive to call/reference this task from workflows. + The interface of the task must match that of the remote task. Otherwise, remote compilation of the workflow will + fail. + ''' + return "hello" + ``` """ def wrapper(fn) -> ReferenceTask: diff --git a/flytekit/core/workflow.py b/flytekit/core/workflow.py index 3422a5bade..3616ec1a58 100644 --- a/flytekit/core/workflow.py +++ b/flytekit/core/workflow.py @@ -403,27 +403,63 @@ class ImperativeWorkflow(WorkflowBase): Assuming you have some tasks like so + + ```python + @task + def t1(a: str) -> str: + return a + " world" + + @task + def t2(): + print("side effect") + ``` You could create a workflow imperatively like so + + ```python + # Create the workflow with a name. This needs to be unique within the project and takes the place of the function + # name that's used for regular decorated function-based workflows. + wb = Workflow(name="my_workflow") + # Adds a top level input to the workflow. This is like an input to a workflow function. + wb.add_workflow_input("in1", str) + # Call your tasks. 
+ node = wb.add_entity(t1, a=wb.inputs["in1"]) + wb.add_entity(t2) + # This is analogous to a return statement + wb.add_workflow_output("from_n0t1", node.outputs["o0"]) + ``` This workflow would be identical on the back-end to + + ```python + nt = typing.NamedTuple("wf_output", [("from_n0t1", str)]) + + @workflow + def my_workflow(in1: str) -> nt: + x = t1(a=in1) + t2() + return nt(x) + ``` Note that the only reason we need the ``NamedTuple`` is so we can name the output the same thing as in the imperative example. The imperative paradigm makes the naming of workflow outputs easier, but this isn't a big @@ -940,8 +976,528 @@ def workflow( Example: + + + ```python + import os + import sys + import typing + from collections import OrderedDict + from unittest.mock import patch + + import pytest + from typing_extensions import Annotated # type: ignore + + import flytekit.configuration + from flytekit import FlyteContextManager, StructuredDataset, kwtypes + from flytekit.configuration import Image, ImageConfig + from flytekit.core import context_manager + from flytekit.core.condition import conditional + from flytekit.core.task import task + from flytekit.core.workflow import WorkflowFailurePolicy, WorkflowMetadata, WorkflowMetadataDefaults, workflow + from flytekit.exceptions.user import FlyteValidationException, FlyteValueException, FlyteMissingReturnValueException + from flytekit.tools.translator import get_serializable + from flytekit.types.error.error import FlyteError + + default_img = Image(name="default", fqn="test", tag="tag") + serialization_settings = flytekit.configuration.SerializationSettings( + project="project", + domain="domain", + version="version", + env=None, + image_config=ImageConfig(default_image=default_img, images=[default_img]), + ) + + def test_metadata_values(): + with pytest.raises(FlyteValidationException): + WorkflowMetadata(on_failure=0) + + wm = WorkflowMetadata(on_failure=WorkflowFailurePolicy.FAIL_IMMEDIATELY) + assert wm.on_failure == 
WorkflowFailurePolicy.FAIL_IMMEDIATELY + + + def test_default_metadata_values(): + with pytest.raises(FlyteValidationException): + WorkflowMetadataDefaults(3) + + wm = WorkflowMetadataDefaults(interruptible=False) + assert wm.interruptible is False + + + def test_workflow_values(): + @task + def t1(a: int) -> typing.NamedTuple("OutputsBC", [("t1_int_output", int), ("c", str)]): + a = a + 2 + return a, "world-" + str(a) + + @workflow(interruptible=True, failure_policy=WorkflowFailurePolicy.FAIL_AFTER_EXECUTABLE_NODES_COMPLETE) + def wf(a: int) -> typing.Tuple[str, str]: + x, y = t1(a=a) + _, v = t1(a=x) + return y, v + + wf_spec = get_serializable(OrderedDict(), serialization_settings, wf) + assert wf_spec.template.metadata_defaults.interruptible + assert wf_spec.template.metadata.on_failure == 1 + + def test_default_values(): + @task + def t() -> bool: + return True + + @task + def f() -> bool: + return False + + @workflow + def wf(a: bool = True) -> bool: + return conditional("bool").if_(a.is_true()).then(t()).else_().then(f()) # type: ignore + + assert wf() is True + assert wf(a=False) is False + + + def test_list_output_wf(): + @task + def t1(a: int) -> int: + a = a + 5 + return a + + @workflow + def list_output_wf() -> typing.List[int]: + v = [] + for i in range(2): + v.append(t1(a=i)) + return v + + x = list_output_wf() + assert x == [5, 6] + + + def test_sub_wf_single_named_tuple(): + nt = typing.NamedTuple("SingleNamedOutput", [("named1", int)]) + + @task + def t1(a: int) -> nt: + a = a + 2 + return nt(a) + + @workflow + def subwf(a: int) -> nt: + return t1(a=a) + + @workflow + def wf(b: int) -> nt: + out = subwf(a=b) + return t1(a=out.named1) + + x = wf(b=3) + assert x == (7,) + + + def test_sub_wf_multi_named_tuple(): + nt = typing.NamedTuple("Multi", [("named1", int), ("named2", int)]) + + @task + def t1(a: int) -> nt: + a = a + 2 + return nt(a, a) + + @workflow + def subwf(a: int) -> nt: + return t1(a=a) + + @workflow + def wf(b: int) -> nt: + out = 
subwf(a=b) + return t1(a=out.named1) + + x = wf(b=3) + assert x == (7, 7) + + + def test_sub_wf_varying_types(): + @task + def t1l( + a: typing.List[typing.Dict[str, typing.List[int]]], + b: typing.Dict[str, typing.List[int]], + c: typing.Union[typing.List[typing.Dict[str, typing.List[int]]], typing.Dict[str, typing.List[int]], int], + d: int, + ) -> str: + xx = ",".join([f"{k}:{v}" for d in a for k, v in d.items()]) + yy = ",".join([f"{k}: {i}" for k, v in b.items() for i in v]) + if isinstance(c, list): + zz = ",".join([f"{k}:{v}" for d in c for k, v in d.items()]) + elif isinstance(c, dict): + zz = ",".join([f"{k}: {i}" for k, v in c.items() for i in v]) + else: + zz = str(c) + return f"First: {xx} Second: {yy} Third: {zz} Int: {d}" + + @task + def get_int() -> int: + return 1 + + @workflow + def subwf( + a: typing.List[typing.Dict[str, typing.List[int]]], + b: typing.Dict[str, typing.List[int]], + c: typing.Union[typing.List[typing.Dict[str, typing.List[int]]], typing.Dict[str, typing.List[int]]], + d: int, + ) -> str: + return t1l(a=a, b=b, c=c, d=d) + + @workflow + def wf() -> str: + ds = [ + {"first_map_a": [42], "first_map_b": [get_int(), 2]}, + { + "second_map_c": [33], + "second_map_d": [9, 99], + }, + ] + ll = { + "ll_1": [get_int(), get_int(), get_int()], + "ll_2": [4, 5, 6], + } + out = subwf(a=ds, b=ll, c=ds, d=get_int()) + return out + + wf.compile() + x = wf() + expected = ( + "First: first_map_a:[42],first_map_b:[1, 2],second_map_c:[33],second_map_d:[9, 99] " + "Second: ll_1: 1,ll_1: 1,ll_1: 1,ll_2: 4,ll_2: 5,ll_2: 6 " + "Third: first_map_a:[42],first_map_b:[1, 2],second_map_c:[33],second_map_d:[9, 99] " + "Int: 1" + ) + assert x == expected + wf_spec = get_serializable(OrderedDict(), serialization_settings, wf) + assert set(wf_spec.template.nodes[5].upstream_node_ids) == {"n2", "n1", "n0", "n4", "n3"} + + @workflow + def wf() -> str: + ds = [ + {"first_map_a": [42], "first_map_b": [get_int(), 2]}, + { + "second_map_c": [33], + "second_map_d": [9, 
99], + }, + ] + ll = { + "ll_1": [get_int(), get_int(), get_int()], + "ll_2": [4, 5, 6], + } + out = subwf(a=ds, b=ll, c=ll, d=get_int()) + return out + + x = wf() + expected = ( + "First: first_map_a:[42],first_map_b:[1, 2],second_map_c:[33],second_map_d:[9, 99] " + "Second: ll_1: 1,ll_1: 1,ll_1: 1,ll_2: 4,ll_2: 5,ll_2: 6 " + "Third: ll_1: 1,ll_1: 1,ll_1: 1,ll_2: 4,ll_2: 5,ll_2: 6 " + "Int: 1" + ) + assert x == expected + + + def test_unexpected_outputs(): + @task + def t1(a: int) -> int: + a = a + 5 + return a + + @workflow + def no_outputs_wf(): + return t1(a=3) + + # Should raise an exception because the workflow returns something when it shouldn't + with pytest.raises(FlyteValueException): + no_outputs_wf() + + @pytest.mark.skipif(sys.version_info < (3, 10, 10), reason="inspect module does not work correctly with Python <3.10.10. https://github.com/python/cpython/issues/102647#issuecomment-1466868212") + def test_missing_return_value(): + @task + def t1(a: int) -> int: + a = a + 5 + return a + + # Should raise an exception because it doesn't return something when it should + with pytest.raises(FlyteMissingReturnValueException): + + @workflow + def one_output_wf() -> int: # type: ignore + t1(a=3) + + one_output_wf() + + + def test_custom_wrapper(): + def our_task( + _task_function: typing.Optional[typing.Callable] = None, + **kwargs, + ): + def wrapped(_func: typing.Callable): + return task(_task_function=_func) + + if _task_function: + return wrapped(_task_function) + else: + return wrapped + + @our_task( + foo={ + "bar1": lambda x: print(x), + "bar2": lambda x: print(x), + }, + ) + def missing_func_body() -> str: + return "foo" + + + def test_wf_no_output(): + @task + def t1(a: int) -> int: + a = a + 5 + return a + + @workflow + def no_outputs_wf(): + t1(a=3) + + assert no_outputs_wf() is None + + + def test_wf_nested_comp(exec_prefix): + @task + def t1(a: int) -> int: + a = a + 5 + return a + + @workflow + def outer() -> typing.Tuple[int, int]: + # You 
should not do this. This is just here for testing. + @workflow + def wf2() -> int: + return t1(a=5) + + return t1(a=3), wf2() + + assert (8, 10) == outer() + entity_mapping = OrderedDict() + + model_wf = get_serializable(entity_mapping, serialization_settings, outer) + + assert len(model_wf.template.interface.outputs) == 2 + assert len(model_wf.template.nodes) == 2 + assert model_wf.template.nodes[1].workflow_node is not None + + sub_wf = model_wf.sub_workflows[0] + assert len(sub_wf.nodes) == 1 + assert sub_wf.nodes[0].id == "n0" + assert sub_wf.nodes[0].task_node.reference_id.name == f"{exec_prefix}tests.flytekit.unit.core.test_workflows.t1" + + + @task + def add_5(a: int) -> int: + a = a + 5 + return a + + + @workflow + def simple_wf() -> int: + return add_5(a=1) + + @workflow + def my_wf_example(a: int) -> typing.Tuple[int, int]: + '''example + + Workflows can have inputs and return outputs of simple or complex types. + + ''' + + x = add_5(a=a) + + # You can use outputs of a previous task as inputs to other nodes. + z = add_5(a=x) + + # You can call other workflows from within this workflow + d = simple_wf() + + # You can add conditions that can run on primitive types and execute different branches + e = conditional("bool").if_(a == 5).then(add_5(a=d)).else_().then(add_5(a=z)) + + # Outputs of the workflow have to be outputs returned by prior nodes. 
+ # No outputs and single or multiple outputs are supported + return x, e + + def test_workflow_lhs(): + assert my_wf_example._lhs == "my_wf_example" + + + def test_all_node_types(): + assert my_wf_example(a=1) == (6, 16) + entity_mapping = OrderedDict() + + model_wf = get_serializable(entity_mapping, serialization_settings, my_wf_example) + + assert len(model_wf.template.interface.outputs) == 2 + assert len(model_wf.template.nodes) == 4 + assert model_wf.template.nodes[2].workflow_node is not None + + sub_wf = model_wf.sub_workflows[0] + assert len(sub_wf.nodes) == 1 + assert sub_wf.nodes[0].id == "n0" + assert sub_wf.nodes[0].task_node.reference_id.name == "tests.flytekit.unit.core.test_workflows.add_5" + + + def test_wf_docstring(): + model_wf = get_serializable(OrderedDict(), serialization_settings, my_wf_example) + + assert len(model_wf.template.interface.outputs) == 2 + assert model_wf.template.interface.outputs["o0"].description == "outputs" + assert model_wf.template.interface.outputs["o1"].description == "outputs" + assert len(model_wf.template.interface.inputs) == 1 + assert model_wf.template.interface.inputs["a"].description == "input a" + + + @pytest.mark.skipif("pandas" not in sys.modules, reason="Pandas is not installed.") + def test_structured_dataset_wf(): + import pandas as pd + from pandas.testing import assert_frame_equal + + from flytekit.types.schema import FlyteSchema + + superset_cols = kwtypes(Name=str, Age=int, Height=int) + subset_cols = kwtypes(Name=str) + superset_df = pd.DataFrame({"Name": ["Tom", "Joseph"], "Age": [20, 22], "Height": [160, 178]}) + subset_df = pd.DataFrame({"Name": ["Tom", "Joseph"]}) + + @task + def t1() -> Annotated[pd.DataFrame, superset_cols]: + return superset_df + + @task + def t2(df: Annotated[pd.DataFrame, subset_cols]) -> Annotated[pd.DataFrame, subset_cols]: + return df + + @task + def t3(df: FlyteSchema[superset_cols]) -> FlyteSchema[superset_cols]: + return df + + @task + def t4() -> 
FlyteSchema[superset_cols]: + return superset_df + + @task + def t5(sd: Annotated[StructuredDataset, subset_cols]) -> Annotated[pd.DataFrame, subset_cols]: + return sd.open(pd.DataFrame).all() + + @workflow + def sd_wf() -> Annotated[pd.DataFrame, subset_cols]: + # StructuredDataset -> StructuredDataset + df = t1() + return t2(df=df) + + @workflow + def sd_to_schema_wf() -> pd.DataFrame: + # StructuredDataset -> schema + df = t1() + return t3(df=df) + + @workflow + def schema_to_sd_wf() -> typing.Tuple[pd.DataFrame, pd.DataFrame]: + # schema -> StructuredDataset + df = t4() + return t2(df=df), t5(sd=df) # type: ignore + + assert_frame_equal(sd_wf(), subset_df) + assert_frame_equal(sd_to_schema_wf(), superset_df) + assert_frame_equal(schema_to_sd_wf()[0], subset_df) + assert_frame_equal(schema_to_sd_wf()[1], subset_df) + + + @pytest.mark.skipif("pandas" not in sys.modules, reason="Pandas is not installed.") + def test_compile_wf_at_compile_time(): + import pandas as pd + + from flytekit.types.schema import FlyteSchema + + superset_cols = kwtypes(Name=str, Age=int, Height=int) + superset_df = pd.DataFrame({"Name": ["Tom", "Joseph"], "Age": [20, 22], "Height": [160, 178]}) + + ctx = FlyteContextManager.current_context() + with FlyteContextManager.with_context( + ctx.with_execution_state( + ctx.new_execution_state().with_params(mode=context_manager.ExecutionState.Mode.TASK_EXECUTION) + ) + ): + + @task + def t4() -> FlyteSchema[superset_cols]: + return superset_df + + @workflow + def wf(): + t4() + + assert ctx.compilation_state is None + + + @pytest.mark.parametrize( + "error_message", [ + "Fail!", + None, + "", + ("big", "boom!") + ] + ) + @patch("builtins.print") + def test_failure_node_local_execution(mock_print, error_message, exec_prefix): + @task + def clean_up(name: str, err: typing.Optional[FlyteError] = None): + print(f"Deleting cluster {name} due to {err}") + print("This is err:", str(err)) + + @task + def create_cluster(name: str): + print(f"Creating 
cluster: {name}") + + @task + def delete_cluster(name: str, err: typing.Optional[FlyteError] = None): + print(f"Deleting cluster {name}") + print(err) + + @task + def t1(a: int, b: str): + print(f"{a} {b}") + raise ValueError(error_message) + + @workflow(on_failure=clean_up) + def wf(name: str = "flyteorg"): + c = create_cluster(name=name) + t = t1(a=1, b="2") + d = delete_cluster(name=name) + c >> t >> d + + with pytest.raises(ValueError): + wf() + + # Adjusted the error message to match the one in the failure + expected_error_message = str( + FlyteError(message=f"Error encountered while executing '{exec_prefix}tests.flytekit.unit.core.test_workflows.t1':\n {error_message}", failed_node_id="fn0") + ) + + assert mock_print.call_count > 0 + + mock_print.assert_any_call("Creating cluster: flyteorg") + mock_print.assert_any_call("1 2") + mock_print.assert_any_call(f"Deleting cluster flyteorg due to {expected_error_message}") + mock_print.assert_any_call("This is err:", expected_error_message) + ``` + Again, users should keep in mind that even though the body of the function looks like regular Python, it is actually not. When flytekit scans the workflow function, the objects being passed around between the tasks are not @@ -1012,8 +1568,16 @@ def reference_workflow( Example: + + ```python + @reference_workflow(project="proj", domain="development", name="wf_name", version="abc") + def ref_wf1(a: int) -> typing.Tuple[str, str]: + ... + return "hello", "world" + ``` """ def wrapper(fn) -> ReferenceWorkflow: diff --git a/flytekit/extras/sqlite3/task.py b/flytekit/extras/sqlite3/task.py index 866898fe8e..bb563b3629 100644 --- a/flytekit/extras/sqlite3/task.py +++ b/flytekit/extras/sqlite3/task.py @@ -61,11 +61,26 @@ class SQLite3Task(PythonCustomizedContainerTask[SQLite3Config], SQLTask[SQLite3C > This is a pre-built container task. That is, your user container will not be used at task execution time. Instead the image defined in this task definition will be used instead. 
+ + ```python + sql_task = SQLite3Task( + "test", + query_template="select TrackId, Name from tracks limit {{.inputs.limit}}", + inputs=kwtypes(limit=int), + output_schema_type=FlyteSchema[kwtypes(TrackId=int, Name=str)], + task_config=SQLite3Config( + uri=EXAMPLE_DB, + compressed=True, + ), + ) + ``` + See the :ref:`integrations guide ` for additional usage examples and the base class :py:class:`flytekit.extend.PythonCustomizedContainerTask` as well. From 8f9450d9dc90e9671592c62a5be7be602f5e50f2 Mon Sep 17 00:00:00 2001 From: chmod77 Date: Sun, 6 Apr 2025 02:30:34 +0300 Subject: [PATCH 11/20] chore: further docstring cleanup Signed-off-by: chmod77 Signed-off-by: Peeter Piegaze <1153481+ppiegaze@users.noreply.github.com> --- flytekit/core/legacy_map_task.py | 2 +- flytekit/core/reference.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flytekit/core/legacy_map_task.py b/flytekit/core/legacy_map_task.py index 1e12ba9049..72a7c1decd 100644 --- a/flytekit/core/legacy_map_task.py +++ b/flytekit/core/legacy_map_task.py @@ -314,7 +314,7 @@ def map_task( Usage: -