diff --git a/flytekit/image_spec/image_spec.py b/flytekit/image_spec/image_spec.py index 41383c7e5f..f6fcfcf96b 100644 --- a/flytekit/image_spec/image_spec.py +++ b/flytekit/image_spec/image_spec.py @@ -11,7 +11,7 @@ from dataclasses import asdict, dataclass from functools import cached_property, lru_cache from importlib import metadata -from typing import Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import click import requests @@ -30,50 +30,57 @@ class ImageSpec: """ This class is used to specify the docker image that will be used to run the task. - Args: - name: name of the image. - python_version: python version of the image. Use default python in the base image if None. - builder: Type of plugin to build the image. Use envd by default. - source_root: source root of the image. - env: environment variables of the image. - registry: registry of the image. - packages: list of python packages to install. - conda_packages: list of conda packages to install. - conda_channels: list of conda channels. - requirements: path to the requirements.txt file. - apt_packages: list of apt packages to install. - cuda: version of cuda to install. - cudnn: version of cudnn to install. - base_image: base image of the image. - platform: Specify the target platforms for the build output (for example, windows/amd64 or linux/amd64,darwin/arm64 - pip_index: Specify the custom pip index url - pip_extra_index_url: Specify one or more pip index urls as a list - pip_secret_mounts: Specify a list of tuples to mount secret for pip install. Each tuple should contain the path to + Attributes: + name (str): Name of the image. + python_version (str): Python version of the image. Use default python in the base image if None. + builder (Optional[str]): Type of plugin to build the image. Use envd by default. + source_root (Optional[str]): Source root of the image. + env (Optional[Dict[str, str]]): Environment variables of the image. 
+ registry (Optional[str]): Registry of the image. + packages (Optional[List[str]]): List of python packages to install. + conda_packages (Optional[List[str]]): List of conda packages to install. + conda_channels (Optional[List[str]]): List of conda channels. + requirements (Optional[str]): Path to the requirements.txt file. + apt_packages (Optional[List[str]]): List of apt packages to install. + cuda (Optional[str]): Version of cuda to install. + cudnn (Optional[str]): Version of cudnn to install. + base_image (Optional[Union[str, 'ImageSpec']]): Base image of the image. + platform (str): Specify the target platforms for the build output (for example, windows/amd64 or linux/amd64,darwin/arm64). + pip_index (Optional[str]): Specify the custom pip index url. + pip_extra_index_url (Optional[List[str]]): Specify one or more pip index urls as a list. + pip_secret_mounts (Optional[List[Tuple[str, str]]]): Specify a list of tuples to mount secret for pip install. Each tuple should contain the path to the secret file and the mount path. For example, [(".gitconfig", "/etc/gitconfig")]. This is experimental and the interface may change in the future. Configuring this should not change the built image. - pip_extra_args: Specify one or more extra pip install arguments as a space-delimited string - registry_config: Specify the path to a JSON registry config file - entrypoint: List of strings to overwrite the entrypoint of the base image with, set to [] to remove the entrypoint. - commands: Command to run during the building process - tag_format: Custom string format for image tag. The ImageSpec hash passed in as `spec_hash`. For example, - to add a "dev" suffix to the image tag, set `tag_format="{spec_hash}-dev"` - source_copy_mode: This option allows the user to specify which source files to copy from the local host, into the image. + pip_extra_args (Optional[str]): Specify one or more extra pip install arguments as a space-delimited string. 
+ registry_config (Optional[str]): Specify the path to a JSON registry config file. + entrypoint (Optional[List[str]]): List of strings to overwrite the entrypoint of the base image with, set to [] to remove the entrypoint. + commands (Optional[List[str]]): Commands to run during the build process. + tag_format (Optional[str]): Custom string format for image tag. The ImageSpec hash is passed in as `spec_hash`. For example, + to add a "dev" suffix to the image tag, set `tag_format="{spec_hash}-dev"`. + source_copy_mode (Optional[CopyFileDetection]): This option allows the user to specify which source files to copy from the local host, into the image. Not setting this option means to use the default flytekit behavior. The default behavior is: - if fast register is used, source files are not copied into the image (because they're already copied into the fast register tar layer). - if fast register is not used, then the LOADED_MODULES (aka 'auto') option is used to copy loaded Python files into the image. - If the option is set by the user, then that option is of course used. - copy: List of files/directories to copy to /root. e.g. ["src/file1.txt", "src/file2.txt"] - python_exec: Python executable to use for install packages + copy (Optional[List[str]]): List of files/directories to copy to /root. e.g. ["src/file1.txt", "src/file2.txt"]. + python_exec (Optional[str]): Python executable to use for installing packages. + runtime_packages (Optional[List[str]]): List of packages to be installed during runtime. `runtime_packages` requires `pip` to be installed + in your base image. + - If you are using an ImageSpec as your base image, please include `pip` in your packages: + `ImageSpec(..., packages=["pip"])`. + - If you want to install runtime packages into a fixed base_image and not use an image builder, you can + use `builder="noop"`: `ImageSpec(base_image="ghcr.io/name/my-custom-image", builder="noop").with_runtime_packages(["numpy"])`. 
+ builder_options (Optional[Dict[str, Any]]): Additional options for the builder. This is a dictionary that will be passed to the builder. + The options are builder-specific and may not be supported by all builders. """ name: str = "flytekit" python_version: str = None # Use default python in the base image if None. builder: Optional[str] = None source_root: Optional[str] = None # a.txt:auto - env: Optional[typing.Dict[str, str]] = None + env: Optional[Dict[str, str]] = None registry: Optional[str] = None packages: Optional[List[str]] = None conda_packages: Optional[List[str]] = None @@ -95,6 +102,8 @@ class ImageSpec: source_copy_mode: Optional[CopyFileDetection] = None copy: Optional[List[str]] = None python_exec: Optional[str] = None + runtime_packages: Optional[List[str]] = None + builder_options: Optional[Dict[str, Any]] = None def __post_init__(self): self.name = self.name.lower() @@ -145,6 +154,9 @@ def __post_init__(self): error_msg = "pip_secret_mounts must be a list of tuples of two strings or None" raise ValueError(error_msg) + if self.builder_options is not None and not isinstance(self.builder_options, dict): + raise ValueError("builder_options must be a dictionary or None") + @cached_property def id(self) -> str: """ @@ -310,16 +322,30 @@ def exist(self) -> Optional[bool]: click.secho(f"Failed to check if the image exists with error:\n {e}", fg="red") return None - def _update_attribute(self, attr_name: str, values: Union[str, List[str]]) -> "ImageSpec": + def _update_attribute(self, attr_name: str, values: Union[str, List[str], Dict[str, Any]]) -> "ImageSpec": """ - Generic method to update a specified list attribute, either appending or extending. + Generic method to update a specified attribute, handling strings, lists, and dictionaries. 
+ """ - current_value = copy.deepcopy(getattr(self, attr_name)) or [] + current_value = copy.deepcopy(getattr(self, attr_name)) + + if current_value is None: + if isinstance(values, dict): + current_value = {} + else: + current_value = [] if isinstance(values, str): + if not isinstance(current_value, list): + raise TypeError(f"Cannot append string to non-list attribute {attr_name}") current_value.append(values) elif isinstance(values, list): + if not isinstance(current_value, list): + raise TypeError(f"Cannot extend non-list attribute {attr_name}") current_value.extend(values) + elif isinstance(values, dict): + if not isinstance(current_value, dict): + raise TypeError(f"Cannot update non-dict attribute {attr_name}") + current_value.update(values) return dataclasses.replace(self, **{attr_name: current_value}) @@ -358,6 +384,18 @@ def force_push(self) -> "ImageSpec": return copied_image_spec + def with_runtime_packages(self, runtime_packages: List[str]) -> "ImageSpec": + """ + Builder that returns a new image spec with runtime packages. Runtime packages will be installed during runtime. + """ + return self._update_attribute("runtime_packages", runtime_packages) + + def with_builder_options(self, builder_options: Dict[str, Any]) -> "ImageSpec": + """ + Builder that returns a new image spec with additional builder options. 
+ """ + return self._update_attribute("builder_options", builder_options) + @classmethod def from_env(cls, *, pinned_packages: Optional[List[str]] = None, **kwargs) -> "ImageSpec": """Create ImageSpec with the environment's Python version and packages pinned to the ones in the environment.""" diff --git a/pydoclint-errors-baseline.txt b/pydoclint-errors-baseline.txt index 808a93731e..b317be03df 100644 --- a/pydoclint-errors-baseline.txt +++ b/pydoclint-errors-baseline.txt @@ -48,7 +48,6 @@ flytekit/core/checkpointer.py DOC109: Method `Checkpoint.save`: The option `--arg-type-hints-in-docstring` is `True` but there are no type hints in the docstring arg list DOC110: Method `Checkpoint.save`: The option `--arg-type-hints-in-docstring` is `True` but not all args in the docstring arg list have type hints DOC105: Method `Checkpoint.save`: Argument names match, but type hints in these args do not match: cp - DOC501: Method `Checkpoint.save` has "raise" statements, but the docstring does not have a "Raises" section DOC503: Method `Checkpoint.save` exceptions in the "Raises" section in the docstring do not match those in the function body. Raised exceptions in the docstring: []. Raised exceptions in the body: ['NotImplementedError']. DOC301: Class `SyncCheckpoint`: __init__() should not have a docstring; please combine it with the docstring of the class -------------------- @@ -65,7 +64,6 @@ flytekit/core/context_manager.py DOC603: Class `ExecutionState`: Class docstring attributes are different from actual class attributes. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Attributes in the class definition but not in the docstring: [branch_eval_mode: Optional[BranchEvalMode], user_space_params: Optional[ExecutionParameters]]. Arguments in the docstring but not in the actual class attributes: [branch_eval_mode Optional[BranchEvalMode]: , user_space_params Optional[ExecutionParameters]: ]. 
(Please read https://jsh9.github.io/pydoclint/checking_class_attributes.html on how to correctly document class attributes.) DOC101: Method `ExecutionState.__init__`: Docstring contains fewer arguments than in function signature. DOC103: Method `ExecutionState.__init__`: Docstring arguments are different from function arguments. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Arguments in the function signature but not in the docstring: [branch_eval_mode: Optional[BranchEvalMode], engine_dir: Optional[Union[os.PathLike, str]], mode: Optional[ExecutionState.Mode], user_space_params: Optional[ExecutionParameters], working_dir: Union[os.PathLike, str]]. - DOC501: Method `ExecutionState.__init__` has "raise" statements, but the docstring does not have a "Raises" section DOC503: Method `ExecutionState.__init__` exceptions in the "Raises" section in the docstring do not match those in the function body. Raised exceptions in the docstring: []. Raised exceptions in the body: ['ValueError']. DOC603: Class `OutputMetadataTracker`: Class docstring attributes are different from actual class attributes. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Attributes in the class definition but not in the docstring: [output_metadata: typing.Dict[typing.Any, OutputMetadata]]. Arguments in the docstring but not in the actual class attributes: [output_metadata Optional[TaskOutputMetadata]: ]. (Please read https://jsh9.github.io/pydoclint/checking_class_attributes.html on how to correctly document class attributes.) -------------------- @@ -173,13 +171,10 @@ flytekit/extras/tensorflow/record.py DOC603: Class `TFRecordDatasetConfig`: Class docstring attributes are different from actual class attributes. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). 
Attributes in the class definition but not in the docstring: [buffer_size: Optional[int], compression_type: Optional[str], name: Optional[str], num_parallel_reads: Optional[int]]. (Please read https://jsh9.github.io/pydoclint/checking_class_attributes.html on how to correctly document class attributes.) -------------------- flytekit/image_spec/image_spec.py - DOC601: Class `ImageSpec`: Class docstring contains fewer class attributes than actual class attributes. (Please read https://jsh9.github.io/pydoclint/checking_class_attributes.html on how to correctly document class attributes.) - DOC603: Class `ImageSpec`: Class docstring attributes are different from actual class attributes. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Attributes in the class definition but not in the docstring: [apt_packages: Optional[List[str]], base_image: Optional[Union[str, 'ImageSpec']], builder: Optional[str], commands: Optional[List[str]], conda_channels: Optional[List[str]], conda_packages: Optional[List[str]], copy: Optional[List[str]], cuda: Optional[str], cudnn: Optional[str], entrypoint: Optional[List[str]], env: Optional[typing.Dict[str, str]], name: str, packages: Optional[List[str]], pip_extra_args: Optional[str], pip_extra_index_url: Optional[List[str]], pip_index: Optional[str], pip_secret_mounts: Optional[List[Tuple[str, str]]], platform: str, python_exec: Optional[str], python_version: str, registry: Optional[str], registry_config: Optional[str], requirements: Optional[str], source_copy_mode: Optional[CopyFileDetection], source_root: Optional[str], tag_format: Optional[str]]. (Please read https://jsh9.github.io/pydoclint/checking_class_attributes.html on how to correctly document class attributes.) 
DOC109: Method `ImageSpecBuilder.build_image`: The option `--arg-type-hints-in-docstring` is `True` but there are no type hints in the docstring arg list DOC110: Method `ImageSpecBuilder.build_image`: The option `--arg-type-hints-in-docstring` is `True` but not all args in the docstring arg list have type hints DOC105: Method `ImageSpecBuilder.build_image`: Argument names match, but type hints in these args do not match: image_spec DOC203: Method `ImageSpecBuilder.build_image` return type(s) in docstring not consistent with the return annotation. Return annotation types: ['Optional[str]']; docstring return section types: ['fully_qualified_image_name'] - DOC501: Method `ImageSpecBuilder.build_image` has "raise" statements, but the docstring does not have a "Raises" section DOC503: Method `ImageSpecBuilder.build_image` exceptions in the "Raises" section in the docstring do not match those in the function body. Raised exceptions in the docstring: []. Raised exceptions in the body: ['NotImplementedError']. DOC109: Method `ImageSpecBuilder.should_build`: The option `--arg-type-hints-in-docstring` is `True` but there are no type hints in the docstring arg list DOC110: Method `ImageSpecBuilder.should_build`: The option `--arg-type-hints-in-docstring` is `True` but not all args in the docstring arg list have type hints @@ -208,7 +203,6 @@ flytekit/interactive/vscode_lib/decorator.py DOC107: Function `download_file`: The option `--arg-type-hints-in-signature` is `True` but not all args in the signature have type hints DOC105: Function `download_file`: Argument names match, but type hints in these args do not match: url, target_dir DOC203: Function `download_file` return type(s) in docstring not consistent with the return annotation. Return annotation has 0 type(s); docstring return section has 1 type(s). 
- DOC501: Function `download_file` has "raise" statements, but the docstring does not have a "Raises" section DOC503: Function `download_file` exceptions in the "Raises" section in the docstring do not match those in the function body. Raised exceptions in the docstring: []. Raised exceptions in the body: ['ValueError']. DOC106: Function `prepare_interactive_python`: The option `--arg-type-hints-in-signature` is `True` but there are no argument type hints in the signature DOC107: Function `prepare_interactive_python`: The option `--arg-type-hints-in-signature` is `True` but not all args in the signature have type hints @@ -475,7 +469,6 @@ plugins/flytekit-duckdb/flytekitplugins/duckdb/task.py DOC103: Method `DuckDBQuery._execute_query`: Docstring arguments are different from function arguments. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Arguments in the function signature but not in the docstring: [con: duckdb.DuckDBPyConnection]. DOC402: Method `DuckDBQuery._execute_query` has "yield" statements, but the docstring does not have a "Yields" section DOC404: Method `DuckDBQuery._execute_query` yield type(s) in docstring not consistent with the return annotation. Return annotation exists, but docstring "yields" section does not exist or has 0 type(s). - DOC501: Method `DuckDBQuery._execute_query` has "raise" statements, but the docstring does not have a "Raises" section DOC503: Method `DuckDBQuery._execute_query` exceptions in the "Raises" section in the docstring do not match those in the function body. Raised exceptions in the docstring: []. Raised exceptions in the body: ['ValueError']. -------------------- plugins/flytekit-flyteinteractive/flytekitplugins/flyteinteractive/jupyter_lib/decorator.py @@ -548,7 +541,6 @@ plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py DOC101: Function `spawn_helper`: Docstring contains fewer arguments than in function signature. 
DOC107: Function `spawn_helper`: The option `--arg-type-hints-in-signature` is `True` but not all args in the signature have type hints DOC103: Function `spawn_helper`: Docstring arguments are different from function arguments. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Arguments in the function signature but not in the docstring: [kwargs: ]. - DOC501: Function `spawn_helper` has "raise" statements, but the docstring does not have a "Raises" section DOC503: Function `spawn_helper` exceptions in the "Raises" section in the docstring do not match those in the function body. Raised exceptions in the docstring: []. Raised exceptions in the body: ['Exception']. DOC101: Method `PytorchElasticFunctionTask._execute`: Docstring contains fewer arguments than in function signature. DOC106: Method `PytorchElasticFunctionTask._execute`: The option `--arg-type-hints-in-signature` is `True` but there are no argument type hints in the signature diff --git a/tests/flytekit/unit/core/image_spec/test_image_spec.py b/tests/flytekit/unit/core/image_spec/test_image_spec.py index 10438d7e01..9d152823b4 100644 --- a/tests/flytekit/unit/core/image_spec/test_image_spec.py +++ b/tests/flytekit/unit/core/image_spec/test_image_spec.py @@ -5,13 +5,14 @@ import mock import pytest +from flytekit.configuration import (FastSerializationSettings, ImageConfig, + SerializationSettings) +from flytekit.constants import CopyFileDetection from flytekit.core import context_manager from flytekit.core.context_manager import ExecutionState +from flytekit.core.python_auto_container import update_image_spec_copy_handling from flytekit.image_spec import ImageSpec from flytekit.image_spec.image_spec import _F_IMG_ID, ImageBuildEngine -from flytekit.core.python_auto_container import update_image_spec_copy_handling -from flytekit.configuration import SerializationSettings, FastSerializationSettings, ImageConfig -from flytekit.constants import 
CopyFileDetection REQUIREMENT_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "requirements.txt") REGISTRY_CONFIG_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "registry_config.json") @@ -33,7 +34,8 @@ def test_image_spec(mock_image_spec_builder, monkeypatch): requirements=REQUIREMENT_FILE, registry_config=REGISTRY_CONFIG_FILE, entrypoint=["/bin/bash"], - copy=["/src/file1.txt"] + copy=["/src/file1.txt"], + builder_options={"builder_option": "builder_option_value"}, ) assert image_spec._is_force_push is False @@ -62,6 +64,7 @@ def test_image_spec(mock_image_spec_builder, monkeypatch): assert image_spec._is_force_push is True assert image_spec.entrypoint == ["/bin/bash"] assert image_spec.copy == ["/src/file1.txt", "/src", "/src/file2.txt"] + assert image_spec.builder_options == {"builder_option": "builder_option_value"} assert image_spec.image_name() == f"localhost:30001/flytekit:{image_spec.tag}" ctx = context_manager.FlyteContext.current_context() @@ -301,3 +304,37 @@ def test_image_spec_same_id_and_tag_with_builder(): image_spec_with_builder = ImageSpec(name="my_image", builder="envd") assert image_spec.id == image_spec_with_builder.id assert image_spec.tag == image_spec_with_builder.tag + + +def test_dev_packages(): + image_spec = ImageSpec(name="localhost:30000/flytekit:0.1.5") + new_image_spec = image_spec.with_runtime_packages(["my-new-package"]) + assert new_image_spec.runtime_packages == ["my-new-package"] + + +def test_invalid_builder_options(): + msg = "builder_options must be a dictionary or None" + with pytest.raises(ValueError, match=msg): + ImageSpec(name="localhost:30000/flytekit:0.1.5", builder_options="invalid_builder_option") + with pytest.raises(ValueError, match=msg): + ImageSpec(name="localhost:30000/flytekit:0.1.5", + builder_options=["invalid_builder_option"]) + + +def test_with_builder_options(): + image_spec = ImageSpec( + name="localhost:30000/flytekit:0.1.5", + builder_options={ + 
"existing_builder_option_1": "existing_builder_option_value_1", + } + ) + new_image_spec = image_spec.with_builder_options( + {"new_builder_option_1": "new_builder_option_value_1"}) + + assert image_spec.builder_options == { + "existing_builder_option_1": "existing_builder_option_value_1", + } + assert new_image_spec.builder_options == { + "existing_builder_option_1": "existing_builder_option_value_1", + "new_builder_option_1": "new_builder_option_value_1" + }