From 17d1ae6d87a9c02ea6518a5deba7f4fe935ded30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 13 Mar 2026 13:28:01 +0000 Subject: [PATCH 01/16] feat: add KubeflowExecutor for Kubeflow Training Operator on Kubernetes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces KubeflowExecutor and a matching TorchX scheduler so users can deploy distributed PyTorchJobs and TrainJobs to any Kubernetes cluster running the Kubeflow Training Operator via run.run() / run.Experiment. - KubeflowExecutor builds and submits PyTorchJob / TrainJob CRDs via the K8s API (local kubeconfig with in-cluster fallback) - Supports both PyTorchJob and TrainJob job_kind variants - macro_values() maps to PET_* env vars injected by Training Operator so torchrun launch scripts resolve ${head_node_ip_var} / ${node_rank_var} - Inline Script path passed to torchrun uses pod-side path (role_args[0]) instead of the empty fn_or_script.path; inline scripts chmod'd 755 so torchrun --no-python can execute them - Retry kubectl logs -f until pods are running to prevent the log thread dying before Training Operator pods start - Backwards-compat migration in _get_job_dirs(): rename stored nproc_per_node → nprocs_per_node on old JSON entries - TorchX scheduler persists job state and maps KubeflowJobState → AppState - Documentation added to docs/guides/execution.md Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: oliver könig --- .gitignore | 1 + docs/guides/execution.md | 44 + nemo_run/__init__.py | 2 + nemo_run/config.py | 1 + nemo_run/core/execution/__init__.py | 2 + nemo_run/core/execution/kubeflow.py | 752 ++++++++++++++++++ .../core/execution/templates/kubeflow.sh.j2 | 22 + nemo_run/run/experiment.py | 2 + nemo_run/run/torchx_backend/packaging.py | 8 +- nemo_run/run/torchx_backend/schedulers/api.py | 3 + .../run/torchx_backend/schedulers/kubeflow.py | 279 +++++++ pyproject.toml | 4 + test/core/execution/test_kubeflow.py | 749 
+++++++++++++++++ .../schedulers/test_kubeflow.py | 289 +++++++ uv.lock | 233 +++--- 15 files changed, 2275 insertions(+), 116 deletions(-) create mode 100644 nemo_run/core/execution/kubeflow.py create mode 100644 nemo_run/core/execution/templates/kubeflow.sh.j2 create mode 100644 nemo_run/run/torchx_backend/schedulers/kubeflow.py create mode 100644 test/core/execution/test_kubeflow.py create mode 100644 test/run/torchx_backend/schedulers/test_kubeflow.py diff --git a/.gitignore b/.gitignore index 517031ee..f0564656 100644 --- a/.gitignore +++ b/.gitignore @@ -183,3 +183,4 @@ _version.py # NeMo Run .nemo_run/ +local/ diff --git a/docs/guides/execution.md b/docs/guides/execution.md index 6f2c0063..8560759b 100644 --- a/docs/guides/execution.md +++ b/docs/guides/execution.md @@ -53,6 +53,7 @@ The packager support matrix is described below: | SkypilotExecutor | run.Packager, run.GitArchivePackager, run.PatternPackager, run.HybridPackager | | DGXCloudExecutor | run.Packager, run.GitArchivePackager, run.PatternPackager, run.HybridPackager | | LeptonExecutor | run.Packager, run.GitArchivePackager, run.PatternPackager, run.HybridPackager | +| KubeflowExecutor | run.Packager | `run.Packager` is a passthrough base packager. @@ -293,6 +294,49 @@ def your_dgx_executor(nodes: int, gpus_per_node: int, container_image: str): For a complete end-to-end example using DGX Cloud with NeMo, refer to the [NVIDIA DGX Cloud NeMo End-to-End Workflow Example](https://docs.nvidia.com/dgx-cloud/run-ai/latest/nemo-e2e-example.html). +#### KubeflowExecutor + +The `KubeflowExecutor` integrates with the [Kubeflow Training Operator](https://github.com/kubeflow/training-operator) to run distributed training jobs on any Kubernetes cluster. It submits CRDs directly via the Kubernetes API — no `kubectl` required. 
+ +Two job kinds are supported via the `job_kind` parameter: + +- **`"PyTorchJob"`** (default) — Training Operator v1 (`kubeflow.org/v1`) +- **`"TrainJob"`** — Training Operator v2 (`trainer.kubeflow.org/v1alpha1`) + +Kubernetes configuration is loaded automatically: local kubeconfig is tried first, falling back to in-cluster config when running inside a pod. + +Here's an example configuration: + +```python +# PyTorchJob (default) +executor = run.KubeflowExecutor( + namespace="runai-nemo-ci", + image="nvcr.io/nvidian/nemo:nightly", + num_nodes=3, # total pods: 1 Master + (num_nodes-1) Workers + gpus_per_node=8, # also sets nprocs_per_node unless overridden explicitly + cpu_requests="16", + memory_requests="64Gi", + volumes=[ + {"name": "model-cache", "persistentVolumeClaim": {"claimName": "nemo-ci-datasets-project-nkf5l"}} + ], + volume_mounts=[{"name": "model-cache", "mountPath": "/nemo-workspace"}], + labels={"app": "nemo-ci-training"}, + env_vars={"NCCL_DEBUG": "INFO"}, +) + +# TrainJob (Training Operator v2) +executor = run.KubeflowExecutor( + job_kind="TrainJob", + runtime_ref="torch-distributed", # name of the ClusterTrainingRuntime + namespace="runai-nemo-ci", + image="nvcr.io/nvidian/nemo:nightly", + num_nodes=3, + gpus_per_node=8, +) +``` + +`cancel(wait=True)` polls until both the CR and all associated pods are fully terminated before returning. + +#### LeptonExecutor + +The `LeptonExecutor` integrates with an NVIDIA DGX Cloud Lepton cluster's Python SDK to launch distributed jobs. It uses API calls behind the Lepton SDK to authenticate, identify the target node group and resource shapes, and submit the job specification which will be launched as a batch job on the cluster. 
diff --git a/nemo_run/__init__.py b/nemo_run/__init__.py index 04f56916..0d403a54 100644 --- a/nemo_run/__init__.py +++ b/nemo_run/__init__.py @@ -24,6 +24,7 @@ from nemo_run.core.execution.base import Executor, ExecutorMacros, import_executor from nemo_run.core.execution.dgxcloud import DGXCloudExecutor from nemo_run.core.execution.docker import DockerExecutor +from nemo_run.core.execution.kubeflow import KubeflowExecutor from nemo_run.core.execution.launcher import FaultTolerance, SlurmRay, SlurmTemplate, Torchrun from nemo_run.core.execution.lepton import LeptonExecutor from nemo_run.core.execution.local import LocalExecutor @@ -66,6 +67,7 @@ "Packager", "Partial", "Plugin", + "KubeflowExecutor", "run", "Script", "SkypilotExecutor", diff --git a/nemo_run/config.py b/nemo_run/config.py index d45e536d..8f20cc26 100644 --- a/nemo_run/config.py +++ b/nemo_run/config.py @@ -495,6 +495,7 @@ def to_command( ) with open(filename, "w") as f: f.write("#!/usr/bin/bash\n" + inline_content) + os.chmod(filename, os.stat(filename).st_mode | 0o755) if is_local: cmd = [filename] diff --git a/nemo_run/core/execution/__init__.py b/nemo_run/core/execution/__init__.py index 7c787a16..08e088c8 100644 --- a/nemo_run/core/execution/__init__.py +++ b/nemo_run/core/execution/__init__.py @@ -16,6 +16,7 @@ from nemo_run.core.execution.dgxcloud import DGXCloudExecutor from nemo_run.core.execution.lepton import LeptonExecutor from nemo_run.core.execution.local import LocalExecutor +from nemo_run.core.execution.kubeflow import KubeflowExecutor from nemo_run.core.execution.skypilot import SkypilotExecutor from nemo_run.core.execution.slurm import SlurmExecutor @@ -25,4 +26,5 @@ "SkypilotExecutor", "DGXCloudExecutor", "LeptonExecutor", + "KubeflowExecutor", ] diff --git a/nemo_run/core/execution/kubeflow.py b/nemo_run/core/execution/kubeflow.py new file mode 100644 index 00000000..f281a706 --- /dev/null +++ b/nemo_run/core/execution/kubeflow.py @@ -0,0 +1,752 @@ +# SPDX-FileCopyrightText: 
Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import subprocess +import time + +from jinja2 import Environment, PackageLoader, select_autoescape +from dataclasses import dataclass, field +from enum import Enum +from typing import Any, Iterable, Optional + +try: + from kubernetes import client, config, watch + from kubernetes.client.rest import ApiException + + _KUBERNETES_AVAILABLE = True +except ImportError: + _KUBERNETES_AVAILABLE = False + +from nemo_run.core.execution.base import Executor, ExecutorMacros +from nemo_run.core.packaging.base import Packager + +logger = logging.getLogger(__name__) + +# PyTorchJob (Kubeflow Training Operator v1) +_PYTORCHJOB_GROUP = "kubeflow.org" +_PYTORCHJOB_VERSION = "v1" +_PYTORCHJOB_PLURAL = "pytorchjobs" +_PYTORCHJOB_KIND = "PyTorchJob" + +# TrainJob (Kubeflow Training Operator v2) +_TRAINJOB_GROUP = "trainer.kubeflow.org" +_TRAINJOB_VERSION = "v1alpha1" +_TRAINJOB_PLURAL = "trainjobs" +_TRAINJOB_KIND = "TrainJob" + + +class KubeflowJobState(Enum): + CREATED = "Created" + RUNNING = "Running" + SUCCEEDED = "Succeeded" + FAILED = "Failed" + UNKNOWN = "Unknown" + + +@dataclass(kw_only=True) +class KubeflowExecutor(Executor): + """ + Dataclass to configure a Kubeflow Executor for the Kubeflow Training Operator on Kubernetes. 
+ + Supports both PyTorchJob (Training Operator v1) and TrainJob (Training Operator v2) via + the ``job_kind`` parameter. Kubernetes configuration is loaded automatically (local kubeconfig + with in-cluster fallback). + + Args: + job_kind: ``"PyTorchJob"`` (default) or ``"TrainJob"``. + runtime_ref: ``ClusterTrainingRuntime`` name used by TrainJob (e.g. ``"torch-distributed"``). + Ignored for PyTorchJob. + """ + + job_kind: str = "PyTorchJob" + runtime_ref: str = "torch-distributed" + namespace: str = "default" + image: str = "" + num_nodes: int = 2 + nprocs_per_node: Optional[int] = None # defaults to gpus_per_node when not set + gpus_per_node: Optional[int] = None + cpu_requests: Optional[str] = None + memory_requests: Optional[str] = None + cpu_limits: Optional[str] = None + memory_limits: Optional[str] = None + volume_mounts: list[dict[str, Any]] = field(default_factory=list) + volumes: list[dict[str, Any]] = field(default_factory=list) + labels: dict[str, Any] = field(default_factory=dict) + annotations: dict[str, Any] = field(default_factory=dict) + tolerations: list[dict[str, Any]] = field(default_factory=list) + affinity: dict[str, Any] = field(default_factory=dict) + # env_list accepts full env var dicts (e.g. valueFrom/secretKeyRef). + # Simple key=value pairs should use the inherited env_vars dict instead. + env_list: list[dict[str, Any]] = field(default_factory=list) + # pod_spec_overrides merges extra fields into the pod spec (PyTorchJob) or + # podTemplateOverrides[].spec (TrainJob) — e.g. {"resourceClaims": [...]}. + pod_spec_overrides: dict[str, Any] = field(default_factory=dict) + restart_policy: str = "OnFailure" + image_pull_secrets: list[str] = field(default_factory=list) + spec_kwargs: dict[str, Any] = field(default_factory=dict) + container_kwargs: dict[str, Any] = field(default_factory=dict) + # Workdir sync: if set, package() rsyncs job_dir → PVC before launch and + # pull_results() rsyncs the PVC back to job_dir after the job completes. 
+ workdir_pvc: Optional[str] = None + workdir_pvc_path: str = "/nemo_run" + # Optional local directory whose contents are merged into job_dir before + # the PVC sync. Use this to include local scripts/files that are not + # generated by the packager (e.g. a hand-written training script). + workdir_local_path: Optional[str] = None + + def __post_init__(self): + if not _KUBERNETES_AVAILABLE: + raise ImportError( + "kubernetes package is required for KubeflowExecutor. " + "Install it with: pip install nemo-run[kubeflow]" + ) + if self.job_kind not in (_PYTORCHJOB_KIND, _TRAINJOB_KIND): + raise ValueError(f"job_kind must be 'PyTorchJob' or 'TrainJob', got {self.job_kind!r}") + try: + config.load_kube_config() + except Exception as original_exc: + try: + config.load_incluster_config() + except Exception: + raise original_exc + self._custom_objects_api = client.CustomObjectsApi() + self._core_v1_api = client.CoreV1Api() + + # ── K8s API coordinates ─────────────────────────────────────────────────── + + def _group(self) -> str: + return _PYTORCHJOB_GROUP if self.job_kind == _PYTORCHJOB_KIND else _TRAINJOB_GROUP + + def _version(self) -> str: + return _PYTORCHJOB_VERSION if self.job_kind == _PYTORCHJOB_KIND else _TRAINJOB_VERSION + + def _plural(self) -> str: + return _PYTORCHJOB_PLURAL if self.job_kind == _PYTORCHJOB_KIND else _TRAINJOB_PLURAL + + def _pod_label_selector(self, job_name: str) -> str: + if self.job_kind == _PYTORCHJOB_KIND: + return f"training.kubeflow.org/job-name={job_name}" + # TrainJob delegates to JobSet; pods carry the jobset label + return f"jobset.sigs.k8s.io/jobset-name={job_name}" + + # ── Executor interface ──────────────────────────────────────────────────── + + def assign(self, exp_id: str, exp_dir: str, task_id: str, task_dir: str) -> None: + self.experiment_id = exp_id + self.experiment_dir = exp_dir + self.job_name = task_id + self.job_dir = os.path.join(exp_dir, task_dir) + + def nnodes(self) -> int: + return self.num_nodes + + def 
nproc_per_node(self) -> int: + if self.nprocs_per_node is not None: + return self.nprocs_per_node + if self.gpus_per_node is not None: + return self.gpus_per_node + return 1 + + # ── Manifest builders ───────────────────────────────────────────────────── + + def get_job_body(self, name: str, command: list[str]) -> dict: + """Build the CRD manifest dict for the configured ``job_kind``.""" + if self.job_kind == _PYTORCHJOB_KIND: + return self._get_pytorchjob_body(name, command) + return self._get_trainjob_body(name, command) + + def _build_resources(self) -> dict[str, Any]: + limits: dict[str, Any] = {} + requests: dict[str, Any] = {} + if self.cpu_requests: + requests["cpu"] = self.cpu_requests + if self.memory_requests: + requests["memory"] = self.memory_requests + if self.cpu_limits: + limits["cpu"] = self.cpu_limits + if self.memory_limits: + limits["memory"] = self.memory_limits + if self.gpus_per_node is not None: + limits["nvidia.com/gpu"] = str(self.gpus_per_node) + requests["nvidia.com/gpu"] = str(self.gpus_per_node) + resources: dict[str, Any] = {} + if limits: + resources["limits"] = limits + if requests: + resources["requests"] = requests + return resources + + def _get_pytorchjob_body(self, name: str, command: list[str]) -> dict: + resources = self._build_resources() + env = [{"name": k, "value": v} for k, v in self.env_vars.items()] + self.env_list + + container: dict[str, Any] = { + "name": "pytorch", + "image": self.image, + "command": command, + "env": env, + } + if self.volume_mounts: + container["volumeMounts"] = self.volume_mounts + if resources: + container["resources"] = resources + container.update(self.container_kwargs) + + pod_spec: dict[str, Any] = {"containers": [container]} + if self.volumes: + pod_spec["volumes"] = self.volumes + if self.image_pull_secrets: + pod_spec["imagePullSecrets"] = [{"name": s} for s in self.image_pull_secrets] + if self.tolerations: + pod_spec["tolerations"] = self.tolerations + if self.affinity: + 
pod_spec["affinity"] = self.affinity + pod_spec.update(self.pod_spec_overrides) + + template_metadata: dict[str, Any] = {} + if self.labels: + template_metadata["labels"] = self.labels + if self.annotations: + template_metadata["annotations"] = self.annotations + + replica_spec: dict[str, Any] = { + "restartPolicy": self.restart_policy, + "template": { + "metadata": template_metadata, + "spec": pod_spec, + }, + } + + spec: dict[str, Any] = { + "nprocPerNode": str(self.nproc_per_node()), + "pytorchReplicaSpecs": { + "Master": {"replicas": 1, **replica_spec}, + "Worker": {"replicas": self.num_nodes - 1, **replica_spec}, + }, + **self.spec_kwargs, + } + + return { + "apiVersion": f"{_PYTORCHJOB_GROUP}/{_PYTORCHJOB_VERSION}", + "kind": _PYTORCHJOB_KIND, + "metadata": { + "name": name, + "namespace": self.namespace, + "labels": self.labels, + "annotations": self.annotations, + }, + "spec": spec, + } + + def _get_trainjob_body(self, name: str, command: list[str]) -> dict: + resources = self._build_resources() + env = [{"name": k, "value": v} for k, v in self.env_vars.items()] + self.env_list + + trainer: dict[str, Any] = { + "numNodes": self.num_nodes, + "numProcPerNode": self.nproc_per_node(), + "image": self.image, + "command": command, + "env": env, + } + if resources: + trainer["resourcesPerNode"] = resources + trainer.update(self.container_kwargs) + + # TrainJob uses podTemplateOverrides for pod-level config (volumes, tolerations, + # affinity, imagePullSecrets, etc.) rather than embedding them in the pod spec. + # All native fields are merged into a single override entry targeting "node". 
+ pod_spec_override: dict[str, Any] = {} + if self.volumes: + pod_spec_override["volumes"] = self.volumes + if self.image_pull_secrets: + pod_spec_override["imagePullSecrets"] = [{"name": s} for s in self.image_pull_secrets] + if self.tolerations: + pod_spec_override["tolerations"] = self.tolerations + if self.affinity: + pod_spec_override["affinity"] = self.affinity + if self.volume_mounts: + # Container name must match the CRT's container name ("node") so the + # volumeMounts are merged into the existing container rather than + # creating a new image-less container. + pod_spec_override.setdefault("containers", []).append( + {"name": "node", "volumeMounts": self.volume_mounts} + ) + pod_spec_override.update(self.pod_spec_overrides) + + spec: dict[str, Any] = { + "runtimeRef": {"name": self.runtime_ref}, + "trainer": trainer, + } + if pod_spec_override: + spec["podTemplateOverrides"] = [ + {"targetJobs": [{"name": "node"}], "spec": pod_spec_override} + ] + spec.update(self.spec_kwargs) + + metadata: dict[str, Any] = {"name": name, "namespace": self.namespace} + if self.labels: + metadata["labels"] = self.labels + if self.annotations: + metadata["annotations"] = self.annotations + + return { + "apiVersion": f"{_TRAINJOB_GROUP}/{_TRAINJOB_VERSION}", + "kind": _TRAINJOB_KIND, + "metadata": metadata, + "spec": spec, + } + + # ── Submit / status / cancel / logs ────────────────────────────────────── + + def launch( + self, + name: str, + cmd: list[str], + wait: bool = False, + timeout: int = 300, + poll_interval: int = 10, + ) -> tuple[str, KubeflowJobState]: + name = name.replace("_", "-").replace(".", "-").lower() + job_body = self.get_job_body(name, cmd) + try: + self._custom_objects_api.create_namespaced_custom_object( + group=self._group(), + version=self._version(), + namespace=self.namespace, + plural=self._plural(), + body=job_body, + ) + except ApiException as e: + if e.status == 409: + raise RuntimeError( + f"{self.job_kind} {name} already exists in namespace 
{self.namespace}" + ) from e + raise + + logger.info("Submitted %s %s to namespace %s", self.job_kind, name, self.namespace) + + if not wait: + return name, KubeflowJobState.CREATED + + deadline = time.time() + timeout + state = KubeflowJobState.CREATED + last_logged_state: Optional[KubeflowJobState] = None + while time.time() < deadline: + state = self.status(name) or KubeflowJobState.UNKNOWN + if state != last_logged_state: + logger.info("%s %s: %s", self.job_kind, name, state.value) + last_logged_state = state + if state == KubeflowJobState.RUNNING: + return name, state + if state in (KubeflowJobState.SUCCEEDED, KubeflowJobState.FAILED): + return name, state + time.sleep(poll_interval) + + raise RuntimeError( + f"{self.job_kind} {name} did not reach RUNNING within {timeout}s, last state: {state}" + ) + + def status(self, job_name: str) -> Optional[KubeflowJobState]: + try: + resp = self._custom_objects_api.get_namespaced_custom_object( + group=self._group(), + version=self._version(), + namespace=self.namespace, + plural=self._plural(), + name=job_name, + ) + except ApiException as e: + if e.status == 404: + return None + logger.warning("API error getting status for %s: %s", job_name, e) + return None + + job_status = resp.get("status", {}) + + if self.job_kind == _TRAINJOB_KIND: + # TrainJob (v2) uses status.jobsStatus[].{active,ready,succeeded,failed} + jobs_status = job_status.get("jobsStatus", []) + if any(js.get("failed", 0) > 0 for js in jobs_status): + return KubeflowJobState.FAILED + if jobs_status and all( + js.get("succeeded", 0) > 0 and js.get("active", 0) == 0 for js in jobs_status + ): + return KubeflowJobState.SUCCEEDED + if any(js.get("active", 0) > 0 or js.get("ready", 0) > 0 for js in jobs_status): + return KubeflowJobState.RUNNING + return KubeflowJobState.UNKNOWN + + # PyTorchJob (v1) uses status.conditions[].{type,status} + conditions = job_status.get("conditions", []) + state_map = { + "Running": KubeflowJobState.RUNNING, + "Succeeded": 
KubeflowJobState.SUCCEEDED, + "Failed": KubeflowJobState.FAILED, + } + for cond in reversed(conditions): + if cond.get("status") == "True" and cond.get("type") in state_map: + return state_map[cond["type"]] + return KubeflowJobState.UNKNOWN + + def fetch_logs( + self, + job_name: str, + stream: bool = False, + lines: int = 100, + timeout: int = 60, + ) -> Iterable[str]: + label_selector = self._pod_label_selector(job_name) + cmd = [ + "kubectl", + "logs", + "-l", + label_selector, + "-n", + self.namespace, + "--tail", + str(lines), + ] + if stream: + cmd.append("-f") + # Pods may not be running yet when the log thread starts. Retry + # kubectl logs -f until we get output (or 10 minutes pass). + deadline = time.time() + 600 + while time.time() < deadline: + proc = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, bufsize=1 + ) + lines_yielded = 0 + try: + for line in iter(proc.stdout.readline, ""): + if line: + lines_yielded += 1 + yield line + if proc.poll() is not None: + for remaining in proc.stdout: + if remaining: + lines_yielded += 1 + yield remaining + break + except Exception as e: + logger.error("Error streaming logs: %s", e) + break + finally: + proc.terminate() + proc.wait(timeout=2) + if lines_yielded > 0: + break # kubectl exited after producing output — job done + time.sleep(5) # no pods running yet, retry + else: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout) + yield from result.stdout.splitlines() + + def cancel( + self, + job_name: str, + wait: bool = False, + timeout: int = 300, + poll_interval: int = 5, + ) -> Optional[bool]: + try: + self._custom_objects_api.delete_namespaced_custom_object( + group=self._group(), + version=self._version(), + namespace=self.namespace, + plural=self._plural(), + name=job_name, + ) + except ApiException as e: + if e.status == 404: + logger.info("%s %s already deleted", self.job_kind, job_name) + return None + raise + + if not wait: + return None + 
+ label_selector = self._pod_label_selector(job_name) + deadline = time.time() + timeout + + while time.time() < deadline: + time.sleep(poll_interval) + + # Check if CR is gone + try: + self._custom_objects_api.get_namespaced_custom_object( + group=self._group(), + version=self._version(), + namespace=self.namespace, + plural=self._plural(), + name=job_name, + ) + continue # CR still present + except ApiException as e: + if e.status != 404: + continue + + # CR is gone; check pods + pods = self._core_v1_api.list_namespaced_pod( + namespace=self.namespace, + label_selector=label_selector, + ) + if len(pods.items) == 0: + return True + + return False + + # ── Workdir sync helpers ────────────────────────────────────────────────── + + def _data_mover_pod_name(self, job_name: str) -> str: + return f"{job_name}-data-mover" + + def _start_data_mover_pod(self, pod_name: str, timeout: int = 120) -> None: + """Spin up a throw-away Alpine pod that mounts workdir_pvc and blocks until Running. + + Uses ``kubectl cp`` (tar-based, built into Alpine — no internet needed) for data + transfer. The pod inherits tolerations, affinity, and imagePullSecrets from the + main workload so it can be scheduled on the same nodes (required when the PVC is + zone- or node-local). 
+ """ + vol_name = "nemo-run-workdir" + pod_spec: dict[str, Any] = { + "restartPolicy": "Never", + "containers": [ + { + "name": "mover", + "image": "alpine:3.19", + "command": ["sleep", "infinity"], + "volumeMounts": [{"name": vol_name, "mountPath": self.workdir_pvc_path}], + } + ], + "volumes": [ + { + "name": vol_name, + "persistentVolumeClaim": {"claimName": self.workdir_pvc}, + } + ], + } + if self.tolerations: + pod_spec["tolerations"] = self.tolerations + if self.affinity: + pod_spec["affinity"] = self.affinity + if self.image_pull_secrets: + pod_spec["imagePullSecrets"] = [{"name": s} for s in self.image_pull_secrets] + pod_body = { + "apiVersion": "v1", + "kind": "Pod", + "metadata": {"name": pod_name, "namespace": self.namespace}, + "spec": pod_spec, + } + # Always delete a stale pod first so we start clean. + self._delete_data_mover_pod(pod_name) + + self._core_v1_api.create_namespaced_pod(namespace=self.namespace, body=pod_body) + logger.info("Created data-mover pod '%s'", pod_name) + + w = watch.Watch() + for event in w.stream( + self._core_v1_api.list_namespaced_pod, + namespace=self.namespace, + field_selector=f"metadata.name={pod_name}", + timeout_seconds=timeout, + ): + pod_obj = event.get("object") + phase = pod_obj.status.phase if pod_obj and pod_obj.status else None + if phase == "Running": + w.stop() + break + else: + raise RuntimeError( + f"Data-mover pod '{pod_name}' did not reach Running within {timeout}s" + ) + + def _delete_data_mover_pod(self, pod_name: str, timeout: int = 120) -> None: + try: + self._core_v1_api.delete_namespaced_pod( + name=pod_name, namespace=self.namespace, body=client.V1DeleteOptions() + ) + except ApiException as e: + if e.status != 404: + logger.warning("Failed to delete data-mover pod '%s': %s", pod_name, e) + return + deadline = time.time() + timeout + while time.time() < deadline: + try: + self._core_v1_api.read_namespaced_pod(name=pod_name, namespace=self.namespace) + except ApiException as e: + if e.status == 
404: + return + time.sleep(2) + logger.warning("Data-mover pod '%s' did not terminate within %ds", pod_name, timeout) + + def _rsync_to_pod(self, pod_name: str, local_path: str, remote_path: str) -> None: + """Copy local_path → pod:remote_path via kubectl cp (uses tar, no rsync needed).""" + subprocess.check_call( + ["kubectl", "exec", "-n", self.namespace, pod_name, "--", "mkdir", "-p", remote_path] + ) + # kubectl cp /. /: copies directory contents + subprocess.check_call( + [ + "kubectl", + "cp", + "-n", + self.namespace, + f"{local_path.rstrip(os.sep)}/.", + f"{pod_name}:{remote_path.rstrip('/')}", + ] + ) + logger.info("Copied '%s' → pod:%s", local_path, remote_path) + + def _rsync_from_pod(self, pod_name: str, remote_path: str, local_path: str) -> None: + """Copy pod:remote_path → local_path via kubectl cp (uses tar, no rsync needed).""" + os.makedirs(local_path, exist_ok=True) + subprocess.check_call( + [ + "kubectl", + "cp", + "-n", + self.namespace, + f"{pod_name}:{remote_path.rstrip('/')}", + f"{local_path.rstrip(os.sep)}", + ] + ) + logger.info("Copied pod:%s → '%s'", remote_path, local_path) + + def materialize_launch_script(self, cmd: list[str], max_retries: int = 0) -> None: + """Render kubeflow.sh.j2 with *cmd* as the training command and write + it to ``{job_dir}/launch.sh`` so it can be synced to the pod.""" + env = Environment( + loader=PackageLoader("nemo_run", "core/execution/templates"), + keep_trailing_newline=True, + autoescape=select_autoescape(["html", "xml"]), + ) + template = env.get_template("kubeflow.sh.j2") + env_var_lines = [f"export {k}={v}" for k, v in self.env_vars.items()] + script = template.render( + training_command=" ".join(cmd), + env_vars=env_var_lines, + max_retries=max_retries, + ) + os.makedirs(self.job_dir, exist_ok=True) + launch_script_path = os.path.join(self.job_dir, "launch.sh") + with open(launch_script_path, "w") as f: + f.write(script) + logger.info("Wrote launch script to %s", launch_script_path) + + def 
package(self, packager: Packager, job_name: str) -> None: + if not self.workdir_pvc: + return + # Merge extra local files (e.g. training scripts) into job_dir so they + # get synced to the pod alongside generated files like launch.sh. + if self.workdir_local_path: + os.makedirs(self.job_dir, exist_ok=True) + subprocess.check_call( + [ + "rsync", + "-a", + f"{self.workdir_local_path.rstrip(os.sep)}/", + f"{self.job_dir.rstrip(os.sep)}/", + ] + ) + logger.info("Merged '%s' into job_dir '%s'", self.workdir_local_path, self.job_dir) + # Auto-add workdir PVC to volumes/volume_mounts so training pods can access it + vol_name = "nemo-run-workdir" + if not any(v.get("name") == vol_name for v in self.volumes): + self.volumes.append( + {"name": vol_name, "persistentVolumeClaim": {"claimName": self.workdir_pvc}} + ) + if not any(vm.get("mountPath") == self.workdir_pvc_path for vm in self.volume_mounts): + self.volume_mounts.append({"name": vol_name, "mountPath": self.workdir_pvc_path}) + + pod_name = self._data_mover_pod_name(job_name) + self._start_data_mover_pod(pod_name) + try: + self._rsync_to_pod(pod_name, self.job_dir, self.workdir_pvc_path) + finally: + self._delete_data_mover_pod(pod_name) + + def pull_results(self, job_name: str, dest_dir: Optional[str] = None) -> None: + """Sync workdir_pvc_path back to a local directory after the job completes. + + Args: + job_name: The job name used when the job was launched. + dest_dir: Local destination directory. Defaults to ``self.job_dir`` + when set. If neither is available the method looks up the + persisted job state in ``~/.nemo_run/.kubeflow_jobs.json`` to + find the original ``job_dir``. + """ + if not self.workdir_pvc: + logger.warning("pull_results called but workdir_pvc is not set — nothing to sync") + return + + local_path = dest_dir or getattr(self, "job_dir", "") or "" + if not local_path: + # Try to recover job_dir from the scheduler's persisted state. 
+ local_path = self._lookup_job_dir(job_name) + if not local_path: + raise RuntimeError( + f"Cannot determine destination directory for pull_results('{job_name}'). " + "Pass dest_dir explicitly or call via an executor that has job_dir set." + ) + + pod_name = self._data_mover_pod_name(job_name) + self._start_data_mover_pod(pod_name) + try: + self._rsync_from_pod(pod_name, self.workdir_pvc_path, local_path) + finally: + self._delete_data_mover_pod(pod_name) + + def _lookup_job_dir(self, job_name: str) -> str: + """Look up the job_dir saved by the scheduler for *job_name*.""" + try: + from nemo_run.config import get_nemorun_home + + jobs_file = os.path.join(get_nemorun_home(), ".kubeflow_jobs.json") + if not os.path.isfile(jobs_file): + return "" + import json + + with open(jobs_file) as f: + data = json.load(f) + for entry in data.values(): + if entry.get("job_name") == job_name: + # Deserialize executor to get job_dir + try: + import fiddle as fdl + + from nemo_run.core.serialization.zlib_json import ZlibJSONSerializer + + serializer = ZlibJSONSerializer() + saved_executor: "KubeflowExecutor" = fdl.build( + serializer.deserialize(entry["executor"]) + ) + return getattr(saved_executor, "job_dir", "") or "" + except Exception: + pass + except Exception as e: + logger.debug("Could not look up job_dir for '%s': %s", job_name, e) + return "" + + def macro_values(self) -> Optional[ExecutorMacros]: + return ExecutorMacros( + head_node_ip_var="PET_MASTER_ADDR", + nproc_per_node_var="PET_NPROC_PER_NODE", + num_nodes_var="PET_NNODES", + node_rank_var="PET_NODE_RANK", + het_group_host_var="PET_MASTER_ADDR", + ) diff --git a/nemo_run/core/execution/templates/kubeflow.sh.j2 b/nemo_run/core/execution/templates/kubeflow.sh.j2 new file mode 100644 index 00000000..962f93c2 --- /dev/null +++ b/nemo_run/core/execution/templates/kubeflow.sh.j2 @@ -0,0 +1,22 @@ +#!/bin/bash +# +# Generated by NeMo Run +# +set -evx +export PYTHONUNBUFFERED=1 +export TORCHX_MAX_RETRIES={{max_retries}} + 
+{%- for env_var in env_vars %} +{{env_var}} +{%- endfor %} + +echo "Starting training command..." +set +e + +{{training_command}} + +exitcode=$? +set -e + +echo "Main command exited with code $exitcode" +exit $exitcode diff --git a/nemo_run/run/experiment.py b/nemo_run/run/experiment.py index 460f04f6..ba430413 100644 --- a/nemo_run/run/experiment.py +++ b/nemo_run/run/experiment.py @@ -51,6 +51,7 @@ ) from nemo_run.core.execution.base import Executor from nemo_run.core.execution.dgxcloud import DGXCloudExecutor +from nemo_run.core.execution.kubeflow import KubeflowExecutor from nemo_run.core.execution.docker import DockerExecutor from nemo_run.core.execution.lepton import LeptonExecutor from nemo_run.core.execution.local import LocalExecutor @@ -206,6 +207,7 @@ class Experiment(ConfigurableMixin): DockerExecutor, DGXCloudExecutor, LeptonExecutor, + KubeflowExecutor, ) _DETACH_SUPPORTED_EXECUTORS = ( SlurmExecutor, diff --git a/nemo_run/run/torchx_backend/packaging.py b/nemo_run/run/torchx_backend/packaging.py index 99bea100..8c5f6b5f 100644 --- a/nemo_run/run/torchx_backend/packaging.py +++ b/nemo_run/run/torchx_backend/packaging.py @@ -141,7 +141,13 @@ def _get_details_from_script(fn_or_script: Script, serialize_configs: bool): ) m = fn_or_script.path if fn_or_script.m else None no_python = fn_or_script.entrypoint != "python" - script = fn_or_script.path if not fn_or_script.m else None + if fn_or_script.m: + script = None + elif fn_or_script.inline and role_args: + # Inline scripts are written to a file; role_args[0] is the pod-side path + script = role_args[0] + else: + script = fn_or_script.path entrypoint = fn_or_script.entrypoint return role_args, args, m, no_python, script, entrypoint diff --git a/nemo_run/run/torchx_backend/schedulers/api.py b/nemo_run/run/torchx_backend/schedulers/api.py index a33ee20a..76b46a4b 100644 --- a/nemo_run/run/torchx_backend/schedulers/api.py +++ b/nemo_run/run/torchx_backend/schedulers/api.py @@ -20,6 +20,7 @@ from 
nemo_run.core.execution.base import Executor from nemo_run.core.execution.dgxcloud import DGXCloudExecutor from nemo_run.core.execution.docker import DockerExecutor +from nemo_run.core.execution.kubeflow import KubeflowExecutor from nemo_run.core.execution.lepton import LeptonExecutor from nemo_run.core.execution.local import LocalExecutor from nemo_run.core.execution.skypilot import SkypilotExecutor @@ -34,6 +35,7 @@ DockerExecutor: "docker_persistent", DGXCloudExecutor: "dgx_cloud", LeptonExecutor: "lepton", + KubeflowExecutor: "kubeflow", } REVERSE_EXECUTOR_MAPPING: dict[str, Type[Executor]] = { @@ -44,6 +46,7 @@ "docker_persistent": DockerExecutor, "dgx_cloud": DGXCloudExecutor, "lepton": LeptonExecutor, + "kubeflow": KubeflowExecutor, } diff --git a/nemo_run/run/torchx_backend/schedulers/kubeflow.py b/nemo_run/run/torchx_backend/schedulers/kubeflow.py new file mode 100644 index 00000000..547410e3 --- /dev/null +++ b/nemo_run/run/torchx_backend/schedulers/kubeflow.py @@ -0,0 +1,279 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import json
import logging
import os
import shutil
import tempfile
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Iterable, Optional

import fiddle as fdl
import fiddle._src.experimental.dataclasses as fdl_dc
from torchx.schedulers.api import (
    AppDryRunInfo,
    DescribeAppResponse,
    ListAppResponse,
    Scheduler,
    Stream,
    split_lines,
)
from torchx.specs import AppDef, AppState, ReplicaStatus, Role, RoleStatus, runopts

from nemo_run.config import get_nemorun_home
from nemo_run.core.execution.base import Executor
from nemo_run.core.execution.kubeflow import KubeflowExecutor, KubeflowJobState
from nemo_run.core.serialization.zlib_json import ZlibJSONSerializer
from nemo_run.run.torchx_backend.schedulers.api import SchedulerMixin

# Snapshot of the jobs-file path at import time. Kept for backward
# compatibility; runtime code goes through _job_dirs_path() so changes to
# NEMORUN_HOME after import (e.g. in tests) are still honoured.
KUBEFLOW_JOB_DIRS = os.path.join(get_nemorun_home(), ".kubeflow_jobs.json")

# Maps executor-level Kubeflow job states onto TorchX AppStates. A missing
# status (None) is treated as PENDING rather than an error.
KUBEFLOW_STATES: dict[Optional[KubeflowJobState], AppState] = {
    KubeflowJobState.CREATED: AppState.SUBMITTED,
    KubeflowJobState.RUNNING: AppState.RUNNING,
    KubeflowJobState.SUCCEEDED: AppState.SUCCEEDED,
    KubeflowJobState.FAILED: AppState.FAILED,
    KubeflowJobState.UNKNOWN: AppState.PENDING,
    None: AppState.PENDING,
}

log = logging.getLogger(__name__)


def _job_dirs_path() -> str:
    """Return the current path of the JSON file tracking submitted Kubeflow jobs.

    Computed on every call (instead of reusing ``KUBEFLOW_JOB_DIRS``) so that
    ``get_nemorun_home()`` overrides made after module import take effect.
    """
    return os.path.join(get_nemorun_home(), ".kubeflow_jobs.json")


@dataclass
class KubeflowJobRequest:
    """Wrapper around the TorchX AppDef and the KubeflowExecutor."""

    app: AppDef
    executor: KubeflowExecutor
    cmd: list[str]
    name: str


class KubeflowScheduler(SchedulerMixin, Scheduler[dict[str, str]]):  # type: ignore
    """TorchX scheduler that submits apps to the Kubeflow Training Operator.

    Delegates all Kubernetes interaction to :class:`KubeflowExecutor`; this
    class only adapts the TorchX Scheduler API (dryrun/schedule/describe/logs/
    cancel) and persists job metadata in ``.kubeflow_jobs.json`` under the
    nemo-run home directory.
    """

    def __init__(self, session_name: str) -> None:
        super().__init__("kubeflow", session_name)

    def _run_opts(self) -> runopts:
        opts = runopts()
        opts.add(
            "job_dir",
            type_=str,
            help="The directory to place the job code and outputs.",
        )
        return opts

    def _submit_dryrun(  # type: ignore
        self,
        app: AppDef,
        cfg: Executor,
    ) -> AppDryRunInfo[KubeflowJobRequest]:
        """Build a :class:`KubeflowJobRequest` without submitting anything.

        Applies executor macro substitutions to the single role, and — when a
        workdir PVC is configured — materialises a ``launch.sh`` so torchrun /
        launcher details stay out of the CRD manifest.
        """
        assert isinstance(cfg, KubeflowExecutor), (
            f"{cfg.__class__} not supported for Kubeflow scheduler."
        )
        executor = cfg
        assert len(app.roles) == 1, "Only single-role apps are supported."
        role = app.roles[0]
        values = cfg.macro_values()
        if values:
            role = values.apply(role)

        cmd = [role.entrypoint] + role.args

        # When workdir_pvc is configured, materialise a launch.sh from the
        # Jinja2 template (env vars + training command) and point the job at
        # it so torchrun / launcher details stay out of the manifest.
        if executor.workdir_pvc and getattr(executor, "job_dir", None):
            # Rewrite any local workdir_local_path references in the cmd to
            # their pod-side equivalents under workdir_pvc_path, so users can
            # pass run.Script(path=) and the pod sees the synced path.
            if executor.workdir_local_path:
                local_prefix = executor.workdir_local_path.rstrip(os.sep)
                pod_prefix = executor.workdir_pvc_path.rstrip("/")
                cmd = [c.replace(local_prefix, pod_prefix) for c in cmd]
            executor.materialize_launch_script(cmd)
            cmd = ["/bin/bash", f"{executor.workdir_pvc_path}/launch.sh"]

        req = KubeflowJobRequest(app=app, executor=executor, cmd=cmd, name=role.name)

        return AppDryRunInfo(
            req,
            lambda r: f"KubeflowJob for app: {r.app.name}, cmd: {' '.join(r.cmd)}",
        )

    def schedule(self, dryrun_info: AppDryRunInfo[KubeflowJobRequest]) -> str:
        """Package, launch, persist, and return the composite app_id.

        The app_id format is ``<experiment_id>___<role_name>___<job_name>``;
        the stored ``job_name`` is authoritative when parsing it back (see
        :meth:`describe`).
        """
        req = dryrun_info.request
        executor = req.executor

        executor.package(executor.packager, job_name=executor.job_name)

        job_name, status = executor.launch(name=req.name, cmd=req.cmd)
        if not job_name:
            raise RuntimeError("Failed scheduling run on Kubeflow: no job_name returned")

        role_name = req.app.roles[0].name
        experiment_id = getattr(executor, "experiment_id", "kubeflow_experiment")
        app_id = f"{experiment_id}___{role_name}___{job_name}"

        _save_job_dir(app_id, job_status=status.value, executor=executor, job_name=job_name)
        return app_id

    def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
        """Look up *app_id* in the jobs file and report its current state.

        Returns ``None`` when the app is unknown or its stored executor could
        not be deserialized.
        """
        stored_data = _get_job_dirs()
        job_info = stored_data.get(app_id)
        if not job_info:
            return None

        executor: KubeflowExecutor = job_info.get("executor", None)  # type: ignore
        if not executor:
            return None

        parts = app_id.split("___")
        role_name = parts[1] if len(parts) > 1 else app_id
        # Use stored job_name to avoid re-splitting app_id (handles role names with '___')
        job_name = job_info.get("job_name") or parts[-1]
        kf_state = executor.status(job_name)
        app_state = KUBEFLOW_STATES.get(kf_state, AppState.PENDING)

        roles = [Role(name=role_name, image="", num_replicas=1)]
        roles_statuses = [
            RoleStatus(
                role_name,
                replicas=[
                    ReplicaStatus(id=0, role=role_name, state=app_state, hostname="")
                ],
            )
        ]

        return DescribeAppResponse(
            app_id=app_id,
            roles=roles,
            roles_statuses=roles_statuses,
            state=app_state,
            msg="",
        )

    def log_iter(
        self,
        app_id: str,
        role_name: str,
        k: int = 0,
        regex: Optional[str] = None,
        since: Optional[datetime] = None,
        until: Optional[datetime] = None,
        should_tail: bool = False,
        streams: Optional[Stream] = None,
    ) -> Iterable[str]:
        """Yield log lines for *app_id* via the executor's pod-log fetcher.

        ``k``/``regex``/``since``/``until``/``streams`` are accepted for API
        compatibility but not applied here; filtering happens upstream.
        """
        stored_data = _get_job_dirs()
        job_info = stored_data.get(app_id)
        if not job_info:
            return []
        job_name = job_info.get("job_name") or app_id.split("___")[-1]
        executor: Optional[KubeflowExecutor] = job_info.get("executor", None)  # type: ignore
        if not executor:
            return []

        logs = executor.fetch_logs(job_name=job_name, stream=should_tail)
        if isinstance(logs, str):
            # Normalize a single string payload into a list of lines.
            if len(logs) == 0:
                logs = []
            else:
                logs = split_lines(logs)

        return logs

    def _cancel_existing(self, app_id: str) -> None:
        """Cancel the Kubeflow job backing *app_id*; no-op if unknown."""
        stored_data = _get_job_dirs()
        job_info = stored_data.get(app_id)
        if not job_info:
            return None
        job_name = job_info.get("job_name") or app_id.split("___")[-1]
        executor: KubeflowExecutor = job_info.get("executor", None)  # type: ignore
        if not executor:
            return None
        executor.cancel(job_name)

    def list(self) -> list[ListAppResponse]:
        # Listing live jobs is not supported; callers rely on the jobs file.
        return []

    def _validate(self, app: AppDef, scheduler: str) -> None:
        # All validation happens in _submit_dryrun's asserts.
        pass


def create_scheduler(session_name: str, **kwargs: Any) -> KubeflowScheduler:
    """Entry point used by the TorchX scheduler registry."""
    return KubeflowScheduler(session_name=session_name)


def _save_job_dir(
    app_id: str, job_status: str, executor: KubeflowExecutor, job_name: str = ""
) -> None:
    """Persist job metadata for *app_id* into the jobs JSON file.

    The executor is serialized via fiddle + ZlibJSONSerializer so it can be
    rebuilt later by :func:`_get_job_dirs`. The file is rewritten atomically:
    a temp file is created in the same directory and swapped in with
    ``os.replace``, so a crash mid-write cannot corrupt existing entries.
    """
    job_dirs_path = _job_dirs_path()
    os.makedirs(os.path.dirname(job_dirs_path), exist_ok=True)

    original_apps: dict[str, Any] = {}
    if os.path.isfile(job_dirs_path):
        try:
            with open(job_dirs_path, "r") as f:
                original_apps = json.load(f)
        except Exception:
            # Corrupt or empty file: start fresh rather than fail the save.
            original_apps = {}

    serializer = ZlibJSONSerializer()
    original_apps[app_id] = {
        "job_status": job_status,
        "job_name": job_name,
        "executor": serializer.serialize(
            fdl_dc.convert_dataclasses_to_configs(executor, allow_post_init=True)
        ),
    }

    # Same-directory temp file guarantees os.replace is an atomic rename
    # (shutil.move from the default tmpdir is not atomic across filesystems).
    fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(job_dirs_path), suffix=".tmp")
    try:
        with os.fdopen(fd, "w") as fp:
            json.dump(original_apps, fp)
        os.replace(temp_path, job_dirs_path)
    finally:
        if os.path.exists(temp_path):
            os.remove(temp_path)


def _get_job_dirs() -> dict[str, dict[str, Any]]:
    """Load the jobs file and rebuild each entry's executor object.

    Entries whose executor fails to deserialize keep their serialized string
    form and are skipped by callers (describe/log_iter check for a falsy or
    non-executor value).
    """
    job_dirs_path = _job_dirs_path()
    if not os.path.isfile(job_dirs_path):
        return {}
    with open(job_dirs_path, "r") as f:
        data = json.load(f)

    serializer = ZlibJSONSerializer()
    for app in data.values():
        try:
            cfg = serializer.deserialize(app["executor"])
            # Backwards compat: migrate renamed field nproc_per_node → nprocs_per_node
            try:
                val = cfg.nproc_per_node
                del cfg.nproc_per_node
                cfg.nprocs_per_node = val
            except AttributeError:
                pass
            app["executor"] = fdl.build(cfg)
        except Exception as e:
            log.debug("Failed to deserialize executor: %s", e)
            continue

    return data
"nemo_run.run.torchx_backend.schedulers.docker:create_schedu dgx_cloud = "nemo_run.run.torchx_backend.schedulers.dgxcloud:create_scheduler" lepton = "nemo_run.run.torchx_backend.schedulers.lepton:create_scheduler" skypilot_jobs = "nemo_run.run.torchx_backend.schedulers.skypilot_jobs:create_scheduler" +kubeflow = "nemo_run.run.torchx_backend.schedulers.kubeflow:create_scheduler" [project.optional-dependencies] skypilot = [ @@ -60,6 +61,9 @@ ray = [ "kubernetes", "ray[default]>=2.49.2", ] +kubeflow = [ + "kubernetes", +] [dependency-groups] dev = [ diff --git a/test/core/execution/test_kubeflow.py b/test/core/execution/test_kubeflow.py new file mode 100644 index 00000000..9c3b2b95 --- /dev/null +++ b/test/core/execution/test_kubeflow.py @@ -0,0 +1,749 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from unittest.mock import MagicMock, patch + +import pytest +from kubernetes.client.rest import ApiException + +from nemo_run.core.execution.kubeflow import KubeflowExecutor, KubeflowJobState + + +class TestKubeflowExecutor: + @pytest.fixture + def mock_k8s_clients(self): + with ( + patch("nemo_run.core.execution.kubeflow.config.load_kube_config"), + patch("nemo_run.core.execution.kubeflow.client.CustomObjectsApi") as mock_custom, + patch("nemo_run.core.execution.kubeflow.client.CoreV1Api") as mock_core, + ): + yield mock_custom.return_value, mock_core.return_value + + @pytest.fixture + def executor(self, mock_k8s_clients): + return KubeflowExecutor( + image="nvcr.io/nvidian/nemo:nightly", + num_nodes=3, + gpus_per_node=8, + ) + + # ── Initialization ────────────────────────────────────────────────────────── + + def test_executor_defaults(self, executor): + assert executor.namespace == "default" + assert executor.restart_policy == "OnFailure" + assert executor.nprocs_per_node is None # unset; resolved at manifest build time + assert executor.job_kind == "PyTorchJob" + + def test_invalid_job_kind(self, mock_k8s_clients): + with pytest.raises(ValueError, match="job_kind must be"): + KubeflowExecutor(image="test:latest", job_kind="InvalidKind") + + def test_kubeconfig_fallback_to_incluster(self): + with ( + patch("nemo_run.core.execution.kubeflow.config.load_kube_config") as mock_load, + patch( + "nemo_run.core.execution.kubeflow.config.load_incluster_config" + ) as mock_incluster, + patch("nemo_run.core.execution.kubeflow.client.CustomObjectsApi"), + patch("nemo_run.core.execution.kubeflow.client.CoreV1Api"), + ): + mock_load.side_effect = Exception("no kubeconfig") + KubeflowExecutor(image="test:latest") + mock_incluster.assert_called_once() + + def test_kubeconfig_both_fail_raises(self): + with ( + patch("nemo_run.core.execution.kubeflow.config.load_kube_config") as mock_load, + patch( + "nemo_run.core.execution.kubeflow.config.load_incluster_config" + ) as 
mock_incluster, + patch("nemo_run.core.execution.kubeflow.client.CustomObjectsApi"), + patch("nemo_run.core.execution.kubeflow.client.CoreV1Api"), + ): + mock_load.side_effect = Exception("no kubeconfig") + mock_incluster.side_effect = Exception("not in cluster") + with pytest.raises(Exception, match="no kubeconfig"): + KubeflowExecutor(image="test:latest") + + def test_nnodes(self, executor): + assert executor.nnodes() == 3 # num_nodes=3 total + + def test_nproc_per_node_explicit(self, mock_k8s_clients): + e = KubeflowExecutor(image="test:latest", nprocs_per_node=4) + assert e.nproc_per_node() == 4 + + def test_nproc_per_node_defaults_to_gpus(self, mock_k8s_clients): + e = KubeflowExecutor(image="test:latest", gpus_per_node=8) + assert e.nproc_per_node() == 8 + + def test_nproc_per_node_defaults_to_1_when_no_gpu(self, mock_k8s_clients): + e = KubeflowExecutor(image="test:latest") + assert e.nproc_per_node() == 1 + + def test_assign(self, executor): + executor.assign("exp-1", "/tmp/exp", "task-0", "task-0") + assert executor.experiment_id == "exp-1" + assert executor.experiment_dir == "/tmp/exp" + assert executor.job_dir == "/tmp/exp/task-0" + + # ── PyTorchJob manifest generation ────────────────────────────────────────── + + def test_get_job_body_structure(self, executor): + body = executor.get_job_body("my-job", ["/bin/bash", "-c", "echo hi"]) + assert body["apiVersion"] == "kubeflow.org/v1" + assert body["kind"] == "PyTorchJob" + assert body["metadata"]["name"] == "my-job" + spec = body["spec"] + assert spec["nprocPerNode"] == "8" # defaults to gpus_per_node + assert "Master" in spec["pytorchReplicaSpecs"] + assert "Worker" in spec["pytorchReplicaSpecs"] + assert spec["pytorchReplicaSpecs"]["Master"]["replicas"] == 1 + assert spec["pytorchReplicaSpecs"]["Worker"]["replicas"] == 2 + + def test_get_job_body_resources(self, executor): + executor.cpu_requests = "16" + executor.memory_requests = "64Gi" + body = executor.get_job_body("my-job", ["python", "train.py"]) 
+ container = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"]["containers"][ + 0 + ] + resources = container["resources"] + assert resources["limits"]["nvidia.com/gpu"] == "8" + assert resources["requests"]["cpu"] == "16" + assert resources["requests"]["memory"] == "64Gi" + + def test_get_job_body_no_gpu(self, mock_k8s_clients): + e = KubeflowExecutor(image="test:latest", gpus_per_node=None) + body = e.get_job_body("cpu-job", ["python", "train.py"]) + container = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"]["containers"][ + 0 + ] + resources = container.get("resources", {}) + limits = resources.get("limits", {}) + requests = resources.get("requests", {}) + assert "nvidia.com/gpu" not in limits + assert "nvidia.com/gpu" not in requests + + def test_get_job_body_volumes(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + volumes=[{"name": "data", "persistentVolumeClaim": {"claimName": "my-pvc"}}], + volume_mounts=[{"name": "data", "mountPath": "/data"}], + ) + body = e.get_job_body("vol-job", ["echo", "hi"]) + spec = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"] + assert spec["volumes"] == [ + {"name": "data", "persistentVolumeClaim": {"claimName": "my-pvc"}} + ] + container = spec["containers"][0] + assert container["volumeMounts"] == [{"name": "data", "mountPath": "/data"}] + + def test_get_job_body_env_vars(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + env_vars={"MY_VAR": "hello", "OTHER": "world"}, + ) + body = e.get_job_body("env-job", ["echo"]) + container = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"]["containers"][ + 0 + ] + env_names = {item["name"]: item["value"] for item in container["env"]} + assert env_names["MY_VAR"] == "hello" + assert env_names["OTHER"] == "world" + + def test_get_job_body_labels_annotations(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + labels={"app": "my-app"}, + 
annotations={"note": "test"}, + ) + body = e.get_job_body("labeled-job", ["echo"]) + assert body["metadata"]["labels"] == {"app": "my-app"} + assert body["metadata"]["annotations"] == {"note": "test"} + pod_meta = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["metadata"] + assert pod_meta["labels"] == {"app": "my-app"} + + def test_get_job_body_image_pull_secrets(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + image_pull_secrets=["my-secret", "other-secret"], + ) + body = e.get_job_body("secret-job", ["echo"]) + pod_spec = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"] + assert pod_spec["imagePullSecrets"] == [ + {"name": "my-secret"}, + {"name": "other-secret"}, + ] + + def test_get_job_body_spec_kwargs(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + spec_kwargs={"elasticPolicy": {"maxRestarts": 3}}, + ) + body = e.get_job_body("spec-job", ["echo"]) + assert body["spec"]["elasticPolicy"] == {"maxRestarts": 3} + + def test_get_job_body_container_kwargs(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + container_kwargs={"securityContext": {"runAsUser": 1000}}, + ) + body = e.get_job_body("ckwargs-job", ["echo"]) + container = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"]["containers"][ + 0 + ] + assert container["securityContext"] == {"runAsUser": 1000} + + def test_get_job_body_artifact(self, mock_k8s_clients): + e = KubeflowExecutor( + image="nvcr.io/nvidian/nemo:nightly", + namespace="runai-nemo-ci", + num_nodes=3, + nprocs_per_node=8, + gpus_per_node=8, + cpu_requests="16", + memory_requests="64Gi", + volumes=[{"name": "model-cache", "persistentVolumeClaim": {"claimName": "my-pvc"}}], + volume_mounts=[{"name": "model-cache", "mountPath": "/nemo-workspace"}], + labels={"app": "nemo-ci-training"}, + ) + body = e.get_job_body("nemo-ci-training", ["/bin/bash", "-c", "echo hi"]) + + assert body["apiVersion"] == "kubeflow.org/v1" + assert 
body["kind"] == "PyTorchJob" + assert body["metadata"]["name"] == "nemo-ci-training" + assert body["metadata"]["namespace"] == "runai-nemo-ci" + spec = body["spec"] + assert spec["nprocPerNode"] == "8" + master = spec["pytorchReplicaSpecs"]["Master"] + worker = spec["pytorchReplicaSpecs"]["Worker"] + assert master["replicas"] == 1 + assert worker["replicas"] == 2 + for replica in [master, worker]: + container = replica["template"]["spec"]["containers"][0] + assert container["image"] == "nvcr.io/nvidian/nemo:nightly" + assert container["resources"]["limits"]["nvidia.com/gpu"] == "8" + assert container["resources"]["requests"]["cpu"] == "16" + assert container["resources"]["requests"]["memory"] == "64Gi" + + # ── TrainJob manifest generation ───────────────────────────────────────────── + + def test_get_trainjob_body_structure(self, mock_k8s_clients): + e = KubeflowExecutor( + image="nvcr.io/nvidian/nemo:nightly", + job_kind="TrainJob", + num_nodes=2, + gpus_per_node=8, + ) + body = e.get_job_body("my-trainjob", ["python", "train.py"]) + assert body["apiVersion"] == "trainer.kubeflow.org/v1alpha1" + assert body["kind"] == "TrainJob" + assert body["metadata"]["name"] == "my-trainjob" + spec = body["spec"] + assert spec["runtimeRef"] == {"name": "torch-distributed"} + trainer = spec["trainer"] + assert trainer["numNodes"] == 2 + assert trainer["numProcPerNode"] == 8 # defaults to gpus_per_node, int not str + assert trainer["image"] == "nvcr.io/nvidian/nemo:nightly" + assert trainer["command"] == ["python", "train.py"] + + def test_get_trainjob_body_resources(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + job_kind="TrainJob", + gpus_per_node=4, + cpu_requests="8", + memory_requests="32Gi", + ) + body = e.get_job_body("res-job", ["echo"]) + resources = body["spec"]["trainer"]["resourcesPerNode"] + assert resources["limits"]["nvidia.com/gpu"] == "4" + assert resources["requests"]["cpu"] == "8" + assert resources["requests"]["memory"] == "32Gi" + 
+ def test_get_trainjob_body_custom_runtime_ref(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + job_kind="TrainJob", + runtime_ref="my-custom-runtime", + ) + body = e.get_job_body("rt-job", ["echo"]) + assert body["spec"]["runtimeRef"] == {"name": "my-custom-runtime"} + + def test_get_trainjob_body_no_resources_when_no_gpu(self, mock_k8s_clients): + e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") + body = e.get_job_body("cpu-job", ["echo"]) + assert "resourcesPerNode" not in body["spec"]["trainer"] + + def test_get_trainjob_body_volumes_via_pod_template_overrides(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + job_kind="TrainJob", + volumes=[{"name": "data", "persistentVolumeClaim": {"claimName": "my-pvc"}}], + volume_mounts=[{"name": "data", "mountPath": "/data"}], + ) + body = e.get_job_body("vol-job", ["echo"]) + overrides = body["spec"]["podTemplateOverrides"] + assert len(overrides) == 1 + assert overrides[0]["targetJobs"] == [{"name": "node"}] + pod_spec = overrides[0]["spec"] + assert pod_spec["volumes"] == [ + {"name": "data", "persistentVolumeClaim": {"claimName": "my-pvc"}} + ] + containers = pod_spec["containers"] + assert containers[0]["name"] == "node" + assert containers[0]["volumeMounts"] == [{"name": "data", "mountPath": "/data"}] + + def test_get_trainjob_body_image_pull_secrets_via_pod_template_overrides( + self, mock_k8s_clients + ): + e = KubeflowExecutor( + image="test:latest", + job_kind="TrainJob", + image_pull_secrets=["my-secret"], + ) + body = e.get_job_body("secret-job", ["echo"]) + pod_spec = body["spec"]["podTemplateOverrides"][0]["spec"] + assert pod_spec["imagePullSecrets"] == [{"name": "my-secret"}] + + def test_get_trainjob_body_no_overrides_when_no_volumes(self, mock_k8s_clients): + e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") + body = e.get_job_body("plain-job", ["echo"]) + assert "podTemplateOverrides" not in body["spec"] + + def 
test_get_trainjob_body_tolerations_and_affinity(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + job_kind="TrainJob", + tolerations=[{"key": "nvidia.com/gpu", "operator": "Exists", "effect": "NoSchedule"}], + affinity={"nodeAffinity": {"requiredDuringSchedulingIgnoredDuringExecution": {}}}, + ) + body = e.get_job_body("tol-job", ["echo"]) + pod_spec = body["spec"]["podTemplateOverrides"][0]["spec"] + assert pod_spec["tolerations"] == [ + {"key": "nvidia.com/gpu", "operator": "Exists", "effect": "NoSchedule"} + ] + assert "nodeAffinity" in pod_spec["affinity"] + + def test_get_trainjob_body_env_list(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + job_kind="TrainJob", + env_vars={"SIMPLE": "value"}, + env_list=[ + { + "name": "SECRET_KEY", + "valueFrom": {"secretKeyRef": {"name": "my-secret", "key": "key"}}, + } + ], + ) + body = e.get_job_body("env-job", ["echo"]) + env = body["spec"]["trainer"]["env"] + env_by_name = {e["name"]: e for e in env} + assert env_by_name["SIMPLE"]["value"] == "value" + assert "valueFrom" in env_by_name["SECRET_KEY"] + + def test_get_trainjob_body_pod_spec_overrides(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + job_kind="TrainJob", + pod_spec_overrides={ + "resourceClaims": [ + {"name": "imex-channel", "resourceClaimTemplateName": "my-template"} + ] + }, + ) + body = e.get_job_body("rc-job", ["echo"]) + pod_spec = body["spec"]["podTemplateOverrides"][0]["spec"] + assert pod_spec["resourceClaims"][0]["name"] == "imex-channel" + + def test_get_trainjob_body_all_overrides_in_single_entry(self, mock_k8s_clients): + # volumes, tolerations, affinity, imagePullSecrets, pod_spec_overrides + # must all land in ONE podTemplateOverrides entry, not multiple. 
+ e = KubeflowExecutor( + image="test:latest", + job_kind="TrainJob", + volumes=[{"name": "data", "emptyDir": {}}], + tolerations=[{"key": "gpu", "operator": "Exists"}], + image_pull_secrets=["my-secret"], + pod_spec_overrides={"resourceClaims": [{"name": "imex"}]}, + ) + body = e.get_job_body("merged-job", ["echo"]) + overrides = body["spec"]["podTemplateOverrides"] + assert len(overrides) == 1 + pod_spec = overrides[0]["spec"] + assert "volumes" in pod_spec + assert "tolerations" in pod_spec + assert "imagePullSecrets" in pod_spec + assert "resourceClaims" in pod_spec + + def test_get_pytorchjob_body_tolerations_and_affinity(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + tolerations=[{"key": "nvidia.com/gpu", "operator": "Exists"}], + affinity={"nodeAffinity": {}}, + ) + body = e.get_job_body("tol-job", ["echo"]) + pod_spec = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"] + assert pod_spec["tolerations"] == [{"key": "nvidia.com/gpu", "operator": "Exists"}] + assert "nodeAffinity" in pod_spec["affinity"] + + def test_get_pytorchjob_body_env_list(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + env_list=[ + { + "name": "SECRET", + "valueFrom": {"secretKeyRef": {"name": "s", "key": "k"}}, + } + ], + ) + body = e.get_job_body("env-job", ["echo"]) + container = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"]["containers"][ + 0 + ] + env_by_name = {e["name"]: e for e in container["env"]} + assert "valueFrom" in env_by_name["SECRET"] + + # ── Launch / status / cancel ───────────────────────────────────────────────── + + def test_launch_success(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.create_namespaced_custom_object.return_value = {} + + job_name, state = executor.launch("test-job", ["/bin/bash", "-c", "echo hi"]) + assert job_name == "test-job" + assert state == KubeflowJobState.CREATED + 
mock_custom.create_namespaced_custom_object.assert_called_once() + + def test_launch_wait_until_running(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.create_namespaced_custom_object.return_value = {} + mock_custom.get_namespaced_custom_object.side_effect = [ + {"status": {"conditions": [{"type": "Created", "status": "True"}]}}, + {"status": {"conditions": [{"type": "Running", "status": "True"}]}}, + ] + + with patch("time.sleep"): + job_name, state = executor.launch( + "test-job", ["/bin/bash", "-c", "echo hi"], wait=True, timeout=30 + ) + assert state == KubeflowJobState.RUNNING + + def test_launch_wait_timeout(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.create_namespaced_custom_object.return_value = {} + mock_custom.get_namespaced_custom_object.return_value = { + "status": {"conditions": [{"type": "Created", "status": "True"}]} + } + + with patch("time.sleep"): + with pytest.raises(RuntimeError, match="did not reach RUNNING"): + executor.launch("test-job", ["echo"], wait=True, timeout=-1) + + def test_launch_conflict(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.create_namespaced_custom_object.side_effect = ApiException(status=409) + + with pytest.raises(RuntimeError, match="already exists"): + executor.launch("test-job", ["/bin/bash", "-c", "echo hi"]) + + def test_status_running(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.get_namespaced_custom_object.return_value = { + "status": { + "conditions": [ + {"type": "Created", "status": "True"}, + {"type": "Running", "status": "True"}, + ] + } + } + assert executor.status("test-job") == KubeflowJobState.RUNNING + + def test_status_succeeded(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.get_namespaced_custom_object.return_value = { + "status": { + "conditions": [ + {"type": "Running", "status": "False"}, + {"type": 
"Succeeded", "status": "True"}, + ] + } + } + assert executor.status("test-job") == KubeflowJobState.SUCCEEDED + + def test_status_failed(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.get_namespaced_custom_object.return_value = { + "status": { + "conditions": [ + {"type": "Running", "status": "False"}, + {"type": "Failed", "status": "True"}, + ] + } + } + assert executor.status("test-job") == KubeflowJobState.FAILED + + def test_status_not_found(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.get_namespaced_custom_object.side_effect = ApiException(status=404) + assert executor.status("missing-job") is None + + def test_status_api_error(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.get_namespaced_custom_object.side_effect = ApiException(status=500) + assert executor.status("bad-job") is None + + def test_cancel(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.delete_namespaced_custom_object.return_value = {} + # Should not raise + executor.cancel("test-job") + mock_custom.delete_namespaced_custom_object.assert_called_once() + + def test_cancel_already_deleted(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.delete_namespaced_custom_object.side_effect = ApiException(status=404) + result = executor.cancel("gone-job") + assert result is None # handled gracefully + + def test_cancel_with_wait(self, executor, mock_k8s_clients): + mock_custom, mock_core = mock_k8s_clients + mock_custom.delete_namespaced_custom_object.return_value = {} + # CR is gone on first poll + mock_custom.get_namespaced_custom_object.side_effect = ApiException(status=404) + mock_core.list_namespaced_pod.return_value = MagicMock(items=[]) + + with patch("time.sleep"): + result = executor.cancel("test-job", wait=True, timeout=30, poll_interval=0) + assert result is True + + def test_cancel_with_wait_timeout(self, 
executor, mock_k8s_clients): + mock_custom, mock_core = mock_k8s_clients + mock_custom.delete_namespaced_custom_object.return_value = {} + # CR never disappears + mock_custom.get_namespaced_custom_object.return_value = {"metadata": {"name": "test-job"}} + + with patch("time.sleep"): + result = executor.cancel("test-job", wait=True, timeout=-1, poll_interval=0) + assert result is False + + # ── Logs ───────────────────────────────────────────────────────────────────── + + def test_fetch_logs_no_follow(self, executor, mock_k8s_clients): + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="line1\nline2\n") + lines = list(executor.fetch_logs("my-job", stream=False, lines=50)) + + mock_run.assert_called_once() + called_cmd = mock_run.call_args[0][0] + assert "--tail" in called_cmd + assert "50" in called_cmd + label_arg = " ".join(called_cmd) + assert "training.kubeflow.org/job-name=my-job" in label_arg + assert "-f" not in called_cmd + assert lines == ["line1", "line2"] + + def test_fetch_logs_follow(self, executor, mock_k8s_clients): + import io + + mock_proc = MagicMock() + mock_proc.stdout = io.StringIO("line1\nline2\n") + mock_proc.poll.return_value = None # still running; loop exits when readline() hits EOF + + with patch("subprocess.Popen", return_value=mock_proc) as mock_popen: + lines = list(executor.fetch_logs("my-job", stream=True, lines=100)) + + mock_popen.assert_called_once() + called_cmd = mock_popen.call_args[0][0] + assert "-f" in called_cmd + assert lines == ["line1\n", "line2\n"] + + def test_fetch_logs_trainjob_label_selector(self, mock_k8s_clients): + e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="") + list(e.fetch_logs("my-trainjob", stream=False)) + + called_cmd = mock_run.call_args[0][0] + label_arg = " ".join(called_cmd) + assert "jobset.sigs.k8s.io/jobset-name=my-trainjob" in label_arg + + # ── TrainJob 
status (jobsStatus-based) ──────────────────────────────────── + + def test_trainjob_status_running(self, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") + mock_custom.get_namespaced_custom_object.return_value = { + "status": {"jobsStatus": [{"active": 2, "ready": 2, "succeeded": 0, "failed": 0}]} + } + assert e.status("test-job") == KubeflowJobState.RUNNING + + def test_trainjob_status_succeeded(self, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") + mock_custom.get_namespaced_custom_object.return_value = { + "status": {"jobsStatus": [{"active": 0, "ready": 0, "succeeded": 3, "failed": 0}]} + } + assert e.status("test-job") == KubeflowJobState.SUCCEEDED + + def test_trainjob_status_failed(self, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") + mock_custom.get_namespaced_custom_object.return_value = { + "status": {"jobsStatus": [{"active": 0, "ready": 0, "succeeded": 0, "failed": 1}]} + } + assert e.status("test-job") == KubeflowJobState.FAILED + + def test_trainjob_status_unknown_when_empty(self, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") + mock_custom.get_namespaced_custom_object.return_value = {"status": {}} + assert e.status("test-job") == KubeflowJobState.UNKNOWN + + # ── Workdir sync ────────────────────────────────────────────────────────── + + @pytest.fixture + def workdir_executor(self, mock_k8s_clients, tmp_path): + e = KubeflowExecutor( + image="test:latest", + workdir_pvc="my-pvc", + workdir_pvc_path="/nemo_run", + ) + e.job_dir = str(tmp_path) + return e + + def _make_watch_events(self, phase: str): + pod = MagicMock() + pod.status.phase = phase + return [{"object": pod}] + + def test_package_noop_without_workdir_pvc(self, mock_k8s_clients, tmp_path): + e = 
KubeflowExecutor(image="test:latest") + e.job_dir = str(tmp_path) + mock_custom, mock_core = mock_k8s_clients + e.package(MagicMock(), "test-job") + mock_core.create_namespaced_pod.assert_not_called() + + def test_package_syncs_to_pvc(self, workdir_executor, mock_k8s_clients): + _, mock_core = mock_k8s_clients + mock_core.create_namespaced_pod.return_value = MagicMock() + mock_core.delete_namespaced_pod.return_value = MagicMock() + mock_core.read_namespaced_pod.side_effect = ApiException(status=404) + + with ( + patch("kubernetes.watch.Watch") as mock_watch_cls, + patch("subprocess.check_call") as mock_check_call, + ): + mock_watch = MagicMock() + mock_watch_cls.return_value = mock_watch + mock_watch.stream.return_value = self._make_watch_events("Running") + + workdir_executor.package(MagicMock(), "test-job") + + mock_core.create_namespaced_pod.assert_called_once() + assert mock_check_call.call_count == 2 # mkdir + rsync + # workdir PVC auto-added to volumes/volume_mounts + assert any( + v.get("persistentVolumeClaim", {}).get("claimName") == "my-pvc" + for v in workdir_executor.volumes + ) + assert any(vm.get("mountPath") == "/nemo_run" for vm in workdir_executor.volume_mounts) + + def test_package_auto_add_volume_idempotent(self, workdir_executor, mock_k8s_clients): + """Calling package() twice should not duplicate volumes.""" + _, mock_core = mock_k8s_clients + mock_core.create_namespaced_pod.return_value = MagicMock() + mock_core.delete_namespaced_pod.return_value = MagicMock() + mock_core.read_namespaced_pod.side_effect = ApiException(status=404) + + with ( + patch("kubernetes.watch.Watch") as mock_watch_cls, + patch("subprocess.check_call"), + ): + mock_watch = MagicMock() + mock_watch_cls.return_value = mock_watch + mock_watch.stream.return_value = self._make_watch_events("Running") + workdir_executor.package(MagicMock(), "test-job") + workdir_executor.package(MagicMock(), "test-job") + + pvc_vols = [ + v + for v in workdir_executor.volumes + if 
v.get("persistentVolumeClaim", {}).get("claimName") == "my-pvc" + ] + assert len(pvc_vols) == 1 + + def test_pull_results_syncs_from_pvc(self, workdir_executor, mock_k8s_clients): + _, mock_core = mock_k8s_clients + mock_core.create_namespaced_pod.return_value = MagicMock() + mock_core.delete_namespaced_pod.return_value = MagicMock() + mock_core.read_namespaced_pod.side_effect = ApiException(status=404) + + with ( + patch("kubernetes.watch.Watch") as mock_watch_cls, + patch("subprocess.check_call") as mock_check_call, + ): + mock_watch = MagicMock() + mock_watch_cls.return_value = mock_watch + mock_watch.stream.return_value = self._make_watch_events("Running") + + workdir_executor.pull_results("test-job") + + mock_core.create_namespaced_pod.assert_called_once() + assert mock_check_call.call_count == 1 # kubectl cp only (no mkdir for pull) + cp_args = mock_check_call.call_args[0][0] + # kubectl cp /: + assert "kubectl" in cp_args + assert "cp" in cp_args + assert "test-job-data-mover:/nemo_run" in cp_args + + def test_pull_results_noop_without_workdir_pvc(self, mock_k8s_clients): + e = KubeflowExecutor(image="test:latest") + _, mock_core = mock_k8s_clients + e.pull_results("test-job") + mock_core.create_namespaced_pod.assert_not_called() + + def test_data_mover_pod_inherits_tolerations_affinity_pull_secrets( + self, mock_k8s_clients, tmp_path + ): + _, mock_core = mock_k8s_clients + mock_core.create_namespaced_pod.return_value = MagicMock() + mock_core.delete_namespaced_pod.return_value = MagicMock() + mock_core.read_namespaced_pod.side_effect = ApiException(status=404) + + e = KubeflowExecutor( + image="test:latest", + workdir_pvc="my-pvc", + workdir_pvc_path="/nemo_run", + tolerations=[{"key": "gpu", "operator": "Exists"}], + affinity={"nodeAffinity": {"key": "val"}}, + image_pull_secrets=["my-secret"], + ) + e.job_dir = str(tmp_path) + + with ( + patch("kubernetes.watch.Watch") as mock_watch_cls, + patch("subprocess.check_call"), + ): + 
mock_watch_cls.return_value.stream.return_value = self._make_watch_events("Running") + e.package(MagicMock(), "test-job") + + pod_body = mock_core.create_namespaced_pod.call_args[1]["body"] + spec = pod_body["spec"] + assert spec["tolerations"] == [{"key": "gpu", "operator": "Exists"}] + assert spec["affinity"] == {"nodeAffinity": {"key": "val"}} + assert spec["imagePullSecrets"] == [{"name": "my-secret"}] diff --git a/test/run/torchx_backend/schedulers/test_kubeflow.py b/test/run/torchx_backend/schedulers/test_kubeflow.py new file mode 100644 index 00000000..209335fb --- /dev/null +++ b/test/run/torchx_backend/schedulers/test_kubeflow.py @@ -0,0 +1,289 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from unittest import mock +from unittest.mock import MagicMock, patch + +import pytest +from torchx.schedulers.api import AppDryRunInfo +from torchx.specs import AppDef, AppState, Role + +from nemo_run.core.execution.kubeflow import KubeflowExecutor, KubeflowJobState +from nemo_run.run.torchx_backend.schedulers.kubeflow import ( + KUBEFLOW_STATES, + KubeflowScheduler, + create_scheduler, +) + + +@pytest.fixture +def mock_k8s(): + with ( + patch("nemo_run.core.execution.kubeflow.config.load_kube_config"), + patch("nemo_run.core.execution.kubeflow.client.CustomObjectsApi") as mock_custom, + patch("nemo_run.core.execution.kubeflow.client.CoreV1Api") as mock_core, + ): + yield mock_custom.return_value, mock_core.return_value + + +@pytest.fixture +def executor(mock_k8s, tmp_path): + e = KubeflowExecutor( + image="nvcr.io/nvidian/nemo:nightly", + num_nodes=3, + gpus_per_node=8, + ) + e.experiment_id = "test_exp" + e.job_dir = str(tmp_path) + e.experiment_dir = str(tmp_path) + e.job_name = "test_role" + return e + + +@pytest.fixture +def scheduler(): + return create_scheduler(session_name="test") + + +@pytest.fixture +def mock_app_def(): + return AppDef( + name="test_app", + roles=[ + Role( + name="test_role", + image="nvcr.io/nvidian/nemo:nightly", + entrypoint="python", + args=["train.py"], + ) + ], + ) + + +# ── Scheduler lifecycle ─────────────────────────────────────────────────────── + + +def test_create_scheduler(): + s = create_scheduler(session_name="test") + assert isinstance(s, KubeflowScheduler) + assert s.session_name == "test" + + +def test_submit_dryrun(scheduler, mock_app_def, executor): + with mock.patch.object(KubeflowExecutor, "package") as mock_pkg: + mock_pkg.return_value = None + dryrun_info = scheduler._submit_dryrun(mock_app_def, executor) + assert isinstance(dryrun_info, AppDryRunInfo) + assert dryrun_info.request is not None + + +def test_schedule(scheduler, mock_app_def, executor): + with ( + mock.patch.object(KubeflowExecutor, "package") as 
mock_pkg, + mock.patch.object(KubeflowExecutor, "launch") as mock_launch, + ): + mock_pkg.return_value = None + mock_launch.return_value = ("test-job", KubeflowJobState.CREATED) + + dryrun_info = scheduler._submit_dryrun(mock_app_def, executor) + app_id = scheduler.schedule(dryrun_info) + + assert app_id == "test_exp___test_role___test-job" + mock_pkg.assert_called_once() + mock_launch.assert_called_once() + + +# ── State mapping ───────────────────────────────────────────────────────────── + + +def test_describe_running(scheduler, executor): + with mock.patch("nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs") as mock_dirs: + mock_dirs.return_value = { + "test_exp___test_role___test-job": { + "job_status": "Created", + "job_name": "test-job", + "executor": executor, + } + } + with mock.patch.object(KubeflowExecutor, "status", return_value=KubeflowJobState.RUNNING): + resp = scheduler.describe("test_exp___test_role___test-job") + assert resp is not None + assert resp.state == AppState.RUNNING + + +def test_describe_succeeded(scheduler, executor): + with mock.patch("nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs") as mock_dirs: + mock_dirs.return_value = { + "test_exp___test_role___test-job": { + "job_status": "Created", + "job_name": "test-job", + "executor": executor, + } + } + with mock.patch.object(KubeflowExecutor, "status", return_value=KubeflowJobState.SUCCEEDED): + resp = scheduler.describe("test_exp___test_role___test-job") + assert resp.state == AppState.SUCCEEDED + + +def test_describe_failed(scheduler, executor): + with mock.patch("nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs") as mock_dirs: + mock_dirs.return_value = { + "test_exp___test_role___test-job": { + "job_status": "Created", + "job_name": "test-job", + "executor": executor, + } + } + with mock.patch.object(KubeflowExecutor, "status", return_value=KubeflowJobState.FAILED): + resp = scheduler.describe("test_exp___test_role___test-job") + assert 
resp.state == AppState.FAILED + + +def test_describe_unknown_maps_to_pending(scheduler, executor): + # None status (transient error) must not become FAILED — avoids false failures + with mock.patch("nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs") as mock_dirs: + mock_dirs.return_value = { + "test_exp___test_role___test-job": { + "job_status": "Created", + "job_name": "test-job", + "executor": executor, + } + } + with mock.patch.object(KubeflowExecutor, "status", return_value=None): + resp = scheduler.describe("test_exp___test_role___test-job") + assert resp.state == AppState.PENDING + + +def test_describe_uses_stored_job_id_not_split(scheduler, executor): + # Regression: role names containing '___' must not corrupt app_id parsing. + real_job_name = "real-job-abc123" + app_id = f"experiment___role_name___{real_job_name}" + + with ( + mock.patch("nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs") as mock_dirs, + mock.patch.object( + KubeflowExecutor, "status", return_value=KubeflowJobState.RUNNING + ) as mock_status, + ): + mock_dirs.return_value = { + app_id: { + "job_status": "Created", + "job_name": real_job_name, + "executor": executor, + } + } + resp = scheduler.describe(app_id) + + assert resp is not None + mock_status.assert_called_once_with(real_job_name) + + +# ── Cancel / logs ───────────────────────────────────────────────────────────── + + +def test_cancel_existing(scheduler, executor): + with ( + mock.patch("nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs") as mock_dirs, + mock.patch.object(KubeflowExecutor, "cancel") as mock_cancel, + ): + mock_dirs.return_value = { + "test_exp___test_role___test-job": { + "job_status": "Running", + "job_name": "test-job", + "executor": executor, + } + } + scheduler._cancel_existing("test_exp___test_role___test-job") + mock_cancel.assert_called_once_with("test-job") + + +def test_log_iter_list(scheduler, executor): + with 
mock.patch("nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs") as mock_dirs: + mock_dirs.return_value = { + "test_exp___test_role___test-job": { + "job_status": "Running", + "job_name": "test-job", + "executor": executor, + } + } + executor.fetch_logs = MagicMock(return_value=["log line 1", "log line 2"]) + + lines = list(scheduler.log_iter("test_exp___test_role___test-job", "test_role")) + assert lines == ["log line 1", "log line 2"] + + +def test_log_iter_str(scheduler, executor): + with mock.patch("nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs") as mock_dirs: + mock_dirs.return_value = { + "test_exp___test_role___test-job": { + "job_status": "Running", + "job_name": "test-job", + "executor": executor, + } + } + executor.fetch_logs = MagicMock(return_value="log line 1\nlog line 2") + + lines = list(scheduler.log_iter("test_exp___test_role___test-job", "test_role")) + assert "log line 1\n" in lines or "log line 1" in lines + + +# ── Persistence ─────────────────────────────────────────────────────────────── + + +def test_save_job_dir_new_file(executor, tmp_path): + from nemo_run.config import set_nemorun_home + + set_nemorun_home(str(tmp_path)) + + from nemo_run.run.torchx_backend.schedulers.kubeflow import _get_job_dirs, _save_job_dir + + _save_job_dir("my_app_id", job_status="Created", executor=executor, job_name="my-job") + dirs = _get_job_dirs() + assert "my_app_id" in dirs + assert dirs["my_app_id"]["job_name"] == "my-job" + assert isinstance(dirs["my_app_id"]["executor"], KubeflowExecutor) + + +def test_save_job_dir_existing_file(executor, tmp_path): + from nemo_run.config import set_nemorun_home + + set_nemorun_home(str(tmp_path)) + + from nemo_run.run.torchx_backend.schedulers.kubeflow import _get_job_dirs, _save_job_dir + + _save_job_dir("app_id_1", job_status="Created", executor=executor, job_name="job-1") + _save_job_dir("app_id_2", job_status="Running", executor=executor, job_name="job-2") + + dirs = _get_job_dirs() + 
assert "app_id_1" in dirs + assert "app_id_2" in dirs + + +def test_get_job_dirs_file_not_found(tmp_path): + from nemo_run.config import set_nemorun_home + + set_nemorun_home(str(tmp_path)) + + from nemo_run.run.torchx_backend.schedulers.kubeflow import _get_job_dirs + + result = _get_job_dirs() + assert result == {} + + +# ── State map ───────────────────────────────────────────────────────────────── + + +def test_unknown_state_maps_to_pending(): + assert KUBEFLOW_STATES[KubeflowJobState.UNKNOWN] == AppState.PENDING diff --git a/uv.lock b/uv.lock index 26812027..e3e7b833 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", @@ -110,7 +110,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs" }, { name = "aiosignal" }, - { name = "async-timeout", marker = "python_full_version < '3.11' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "async-timeout", marker = "python_full_version < '3.11'" }, { name = "attrs" }, { name = "frozenlist" }, { name = "multidict" }, @@ -260,11 +260,11 @@ name = "anyio" version = "4.8.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "exceptiongroup", marker = 
"python_full_version < '3.11'" }, { name = "idna" }, { name = "sniffio" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.12.*' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.13' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.13' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.13' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 
'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.12.*' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a3/73/199a98fc2dae33535d6b8e8e6ec01f8c1d76c9adb096c6b7d64823038cde/anyio-4.8.0.tar.gz", hash = "sha256:1d9fe889df5212298c0c0723fa20479d1b94883a2df44bd3897aa91083316f7a", size = 181126, upload-time = "2025-01-05T13:13:11.095Z" } wheels = [ @@ -446,8 +446,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "azure-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' 
and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/34/e8/6a1354d9fd22a84a83f009915598b823a7d9cb60e39cd28661b9c54d1121/azure_batch-15.0.0b1.tar.gz", hash = "sha256:dfbddd158ffade52193e3e4d86c996ea7236ffd2695a43734fae5e05a974e2ed", size = 896678, upload-time = "2024-09-19T22:29:31.336Z" } wheels = [ @@ -623,8 +623,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "requests" }, { name = "six" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 
'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/cc/ee/668328306a9e963a5ad9f152cd98c7adad86c822729fd1d2a01613ad1e67/azure_core-1.32.0.tar.gz", hash = "sha256:22b3c35d6b2dae14990f6c1be2912bf23ffe50b220e708a28ab1bb92b1c730e5", size = 279128, upload-time = "2024-10-31T17:45:17.528Z" } wheels = [ @@ -679,8 +679,8 @@ dependencies = [ { name = "cryptography" }, { name = "msal" }, { name = "msal-extensions" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 
'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ee/89/7d170fab0b85d9650cdb7abda087e849644beb52bd28f6804620dd0cecd9/azure_identity-1.20.0.tar.gz", hash = "sha256:40597210d56c83e15031b0fe2ea3b26420189e1e7f3e20bdbb292315da1ba014", size = 264447, upload-time = "2025-02-12T00:40:41.225Z" } wheels = [ @@ -695,8 +695,8 @@ dependencies = [ { name = "azure-common" }, { name = 
"azure-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/80/4f/b0d62738a6e3c8e27c3cc33400e8deb14d6490042180fc872c1cdbe891ac/azure-keyvault-administration-4.4.0b2.tar.gz", hash = 
"sha256:8d0edefad78024c3a97b071fa5cf50daf923085e9d4379259f7237d911e66810", size = 98067, upload-time = "2023-11-03T21:01:36.248Z" } wheels = [ @@ -711,8 +711,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 
'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/e6/cf/85d521e65557e4dee2cd9c700f518c3a46f6f71068e61c07d0b13b2e0727/azure-keyvault-certificates-4.7.0.zip", hash = "sha256:9e47d9a74825e502b13d5481c99c182040c4f54723f43371e00859436dfcf3ca", size = 533075, upload-time = "2023-03-16T21:52:21.956Z" } wheels = [ @@ -728,8 +728,8 @@ dependencies = [ { name = "azure-core" }, { name = "cryptography" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 
'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/f0/cc544f2ea8dc1a7ea9a1159ffb5b2b56b3fb86694fc565c87e5444a98718/azure-keyvault-keys-4.9.0b3.tar.gz", hash = "sha256:aa8b1ec9fe96a81106f2f3dcd61175ecae3a01693c05af15f4a45e77894e946a", size = 208992, upload-time = "2023-11-03T21:02:08.115Z" } wheels = [ @@ -744,8 +744,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { 
name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/5c/a1/78ecabf98e97d600dcac1559ff64b4bc9f84eca126c0aeba859916832b0c/azure-keyvault-secrets-4.7.0.zip", hash = "sha256:77ee2534ba651a1f306c85d7b505bc3ccee8fea77450ebafafc26aec16e5445d", size = 423956, upload-time = "2023-03-16T21:52:57.183Z" } wheels = [ @@ -788,8 +788,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 
'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ac/38/8916327a19b106916ed950461eed816c53a7d8736990ddc6167a5738f161/azure_mgmt_appconfiguration-3.1.0.tar.gz", hash = "sha256:0596f09e7e7841be91dde1c818134100bbfa124486e06889d239dd587744b47c", size = 187092, upload-time = "2024-10-21T06:17:55.606Z" } wheels = [ @@ -930,8 +930,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = 
"4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/90/f2/a2b1391e9df876d7ef9086f8d41ad4666eafef921ae0c47da931f8cedb1a/azure-mgmt-compute-33.0.0.tar.gz", hash = "sha256:a3cc0fe4f09c8e1d3523c1bfb92620dfe263a0a893b0ac13a33d7057e9ddddd2", size = 5308187, upload-time = "2024-08-23T08:47:37.38Z" } wheels = [ @@ -946,8 +946,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra 
== 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/4a/0c/434063cc0dfd1a5f07e4517d6ffc9ffa6bdc6159019266402f61624129c6/azure_mgmt_containerinstance-10.2.0b1.tar.gz", hash = "sha256:bf4bb77bd6681270dd0a733aa3a7c3ecdfacba8e616d3a8c3b98cce9c48cc7c0", size = 79214, upload-time = "2024-10-21T07:07:15.846Z" } wheels = [ @@ -976,8 +976,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = 
"https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/31/b0/a5a2d6c2b2f9f7404f86732d400786d7cd996a155467f47ecaf786ce56d9/azure_mgmt_containerservice-34.1.0.tar.gz", hash = "sha256:637a6cf8f06636c016ad151d76f9c7ba75bd05d4334b3dd7837eb8b517f30dbe", size = 319609, upload-time = "2025-02-19T04:08:41.59Z" } wheels = [ @@ -1004,8 +1004,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 
'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/74/2a/3240e83aff38443d334a17467d32a46bab269164ab9477bb17d2277b32f8/azure_mgmt_cosmosdb-9.7.0.tar.gz", hash = "sha256:b5072d319f11953d8f12e22459aded1912d5f27e442e1d8b49596a85005410a1", size = 265676, upload-time = "2024-11-19T03:19:26.253Z" } wheels = [ @@ -1118,8 +1118,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = 
"https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/89/3c/04ce6c779c28d95a13e37cf00854a31472ef4b563d98361c50200180b8f2/azure-mgmt-hdinsight-9.0.0b3.tar.gz", hash = "sha256:72549e08ff3eed3d6e23835e73ece1cc32bdf340bdf8919e78916c352c200f64", size = 103516, upload-time = "2024-08-21T09:18:57.044Z" } 
wheels = [ @@ -1302,8 +1302,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/61/85/b86cb3e554d72a837f0c86caf9ed43c3462cce5d7ce1bb1114bfcd34745b/azure_mgmt_mysqlflexibleservers-1.0.0b3.tar.gz", hash = "sha256:611fd88f3db1e0a8477a1633fe94c461d17213e215170eb53c1eea9b823bd4c3", size = 96156, upload-time = "2024-11-18T06:10:12.832Z" } wheels = [ @@ -1332,8 +1332,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 
'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/19/a3/8d2fa6e33107354c8cd2abcca4e0f02138bda4c6024984ae5fce5cf23b27/azure_mgmt_network-28.1.0.tar.gz", hash = "sha256:8c84bffb5ec75c6e0244e58ecf07c00d5fc421d616b0cb369c6fe585af33cf87", size = 651528, upload-time = "2024-12-20T05:56:54.599Z" } wheels = [ @@ -1362,8 +1362,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = 
"4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/52/f2/7cc422a144074a30e88bd5d5ca8e12100ca2a90791fef82a1e962bea816f/azure_mgmt_postgresqlflexibleservers-1.1.0b2.tar.gz", hash = "sha256:f0eb026f275f97bf95ae82cd58e30a760fff2944a7f4a80fc285aaf8da070934", size = 122431, upload-time = "2024-12-16T07:44:34.269Z" } wheels = [ @@ -1434,8 +1434,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 
'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/01/a2/b89ba36f4bc2708a7ab0115b451028b8888184b3c19bd9a3ac71afec8941/azure-mgmt-redhatopenshift-1.5.0.tar.gz", hash = "sha256:51fb7429c39c88acc9fa273d9f89f19303520662996a6d7d8e1122a98f5f2527", size = 234247, upload-time = "2024-07-23T05:59:00.053Z" } wheels = [ @@ -1450,8 +1450,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version 
= "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/7c/e2/7e4895296df120458af54186d788cb43abb95676e0a075c154606b8772ab/azure_mgmt_redis-14.5.0.tar.gz", hash = "sha256:5c3434c82492688e25b93aaf5113ecff0b92b7ad6da2a4fd4695530f82b152fa", size = 87997, upload-time = "2025-01-20T09:10:41.814Z" } wheels = [ @@ -1508,8 +1508,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or 
(extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/9c/c3/92afefab2efcea35a605910e9aeb229a94907eec6b453566f333b8e5cdff/azure_mgmt_servicebus-8.2.1.tar.gz", hash = "sha256:d4e0024bef6c619c6a65f530865147d5645b01f76b12f8611c0ebb16ef16cf47", size = 535699, upload-time = "2024-11-05T06:33:13.837Z" } wheels = [ @@ -1538,8 +1538,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = 
"https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/4f/68/d707b2a7fc64cbb42d1e57a183b332dfe8746deca58577a78c4fe42b803e/azure_mgmt_servicefabricmanagedclusters-2.1.0b1.tar.gz", hash = "sha256:2b16b93c8446e13372e28b378f635da1ad2aa631d9547b31b9fa3b7bc56d0f63", size = 147216, upload-time = "2024-10-21T06:17:37.128Z" } wheels = [ @@ -1554,8 +1554,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 
'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/81/b2/747b748a16f934f65eec2c37fbab23144b63365483ab19436a921d42ae31/azure_mgmt_servicelinker-1.2.0b3.tar.gz", hash = "sha256:c51c111fb76c59e58fceccfecfd119f8c83e4d64fdca77a46b62d81ec6a3ea29", size = 73179, upload-time = "2024-10-11T05:57:41.92Z" } wheels = [ @@ -1584,8 +1584,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = 
"https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/75/2c/02dac3293cd65277314bb07305c98d7e1a4d0fefe9d4f9304f11f2315be0/azure_mgmt_sql-4.0.0b20.tar.gz", hash = "sha256:9a986a1d47ade008662fc694a116eb18e8dba289021d1dc4c7eba7a4eabb6903", size = 584891, upload-time = "2024-11-04T09:28:41.06Z" } 
wheels = [ @@ -1614,8 +1614,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/ca/d2/f06af604fe54231f049c861dd1556495c95ad95620ed3b14337c3e164913/azure_mgmt_storage-22.1.0.tar.gz", hash = "sha256:727b8c8be4aca4551a9b921cdf76bb92b1e988d009de3b983ce72b7343b749e9", size = 370185, upload-time = "2025-02-19T06:11:15.368Z" } wheels = [ @@ -1658,8 +1658,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 
'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/7f/a4/a47081049ae17378b920518db566587c5691ed52c15802a2b418912081ad/azure-mgmt-web-7.3.1.tar.gz", hash = "sha256:87b771436bc99a7a8df59d0ad185b96879a06dce14764a06b3fc3dafa8fcb56b", size = 5283442, upload-time = "2024-08-20T03:23:38.733Z" } wheels = [ @@ -1673,8 +1673,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "azure-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = 
"typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ad/16/fd06cccfc583d8d38d8d99ee92ec1bbc9604cf6e8c62e64ddca5644e0a60/azure-monitor-query-1.2.0.zip", hash = "sha256:2c57432443f203069e64e500c7e958ca31650f641950515ffe65555ba134c371", size = 185223, upload-time = "2023-05-09T19:55:12.691Z" } wheels = [ @@ -1706,8 +1706,8 @@ dependencies = [ { name = "azure-core" }, { name = "cryptography" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 
'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/cb/96/6b881bb65de8638f00a010dd86e64ca9ebb0d973efcf1d15d9fd36d06977/azure_storage_blob-12.25.0b1.tar.gz", hash = "sha256:6ad34cf8535ea2a10124d8b2f79cc42028d7d9e1242bfbb4d86479522975eba6", size = 569965, upload-time = "2025-02-11T20:17:59.002Z" } wheels = [ @@ -1751,8 +1751,8 @@ dependencies = [ { name = "azure-common" }, { name = "azure-mgmt-core" }, { name = "isodate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = 
"4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/be/9a/32e71b8f048d0e7cf5b6df3d652c102368de40180834956fcc968fe0c1ff/azure_synapse_artifacts-0.20.0.tar.gz", hash = "sha256:3ed6c142faf62d3191a943b3222547f7730d4cbc10355d17d64fa77e0421644a", size = 447979, upload-time = "2025-02-24T06:32:58.475Z" } wheels = [ @@ -1869,8 +1869,8 @@ version = "4.13.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "soupsieve" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 
'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/f0/3c/adaf39ce1fb4afdd21b611e3d530b183bb7759c9b673d60db0e347fd4439/beautifulsoup4-4.13.3.tar.gz", hash = 
"sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b", size = 619516, upload-time = "2025-02-04T20:05:01.681Z" } wheels = [ @@ -2075,7 +2075,7 @@ name = "cffi" version = "2.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pycparser", marker = "implementation_name != 'PyPy' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } wheels = [ @@ -2407,8 +2407,8 @@ name = "cryptography" version = "46.0.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cffi", marker = "platform_python_implementation != 'PyPy' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/60/04/ee2a9e8542e4fa2773b81771ff8349ff19cdd56b7258a0cc442639052edb/cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d", size = 750064, upload-time = "2026-02-10T19:18:38.255Z" } wheels = [ @@ -2565,7 +2565,7 @@ name = "docker" version = "7.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pywin32", marker = "sys_platform == 'win32' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, { name = "requests" }, { name = "urllib3" }, ] @@ -2679,8 +2679,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "starlette" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 
'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b5/28/c5d26e5860df807241909a961a37d45e10533acef95fc368066c7dd186cd/fastapi-0.115.11.tar.gz", hash = "sha256:cc81f03f688678b92600a65a5e618b93592c65005db37157147204d8924bf94f", size = 294441, upload-time = "2025-03-01T22:16:50.378Z" } wheels = [ @@ -2739,8 +2739,8 @@ dependencies = [ { name = "absl-py" }, { name = "graphviz" }, { name = "libcst" }, - { name = 
"typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 
'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/73/36/7a4fac76351619b36bbc7937abf59f7b601326dc4efc253b3c16819f782a/fiddle-0.3.0.tar.gz", hash = "sha256:5d083d3299a479868345513385a6c5546141bd92086c15d3dcbf8008a90075d3", size = 277884, upload-time = "2024-04-09T17:23:58.974Z" } wheels = [ @@ -3285,8 +3285,8 @@ dependencies = [ { name = "pyyaml" }, { name = "requests" }, { name = "tqdm" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 
'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/4b/9e/9366b7349fc125dd68b9d384a0fea84d67b7497753fe92c71b67e13f47c4/huggingface_hub-0.33.4.tar.gz", hash = "sha256:6af13478deae120e765bfd92adad0ae1aec1ad8c439b46f23058ad5956cbca0a", size = 426674, upload-time = "2025-07-11T12:32:48.694Z" } wheels = [ @@ -3488,8 +3488,7 @@ dependencies = [ { name = "pygments" }, { name = "stack-data" }, { name = "traitlets" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = 
"https://pypi.org/simple" }, marker = "(python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/13/18/1a60aa62e9d272fcd7e658a89e1c148da10e1a5d38edcbcd834b52ca7492/ipython-8.34.0.tar.gz", hash = "sha256:c31d658e754673ecc6514583e7dda8069e47136eb62458816b7d1e6625948b5a", size = 5508477, upload-time = "2025-03-08T13:43:17.591Z" } wheels = [ @@ -3925,8 +3924,8 @@ dependencies = [ { name = "ray" }, { name = "requests" }, { name = "rich" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' 
and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, { name = "uvicorn" }, ] wheels = [ @@ -4326,7 +4325,7 @@ name = "multidict" version = "6.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/d6/be/504b89a5e9ca731cd47487e91c469064f8ae5af93b7259758dcfc2b9c848/multidict-6.1.0.tar.gz", hash = "sha256:22ae2ebf9b0c69d206c003e2f6a914ea33f0a932d4aa16f236afc049d9958f4a", size = 64002, upload-time = "2024-09-09T23:49:38.163Z" } wheels = [ @@ -4515,6 +4514,9 @@ dependencies = [ ] [package.optional-dependencies] 
+kubeflow = [ + { name = "kubernetes" }, +] ray = [ { name = "kubernetes" }, { name = "ray", extra = ["default"] }, @@ -4562,6 +4564,7 @@ requires-dist = [ { name = "fiddle", specifier = ">=0.3.0" }, { name = "inquirerpy", specifier = ">=0.3.4" }, { name = "jinja2", specifier = ">=3.1.4" }, + { name = "kubernetes", marker = "extra == 'kubeflow'" }, { name = "kubernetes", marker = "extra == 'ray'" }, { name = "leptonai", specifier = ">=0.26.6" }, { name = "networkx", specifier = ">=3.3" }, @@ -4575,7 +4578,7 @@ requires-dist = [ { name = "torchx", specifier = ">=0.7.0" }, { name = "typer", specifier = ">=0.12.3" }, ] -provides-extras = ["ray", "skypilot", "skypilot-all"] +provides-extras = ["kubeflow", "ray", "skypilot", "skypilot-all"] [package.metadata.requires-dev] dev = [ @@ -4800,8 +4803,8 @@ version = "1.38.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "importlib-metadata" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' 
and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/08/d8/0f354c375628e048bd0570645b310797299754730079853095bf000fba69/opentelemetry_api-1.38.0.tar.gz", hash = "sha256:f4c193b5e8acb0912b06ac5b16321908dd0843d75049c091487322284a3eea12", size = 65242, upload-time = "2025-10-16T08:35:50.25Z" } wheels = [ @@ -4841,8 +4844,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "opentelemetry-semantic-conventions" }, - { name = "typing-extensions", version = "4.12.2", 
source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') 
or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/85/cb/f0eee1445161faf4c9af3ba7b848cc22a50a3d3e2515051ad8628c35ff80/opentelemetry_sdk-1.38.0.tar.gz", hash = "sha256:93df5d4d871ed09cb4272305be4d996236eedb232253e3ab864c8620f051cebe", size = 171942, upload-time = "2025-10-16T08:36:02.257Z" } wheels = [ @@ -4855,8 +4858,8 @@ version = "0.59b0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 
'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/40/bc/8b9ad3802cd8ac6583a4eb7de7e5d7db004e89cb7efe7008f9c8a537ee75/opentelemetry_semantic_conventions-0.59b0.tar.gz", hash = "sha256:7a6db3f30d70202d5bf9fa4b69bc866ca6a30437287de6c510fb594878aed6b0", size = 129861, upload-time = "2025-10-16T08:36:03.346Z" } wheels = [ @@ -5641,8 +5644,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-types" }, { name = "pydantic-core" }, - { name = "typing-extensions", 
version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 
'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, { name = "typing-inspection" }, ] sdist = { url = "https://files.pythonhosted.org/packages/31/3e/a1d12c6c5ad4429b3549ebdb8553f4cd0766acb22268c0dba02b4c4dc459/pydantic-2.11.0b1.tar.gz", hash = "sha256:47ea8082d748ee14f7be613787e32b76111112add84eb6e313cc47000c4cd4e8", size = 774826, upload-time = "2025-03-06T16:29:19.074Z" } @@ -5655,8 +5658,8 @@ name = "pydantic-core" version = "2.31.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and 
extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/df/ce/f4c0e43c788ebb10a8b107c9774169091a0b8b3a62b3f76ca528d1e79bf0/pydantic_core-2.31.1.tar.gz", hash = "sha256:a9cc2f56cba2b78b487325ff3de016a70670b615eaf00cad88cb17f271e01971", size = 424824, upload-time = "2025-03-06T15:30:14.296Z" } wheels = [ @@ -5744,8 +5747,8 @@ version = "2.10.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, - { name = "typing-extensions", version = "4.12.2", source = 
{ registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/23/ed/69f3f3de12c02ebd58b2f66ffb73d0f5a1b10b322227897499753cebe818/pydantic_extra_types-2.10.2.tar.gz", hash = "sha256:934d59ab7a02ff788759c3a97bc896f5cfdc91e62e4f88ea4669067a73f14b98", size = 86893, upload-time = 
"2025-01-16T16:00:07.161Z" } wheels = [ @@ -5777,8 +5780,8 @@ dependencies = [ { name = "pygments" }, { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'group-8-nemo-run-docs') or 
(extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/00/20/bb50f9de3a6de69e6abd6b087b52fa2418a0418b19597601605f855ad044/pydata_sphinx_theme-0.16.1.tar.gz", hash = "sha256:a08b7f0b7f70387219dc659bff0893a7554d5eb39b59d3b8ef37b8401b7642d7", size = 2412693, upload-time = "2024-12-17T10:53:39.537Z" } wheels = [ @@ -5794,8 +5797,8 @@ dependencies = [ { name = "azure-identity" }, { name = "isodate" }, { name = "msrest" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { 
name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ac/30/08d3b6f9c53dbd6937a101ec47cfd325ace2e451f65656616b2564d77d1c/pydo-0.9.2.tar.gz", hash = "sha256:eee51a6b122aa4d33575ef00f48221d299b9d37c57c41eb96654b9d662996864", size = 1543547, upload-time = "2025-02-26T20:51:40.496Z" } wheels = [ @@ -5905,8 +5908,8 @@ name = "pyre-extensions" version = "0.0.32" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' 
and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, { name = "typing-inspect" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/a7/53/5bc2532536e921c48366ad1047c1344ccef6afa5e84053f0f6e20a453767/pyre_extensions-0.0.32.tar.gz", hash = "sha256:5396715f14ea56c4d5fd0a88c57ca7e44faa468f905909edd7de4ad90ed85e55", size = 10852, upload-time = "2024-11-22T19:26:44.152Z" } @@ -6278,8 +6281,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "rpds-py" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.12.*' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.13' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.13' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.13' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or 
(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.12.*' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/2f/db/98b5c277be99dd18bfd91dd04e1b759cad18d1a338188c936e92f921c7e2/referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", size = 74744, upload-time = "2025-01-25T08:48:16.138Z" } wheels = [ @@ -6347,7 +6350,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 
'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ab/3a/0316b28d0761c6734d6bc14e770d85506c986c85ffb239e688eeaab2c2bc/rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098", size = 223149, upload-time = "2024-11-01T16:43:57.873Z" } wheels = [ @@ -6361,8 +6364,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "rich" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 
'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/5b/8a/71cfbf6bf6257ea785d1f030c22468f763eea1b3e5417620f2ba9abd6dca/rich_toolkit-0.13.2.tar.gz", hash = "sha256:fea92557530de7c28f121cbed572ad93d9e0ddc60c3ca643f1b831f2f56b95d3", size = 72288, upload-time = "2025-01-13T19:30:02.403Z" } wheels = [ @@ -6773,8 +6776,8 @@ dependencies = [ { name = "sqlalchemy" }, { name = "sqlalchemy-adapter" }, { name = "tabulate" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = 
"typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, { name = "uvicorn", extra = ["standard"], marker = "extra == 'extra-8-nemo-run-skypilot' or extra == 'extra-8-nemo-run-skypilot-all'" }, { name = "wheel" }, ] @@ -7060,8 +7063,8 @@ version = "2.0.41" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot') or 
(python_full_version >= '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version < '3.12' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/63/66/45b165c595ec89aa7dcc2c1cd222ab269bc753f1fc7a1e68f8481bd957bf/sqlalchemy-2.0.41.tar.gz", hash = "sha256:edba70118c4be3c2b1f90754d308d0b79c6fe2c0fdc52d8ddf603916f83f4db9", size = 9689424, upload-time = "2025-05-14T17:10:32.339Z" } wheels = [ @@ -7387,8 +7390,8 @@ dependencies = [ { name = "click" }, { name = "rich" }, { name = "shellingham" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == 
'3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/8b/6f/3991f0f1c7fcb2df31aef28e0594d8d54b05393a0e4e34c65e475c2a5d41/typer-0.15.2.tar.gz", hash = "sha256:ab2fab47533a813c49fe1f16b1a370fd5819099c00b119e0633df65f22144ba5", size = 100711, upload-time = "2025-02-27T19:17:34.807Z" } wheels = [ @@ -7411,16 +7414,12 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", "python_full_version == '3.12.*' and sys_platform == 'darwin' and extra != 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", - "python_full_version == '3.11.*' and sys_platform == 'darwin' and extra != 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", "python_full_version >= '3.13' and sys_platform != 'darwin' and extra != 'extra-8-nemo-run-skypilot' and extra == 
'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", "python_full_version == '3.12.*' and sys_platform != 'darwin' and extra != 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", - "python_full_version == '3.11.*' and sys_platform != 'darwin' and extra != 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", "python_full_version >= '3.13' and extra == 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", "python_full_version == '3.12.*' and extra == 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", - "python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", "python_full_version >= '3.13' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs'", "python_full_version == '3.12.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs'", - "python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs'", "python_full_version >= '3.13' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", "python_full_version == '3.12.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", ] @@ -7434,9 +7433,13 @@ name = "typing-extensions" version = "4.15.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ + "python_full_version == '3.11.*' and sys_platform == 'darwin' and extra != 'extra-8-nemo-run-skypilot' and extra == 
'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", "python_full_version < '3.11' and sys_platform == 'darwin' and extra != 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", + "python_full_version == '3.11.*' and sys_platform != 'darwin' and extra != 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", "python_full_version < '3.11' and sys_platform != 'darwin' and extra != 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", + "python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", "python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", + "python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs'", "python_full_version < '3.11' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs'", "python_full_version < '3.12' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs'", ] @@ -7451,8 +7454,8 @@ version = "0.9.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mypy-extensions" }, - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 
'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 
'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/dc/74/1789779d91f1961fa9438e9a8710cdae6bd138c80d7303996933d117264a/typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78", size = 13825, upload-time = "2023-05-24T20:25:47.612Z" } wheels = [ @@ -7464,8 +7467,8 @@ name = "typing-inspection" version = "0.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version < '3.11' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot' and extra == 
'group-8-nemo-run-docs') or (python_full_version >= '3.12' and extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs') or (python_full_version == '3.11.*' and extra != 'extra-8-nemo-run-skypilot' and extra != 'extra-8-nemo-run-skypilot-all' and extra != 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.12.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/82/5c/e6082df02e215b846b4b8c0b887a64d7d08ffaba30605502639d44c06b82/typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122", size = 76222, upload-time = "2025-02-25T17:27:59.638Z" } wheels = [ @@ -7569,7 +7572,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, - { name = "typing-extensions", version = "4.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-8-nemo-run-skypilot' and extra == 'extra-8-nemo-run-skypilot-all') or (extra == 'extra-8-nemo-run-skypilot' and extra == 'group-8-nemo-run-docs') or (extra == 'extra-8-nemo-run-skypilot-all' and extra == 'group-8-nemo-run-docs')" }, + { name = "typing-extensions", version = "4.15.0", source = { registry 
= "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/4b/4d/938bd85e5bf2edeec766267a5015ad969730bb91e31b44021dfe8b22df6c/uvicorn-0.34.0.tar.gz", hash = "sha256:404051050cd7e905de2c9a7e61790943440b3416f49cb409f965d9dcd0fa73e9", size = 76568, upload-time = "2024-12-15T13:33:30.42Z" } wheels = [ From 4872ad798e8f3b1817f0afae9862492bd141cadb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 13 Mar 2026 15:51:28 +0000 Subject: [PATCH 02/16] fix: scope PVC writes to /code and fix launch script path resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add code_dir property to KubeflowExecutor: scopes all synced files to //code so multiple users sharing the same PVC never clobber each other (mirrors KubeRayJob pattern) - Revert ConfigMap approach for launch.sh delivery; use data-mover pod to rsync job_dir into code_dir instead — avoids the subPath+PVC mount conflict that caused ContainerCreating hangs - Skip adding a duplicate PVC volume entry in package() when the PVC is already declared by the caller - Add ln -sfn /nemo_run symlink in kubeflow.sh.j2 so hardcoded /nemo_run/scripts/... 
paths resolve correctly, consistent with the DGXCloud pattern - Update scheduler to rewrite workdir_local_path references to code_dir Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: oliver könig fix: remove stale ConfigMap cleanup from cancel() and update test Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: oliver könig --- nemo_run/core/execution/kubeflow.py | 43 ++++++++++++++----- .../core/execution/templates/kubeflow.sh.j2 | 3 ++ .../run/torchx_backend/schedulers/kubeflow.py | 8 ++-- test/core/execution/test_kubeflow.py | 2 +- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/nemo_run/core/execution/kubeflow.py b/nemo_run/core/execution/kubeflow.py index f281a706..ebaf49f5 100644 --- a/nemo_run/core/execution/kubeflow.py +++ b/nemo_run/core/execution/kubeflow.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import getpass import logging import os import subprocess @@ -154,6 +155,15 @@ def assign(self, exp_id: str, exp_dir: str, task_id: str, task_dir: str) -> None def nnodes(self) -> int: return self.num_nodes + @property + def code_dir(self) -> str: + """Subdirectory on the PVC where user code (launch.sh, scripts) is synced. + + Scoped to ``//code`` so multiple users sharing + the same PVC never clobber each other's files. + """ + return f"{self.workdir_pvc_path.rstrip('/')}/{getpass.getuser()}/code" + def nproc_per_node(self) -> int: if self.nprocs_per_node is not None: return self.nprocs_per_node @@ -641,6 +651,7 @@ def materialize_launch_script(self, cmd: list[str], max_retries: int = 0) -> Non training_command=" ".join(cmd), env_vars=env_var_lines, max_retries=max_retries, + code_dir=self.code_dir, ) os.makedirs(self.job_dir, exist_ok=True) launch_script_path = os.path.join(self.job_dir, "launch.sh") @@ -652,7 +663,7 @@ def package(self, packager: Packager, job_name: str) -> None: if not self.workdir_pvc: return # Merge extra local files (e.g. 
training scripts) into job_dir so they - # get synced to the pod alongside generated files like launch.sh. + # are included alongside generated files like launch.sh. if self.workdir_local_path: os.makedirs(self.job_dir, exist_ok=True) subprocess.check_call( @@ -664,22 +675,32 @@ def package(self, packager: Packager, job_name: str) -> None: ] ) logger.info("Merged '%s' into job_dir '%s'", self.workdir_local_path, self.job_dir) - # Auto-add workdir PVC to volumes/volume_mounts so training pods can access it - vol_name = "nemo-run-workdir" - if not any(v.get("name") == vol_name for v in self.volumes): - self.volumes.append( - {"name": vol_name, "persistentVolumeClaim": {"claimName": self.workdir_pvc}} - ) - if not any(vm.get("mountPath") == self.workdir_pvc_path for vm in self.volume_mounts): - self.volume_mounts.append({"name": vol_name, "mountPath": self.workdir_pvc_path}) + # Sync job_dir to //code on the PVC via a + # throw-away data-mover pod. Scoping to a user subdirectory means we + # never clobber other data already on the shared volume. pod_name = self._data_mover_pod_name(job_name) self._start_data_mover_pod(pod_name) try: - self._rsync_to_pod(pod_name, self.job_dir, self.workdir_pvc_path) + self._rsync_to_pod(pod_name, self.job_dir, self.code_dir) finally: self._delete_data_mover_pod(pod_name) + # Mount the PVC so the training container can reach code_dir. + # If the PVC is already declared (e.g. explicitly by the caller for data), + # reuse that existing volume rather than adding a duplicate entry. 
+ already_mounted = any( + v.get("persistentVolumeClaim", {}).get("claimName") == self.workdir_pvc + for v in self.volumes + ) + if not already_mounted: + vol_name = "nemo-run-workdir" + self.volumes.append( + {"name": vol_name, "persistentVolumeClaim": {"claimName": self.workdir_pvc}} + ) + if not any(vm.get("mountPath") == self.workdir_pvc_path for vm in self.volume_mounts): + self.volume_mounts.append({"name": vol_name, "mountPath": self.workdir_pvc_path}) + def pull_results(self, job_name: str, dest_dir: Optional[str] = None) -> None: """Sync workdir_pvc_path back to a local directory after the job completes. @@ -707,7 +728,7 @@ def pull_results(self, job_name: str, dest_dir: Optional[str] = None) -> None: pod_name = self._data_mover_pod_name(job_name) self._start_data_mover_pod(pod_name) try: - self._rsync_from_pod(pod_name, self.workdir_pvc_path, local_path) + self._rsync_from_pod(pod_name, self.code_dir, local_path) finally: self._delete_data_mover_pod(pod_name) diff --git a/nemo_run/core/execution/templates/kubeflow.sh.j2 b/nemo_run/core/execution/templates/kubeflow.sh.j2 index 962f93c2..24529d7c 100644 --- a/nemo_run/core/execution/templates/kubeflow.sh.j2 +++ b/nemo_run/core/execution/templates/kubeflow.sh.j2 @@ -10,6 +10,9 @@ export TORCHX_MAX_RETRIES={{max_retries}} {{env_var}} {%- endfor %} +# Symlink /nemo_run → code_dir so pod-side paths like /nemo_run/scripts/... resolve correctly. +ln -sfn {{code_dir}} /nemo_run + echo "Starting training command..." set +e diff --git a/nemo_run/run/torchx_backend/schedulers/kubeflow.py b/nemo_run/run/torchx_backend/schedulers/kubeflow.py index 547410e3..1064d821 100644 --- a/nemo_run/run/torchx_backend/schedulers/kubeflow.py +++ b/nemo_run/run/torchx_backend/schedulers/kubeflow.py @@ -99,15 +99,13 @@ def _submit_dryrun( # type: ignore # Jinja2 template (env vars + training command) and point the job at # it so torchrun / launcher details stay out of the manifest. 
if executor.workdir_pvc and getattr(executor, "job_dir", None): - # Rewrite any local workdir_local_path references in the cmd to - # their pod-side equivalents under workdir_pvc_path, so users can - # pass run.Script(path=) and the pod sees the synced path. + # Rewrite any local workdir_local_path references in the cmd. if executor.workdir_local_path: local_prefix = executor.workdir_local_path.rstrip(os.sep) - pod_prefix = executor.workdir_pvc_path.rstrip("/") + pod_prefix = executor.code_dir.rstrip("/") cmd = [c.replace(local_prefix, pod_prefix) for c in cmd] executor.materialize_launch_script(cmd) - cmd = ["/bin/bash", f"{executor.workdir_pvc_path}/launch.sh"] + cmd = ["/bin/bash", f"{executor.code_dir}/launch.sh"] req = KubeflowJobRequest(app=app, executor=executor, cmd=cmd, name=role.name) diff --git a/test/core/execution/test_kubeflow.py b/test/core/execution/test_kubeflow.py index 9c3b2b95..1f0df885 100644 --- a/test/core/execution/test_kubeflow.py +++ b/test/core/execution/test_kubeflow.py @@ -709,7 +709,7 @@ def test_pull_results_syncs_from_pvc(self, workdir_executor, mock_k8s_clients): # kubectl cp /: assert "kubectl" in cp_args assert "cp" in cp_args - assert "test-job-data-mover:/nemo_run" in cp_args + assert f"test-job-data-mover:{workdir_executor.code_dir}" in cp_args def test_pull_results_noop_without_workdir_pvc(self, mock_k8s_clients): e = KubeflowExecutor(image="test:latest") From ab9aabea088ebcf4548ec801e95611e853f71635 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 13 Mar 2026 16:25:43 +0000 Subject: [PATCH 03/16] fix: expose all replicas in describe() and improve test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Set num_replicas=nnodes() in KubeflowScheduler.describe() so the log framework spawns a tail thread for every node instead of only replica 0; fixes missing logs for ranks 4-7 in multi-node jobs - Add 35 new tests covering materialize_launch_script, 
package(), cancel(wait=True), data-mover timeout/error paths, scheduler dryrun with workdir_pvc, log_iter, and describe state mapping; coverage rises to ~93% / ~97% for the two modules Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: oliver könig --- .../run/torchx_backend/schedulers/kubeflow.py | 23 +- test/core/execution/test_kubeflow.py | 336 ++++++++++++++++++ .../schedulers/test_kubeflow.py | 219 ++++++++++++ 3 files changed, 567 insertions(+), 11 deletions(-) diff --git a/nemo_run/run/torchx_backend/schedulers/kubeflow.py b/nemo_run/run/torchx_backend/schedulers/kubeflow.py index 1064d821..e385d1b8 100644 --- a/nemo_run/run/torchx_backend/schedulers/kubeflow.py +++ b/nemo_run/run/torchx_backend/schedulers/kubeflow.py @@ -136,16 +136,6 @@ def describe(self, app_id: str) -> Optional[DescribeAppResponse]: job_info = stored_data.get(app_id) parts = app_id.split("___") role_name = parts[1] if len(parts) > 1 else app_id - roles = [Role(name=role_name, image="", num_replicas=1)] - roles_statuses = [ - RoleStatus( - role_name, - replicas=[ - ReplicaStatus(id=0, role=role_name, state=AppState.SUBMITTED, hostname="") - ], - ) - ] - if not job_info: return None @@ -157,7 +147,18 @@ def describe(self, app_id: str) -> Optional[DescribeAppResponse]: job_name = job_info.get("job_name") or parts[-1] kf_state = executor.status(job_name) app_state = KUBEFLOW_STATES.get(kf_state, AppState.PENDING) - roles_statuses[0].replicas[0].state = app_state + + num_replicas = executor.nnodes() + roles = [Role(name=role_name, image="", num_replicas=num_replicas)] + roles_statuses = [ + RoleStatus( + role_name, + replicas=[ + ReplicaStatus(id=i, role=role_name, state=app_state, hostname="") + for i in range(num_replicas) + ], + ) + ] return DescribeAppResponse( app_id=app_id, diff --git a/test/core/execution/test_kubeflow.py b/test/core/execution/test_kubeflow.py index 1f0df885..fe268280 100644 --- a/test/core/execution/test_kubeflow.py +++ b/test/core/execution/test_kubeflow.py @@ -747,3 
+747,339 @@ def test_data_mover_pod_inherits_tolerations_affinity_pull_secrets( assert spec["tolerations"] == [{"key": "gpu", "operator": "Exists"}] assert spec["affinity"] == {"nodeAffinity": {"key": "val"}} assert spec["imagePullSecrets"] == [{"name": "my-secret"}] + + # ── ImportError when kubernetes unavailable ────────────────────────────── + + def test_import_error_when_kubernetes_unavailable(self): + import nemo_run.core.execution.kubeflow as kf_module + + original = kf_module._KUBERNETES_AVAILABLE + try: + kf_module._KUBERNETES_AVAILABLE = False + with pytest.raises(ImportError, match="kubernetes package is required"): + with ( + patch("nemo_run.core.execution.kubeflow.config.load_kube_config"), + patch("nemo_run.core.execution.kubeflow.client.CustomObjectsApi"), + patch("nemo_run.core.execution.kubeflow.client.CoreV1Api"), + ): + KubeflowExecutor.__post_init__(KubeflowExecutor.__new__(KubeflowExecutor)) + finally: + kf_module._KUBERNETES_AVAILABLE = original + + # ── _build_resources with cpu_limits and memory_limits ─────────────────── + + def test_build_resources_with_cpu_and_memory_limits(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + cpu_limits="32", + memory_limits="128Gi", + ) + resources = e._build_resources() + assert resources["limits"]["cpu"] == "32" + assert resources["limits"]["memory"] == "128Gi" + + # ── TrainJob metadata labels and annotations ───────────────────────────── + + def test_get_trainjob_body_labels_and_annotations(self, mock_k8s_clients): + e = KubeflowExecutor( + image="test:latest", + job_kind="TrainJob", + labels={"team": "nemo"}, + annotations={"owner": "ci"}, + ) + body = e.get_job_body("labeled-trainjob", ["echo"]) + assert body["metadata"]["labels"] == {"team": "nemo"} + assert body["metadata"]["annotations"] == {"owner": "ci"} + + # ── launch() with non-409 ApiException ─────────────────────────────────── + + def test_launch_reraises_non_409_api_exception(self, executor, mock_k8s_clients): + 
mock_custom, _ = mock_k8s_clients + mock_custom.create_namespaced_custom_object.side_effect = ApiException(status=500) + with pytest.raises(ApiException): + executor.launch("test-job", ["echo"]) + + # ── launch(wait=True) exits early on SUCCEEDED / FAILED ────────────────── + + def test_launch_wait_exits_on_succeeded(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.create_namespaced_custom_object.return_value = {} + mock_custom.get_namespaced_custom_object.return_value = { + "status": {"conditions": [{"type": "Succeeded", "status": "True"}]} + } + with patch("time.sleep"): + _, state = executor.launch("test-job", ["echo"], wait=True, timeout=30) + assert state == KubeflowJobState.SUCCEEDED + + def test_launch_wait_exits_on_failed(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.create_namespaced_custom_object.return_value = {} + mock_custom.get_namespaced_custom_object.return_value = { + "status": {"conditions": [{"type": "Failed", "status": "True"}]} + } + with patch("time.sleep"): + _, state = executor.launch("test-job", ["echo"], wait=True, timeout=30) + assert state == KubeflowJobState.FAILED + + # ── fetch_logs streaming: retry when no lines yielded ──────────────────── + + def test_fetch_logs_stream_retries_when_no_output_then_succeeds( + self, executor, mock_k8s_clients + ): + """First Popen yields nothing; second yields a line — loop exits after output.""" + import io + + empty_proc = MagicMock() + empty_proc.stdout = io.StringIO("") + empty_proc.poll.return_value = None + + output_proc = MagicMock() + output_proc.stdout = io.StringIO("some output\n") + output_proc.poll.return_value = None + + procs = [empty_proc, output_proc] + + with ( + patch("subprocess.Popen", side_effect=procs), + patch("time.sleep"), + ): + lines = list(executor.fetch_logs("my-job", stream=True)) + + assert "some output\n" in lines + + def test_fetch_logs_stream_handles_exception(self, executor, 
mock_k8s_clients): + """Exception inside the readline loop is caught; generator terminates cleanly.""" + + mock_proc = MagicMock() + + def _raise_on_read(_sentinel): + raise OSError("read error") + + mock_proc.stdout.readline.side_effect = OSError("read error") + mock_proc.poll.return_value = None + + with ( + patch("subprocess.Popen", return_value=mock_proc), + patch("time.sleep"), + ): + # Should not raise; returns empty (error path) + lines = list(executor.fetch_logs("my-job", stream=True)) + + assert lines == [] + + # ── cancel() non-404 ApiException reraises ─────────────────────────────── + + def test_cancel_reraises_non_404_api_exception(self, executor, mock_k8s_clients): + mock_custom, _ = mock_k8s_clients + mock_custom.delete_namespaced_custom_object.side_effect = ApiException(status=500) + with pytest.raises(ApiException): + executor.cancel("test-job") + + # ── cancel(wait=True): CR still present on first poll, then gone ───────── + + def test_cancel_with_wait_cr_present_then_gone(self, executor, mock_k8s_clients): + mock_custom, mock_core = mock_k8s_clients + mock_custom.delete_namespaced_custom_object.return_value = {} + # First get: CR still present; second get: 404 (gone) + mock_custom.get_namespaced_custom_object.side_effect = [ + {"metadata": {"name": "test-job"}}, # still present → continue + ApiException(status=404), # gone + ] + mock_core.list_namespaced_pod.return_value = MagicMock(items=[]) + + with patch("time.sleep"): + result = executor.cancel("test-job", wait=True, timeout=60, poll_interval=0) + assert result is True + + def test_cancel_with_wait_non_404_get_continues(self, executor, mock_k8s_clients): + """Non-404 ApiException on the CR get should be treated as 'still present' (continue).""" + mock_custom, mock_core = mock_k8s_clients + mock_custom.delete_namespaced_custom_object.return_value = {} + # Non-404 on get → continue; then CR gone with pods still present + mock_custom.get_namespaced_custom_object.side_effect = 
ApiException(status=503) + + with patch("time.sleep"): + result = executor.cancel("test-job", wait=True, timeout=-1, poll_interval=0) + assert result is False + + def test_cancel_with_wait_pods_still_present(self, executor, mock_k8s_clients): + """When CR is gone but pods are still present, keep waiting until timeout.""" + mock_custom, mock_core = mock_k8s_clients + mock_custom.delete_namespaced_custom_object.return_value = {} + mock_custom.get_namespaced_custom_object.side_effect = ApiException(status=404) + # pods still present + mock_core.list_namespaced_pod.return_value = MagicMock(items=[MagicMock()]) + + with patch("time.sleep"): + result = executor.cancel("test-job", wait=True, timeout=-1, poll_interval=0) + assert result is False + + # ── _start_data_mover_pod: timeout when pod never reaches Running ───────── + + def test_start_data_mover_pod_timeout(self, mock_k8s_clients, tmp_path): + _, mock_core = mock_k8s_clients + mock_core.create_namespaced_pod.return_value = MagicMock() + # 404 on delete means pod already gone — _delete_data_mover_pod returns immediately + mock_core.delete_namespaced_pod.side_effect = ApiException(status=404) + # 404 on read_namespaced_pod so the delete cleanup loop exits fast + mock_core.read_namespaced_pod.side_effect = ApiException(status=404) + + e = KubeflowExecutor( + image="test:latest", + workdir_pvc="my-pvc", + ) + e.job_dir = str(tmp_path) + + with patch("kubernetes.watch.Watch") as mock_watch_cls: + mock_watch = MagicMock() + mock_watch_cls.return_value = mock_watch + # Stream returns non-Running event then exhausts — for/else fires + pod = MagicMock() + pod.status.phase = "Pending" + mock_watch.stream.return_value = iter([{"object": pod}]) + + with pytest.raises(RuntimeError, match="did not reach Running"): + e._start_data_mover_pod("my-pod", timeout=5) + + # ── _delete_data_mover_pod: non-404 ApiException on delete ─────────────── + + def test_delete_data_mover_pod_non_404_logs_warning(self, mock_k8s_clients, tmp_path): 
+ _, mock_core = mock_k8s_clients + mock_core.delete_namespaced_pod.side_effect = ApiException(status=500) + + e = KubeflowExecutor(image="test:latest", workdir_pvc="my-pvc") + e.job_dir = str(tmp_path) + + # Should not raise; just log a warning and return + e._delete_data_mover_pod("my-pod") + mock_core.read_namespaced_pod.assert_not_called() + + def test_delete_data_mover_pod_timeout_warning(self, mock_k8s_clients, tmp_path): + _, mock_core = mock_k8s_clients + mock_core.delete_namespaced_pod.return_value = MagicMock() + # Pod never disappears (read always succeeds) + mock_core.read_namespaced_pod.return_value = MagicMock() + + e = KubeflowExecutor(image="test:latest", workdir_pvc="my-pvc") + e.job_dir = str(tmp_path) + + with patch("time.sleep"): + # timeout=-1 means deadline already passed — loop body never executes + e._delete_data_mover_pod("my-pod", timeout=-1) + # Should not raise; just hits the warning log + + # ── materialize_launch_script ───────────────────────────────────────────── + + def test_materialize_launch_script_writes_file(self, mock_k8s_clients, tmp_path): + e = KubeflowExecutor( + image="test:latest", + env_vars={"MY_VAR": "hello"}, + workdir_pvc="my-pvc", + ) + e.job_dir = str(tmp_path) + + e.materialize_launch_script(["python", "train.py"], max_retries=2) + + launch_script = tmp_path / "launch.sh" + assert launch_script.exists() + content = launch_script.read_text() + assert "python train.py" in content + assert "export MY_VAR=hello" in content + assert "TORCHX_MAX_RETRIES=2" in content + + # ── package() with workdir_local_path ───────────────────────────────────── + + def test_package_with_workdir_local_path(self, mock_k8s_clients, tmp_path): + _, mock_core = mock_k8s_clients + mock_core.create_namespaced_pod.return_value = MagicMock() + mock_core.delete_namespaced_pod.return_value = MagicMock() + mock_core.read_namespaced_pod.side_effect = ApiException(status=404) + + local_path = str(tmp_path / "local_scripts") + e = KubeflowExecutor( 
+ image="test:latest", + workdir_pvc="my-pvc", + workdir_local_path=local_path, + ) + e.job_dir = str(tmp_path / "job_dir") + + with ( + patch("kubernetes.watch.Watch") as mock_watch_cls, + patch("subprocess.check_call") as mock_check_call, + ): + mock_watch_cls.return_value.stream.return_value = self._make_watch_events("Running") + e.package(MagicMock(), "test-job") + + # rsync local_path → job_dir + kubectl mkdir + kubectl cp = 3 calls + assert mock_check_call.call_count == 3 + first_call_cmd = mock_check_call.call_args_list[0][0][0] + assert "rsync" in first_call_cmd + + # ── package(): PVC volume mount already present — no duplicate ──────────── + + def test_package_pvc_already_mounted_no_duplicate_volume(self, mock_k8s_clients, tmp_path): + _, mock_core = mock_k8s_clients + mock_core.create_namespaced_pod.return_value = MagicMock() + mock_core.delete_namespaced_pod.return_value = MagicMock() + mock_core.read_namespaced_pod.side_effect = ApiException(status=404) + + e = KubeflowExecutor( + image="test:latest", + workdir_pvc="my-pvc", + volumes=[{"name": "pre-vol", "persistentVolumeClaim": {"claimName": "my-pvc"}}], + ) + e.job_dir = str(tmp_path) + + with ( + patch("kubernetes.watch.Watch") as mock_watch_cls, + patch("subprocess.check_call"), + ): + mock_watch_cls.return_value.stream.return_value = self._make_watch_events("Running") + e.package(MagicMock(), "test-job") + + pvc_vols = [ + v for v in e.volumes if v.get("persistentVolumeClaim", {}).get("claimName") == "my-pvc" + ] + assert len(pvc_vols) == 1 # no duplicate added + + # ── pull_results: no job_dir set and _lookup_job_dir returns empty ──────── + + def test_pull_results_raises_when_no_job_dir_resolvable(self, mock_k8s_clients): + e = KubeflowExecutor(image="test:latest", workdir_pvc="my-pvc") + # job_dir not set + + with patch.object(e, "_lookup_job_dir", return_value=""): + with pytest.raises(RuntimeError, match="Cannot determine destination directory"): + e.pull_results("test-job") + + def 
test_pull_results_uses_dest_dir_when_no_job_dir(self, mock_k8s_clients, tmp_path): + _, mock_core = mock_k8s_clients + mock_core.create_namespaced_pod.return_value = MagicMock() + mock_core.delete_namespaced_pod.return_value = MagicMock() + mock_core.read_namespaced_pod.side_effect = ApiException(status=404) + + e = KubeflowExecutor(image="test:latest", workdir_pvc="my-pvc") + # job_dir not set + + with ( + patch("kubernetes.watch.Watch") as mock_watch_cls, + patch("subprocess.check_call"), + ): + mock_watch_cls.return_value.stream.return_value = self._make_watch_events("Running") + e.pull_results("test-job", dest_dir=str(tmp_path)) + + mock_core.create_namespaced_pod.assert_called_once() + + # ── _lookup_job_dir ─────────────────────────────────────────────────────── + + def test_lookup_job_dir_returns_empty_when_no_jobs_file(self, mock_k8s_clients, tmp_path): + e = KubeflowExecutor(image="test:latest", workdir_pvc="my-pvc") + with patch("nemo_run.config.get_nemorun_home", return_value=str(tmp_path)): + result = e._lookup_job_dir("nonexistent-job") + assert result == "" + + def test_lookup_job_dir_returns_empty_on_exception(self, mock_k8s_clients): + e = KubeflowExecutor(image="test:latest", workdir_pvc="my-pvc") + with patch("nemo_run.config.get_nemorun_home", side_effect=Exception("boom")): + result = e._lookup_job_dir("test-job") + assert result == "" diff --git a/test/run/torchx_backend/schedulers/test_kubeflow.py b/test/run/torchx_backend/schedulers/test_kubeflow.py index 209335fb..2260d920 100644 --- a/test/run/torchx_backend/schedulers/test_kubeflow.py +++ b/test/run/torchx_backend/schedulers/test_kubeflow.py @@ -287,3 +287,222 @@ def test_get_job_dirs_file_not_found(tmp_path): def test_unknown_state_maps_to_pending(): assert KUBEFLOW_STATES[KubeflowJobState.UNKNOWN] == AppState.PENDING + + +# ── _run_opts ───────────────────────────────────────────────────────────────── + + +def test_run_opts_has_job_dir(scheduler): + opts = scheduler._run_opts() + # 
runopts is dict-like; key existence is checked via the internal dict + assert "job_dir" in opts._opts + + +# ── _submit_dryrun: macro values applied ───────────────────────────────────── + + +def test_submit_dryrun_applies_macro_values(scheduler, mock_app_def, executor): + """macro_values() returns an ExecutorMacros that rewrites PET_* vars in role args.""" + dryrun_info = scheduler._submit_dryrun(mock_app_def, executor) + assert dryrun_info.request is not None + # cmd is constructed from role.entrypoint + role.args after macro substitution + assert dryrun_info.request.cmd[0] == "python" + + +# ── _submit_dryrun: workdir_pvc with workdir_local_path cmd rewriting ───────── + + +def test_submit_dryrun_with_workdir_pvc_and_local_path(scheduler, mock_app_def, mock_k8s, tmp_path): + local_path = str(tmp_path / "scripts") + e = KubeflowExecutor( + image="nvcr.io/nvidian/nemo:nightly", + workdir_pvc="my-pvc", + workdir_local_path=local_path, + ) + e.experiment_id = "test_exp" + e.job_dir = str(tmp_path) + e.experiment_dir = str(tmp_path) + e.job_name = "test_role" + + app = AppDef( + name="test_app", + roles=[ + Role( + name="test_role", + image="test:latest", + entrypoint="python", + args=[f"{local_path}/train.py"], + ) + ], + ) + + with mock.patch.object(KubeflowExecutor, "materialize_launch_script") as mock_mat: + dryrun_info = scheduler._submit_dryrun(app, e) + + mock_mat.assert_called_once() + # cmd should be rewritten to bash launch.sh + assert dryrun_info.request.cmd == ["/bin/bash", f"{e.code_dir}/launch.sh"] + + +def test_submit_dryrun_with_workdir_pvc_no_local_path(scheduler, mock_app_def, mock_k8s, tmp_path): + e = KubeflowExecutor( + image="nvcr.io/nvidian/nemo:nightly", + workdir_pvc="my-pvc", + ) + e.experiment_id = "test_exp" + e.job_dir = str(tmp_path) + e.experiment_dir = str(tmp_path) + e.job_name = "test_role" + + with mock.patch.object(KubeflowExecutor, "materialize_launch_script") as mock_mat: + dryrun_info = scheduler._submit_dryrun(mock_app_def, e) 
+ + mock_mat.assert_called_once() + assert dryrun_info.request.cmd == ["/bin/bash", f"{e.code_dir}/launch.sh"] + + +# ── schedule: raises when no job_name returned ──────────────────────────────── + + +def test_schedule_raises_when_no_job_name(scheduler, mock_app_def, executor): + with ( + mock.patch.object(KubeflowExecutor, "package"), + mock.patch.object(KubeflowExecutor, "launch", return_value=("", KubeflowJobState.CREATED)), + ): + dryrun_info = scheduler._submit_dryrun(mock_app_def, executor) + with pytest.raises(RuntimeError, match="no job_name returned"): + scheduler.schedule(dryrun_info) + + +# ── describe: missing job_info or missing executor ─────────────────────────── + + +def test_describe_returns_none_when_app_id_unknown(scheduler): + with mock.patch( + "nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs", return_value={} + ): + resp = scheduler.describe("nonexistent___role___job") + assert resp is None + + +def test_describe_returns_none_when_executor_missing(scheduler): + with mock.patch("nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs") as mock_dirs: + mock_dirs.return_value = { + "test_exp___role___job": { + "job_status": "Created", + "job_name": "job", + "executor": None, + } + } + resp = scheduler.describe("test_exp___role___job") + assert resp is None + + +# ── log_iter: missing job_info or missing executor ─────────────────────────── + + +def test_log_iter_returns_empty_when_app_id_unknown(scheduler): + with mock.patch( + "nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs", return_value={} + ): + result = list(scheduler.log_iter("nonexistent___role___job", "role")) + assert result == [] + + +def test_log_iter_returns_empty_when_executor_missing(scheduler): + with mock.patch("nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs") as mock_dirs: + mock_dirs.return_value = { + "test_exp___role___job": { + "job_status": "Running", + "job_name": "job", + "executor": None, + } + } + result = 
list(scheduler.log_iter("test_exp___role___job", "role")) + assert result == [] + + +def test_log_iter_with_should_tail(scheduler, executor): + with mock.patch("nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs") as mock_dirs: + mock_dirs.return_value = { + "test_exp___test_role___test-job": { + "job_status": "Running", + "job_name": "test-job", + "executor": executor, + } + } + executor.fetch_logs = MagicMock(return_value=["line1", "line2"]) + lines = list( + scheduler.log_iter("test_exp___test_role___test-job", "test_role", should_tail=True) + ) + executor.fetch_logs.assert_called_once_with(job_name="test-job", stream=True) + assert lines == ["line1", "line2"] + + +def test_log_iter_str_empty(scheduler, executor): + """Empty string logs should return an empty list.""" + with mock.patch("nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs") as mock_dirs: + mock_dirs.return_value = { + "test_exp___test_role___test-job": { + "job_status": "Running", + "job_name": "test-job", + "executor": executor, + } + } + executor.fetch_logs = MagicMock(return_value="") + lines = list(scheduler.log_iter("test_exp___test_role___test-job", "test_role")) + assert lines == [] + + +# ── _cancel_existing: missing job_info or missing executor ─────────────────── + + +def test_cancel_existing_noop_when_unknown(scheduler): + with mock.patch( + "nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs", return_value={} + ): + # Should not raise + scheduler._cancel_existing("nonexistent___role___job") + + +def test_cancel_existing_noop_when_no_executor(scheduler): + with mock.patch("nemo_run.run.torchx_backend.schedulers.kubeflow._get_job_dirs") as mock_dirs: + mock_dirs.return_value = { + "test_exp___role___job": { + "job_status": "Running", + "job_name": "job", + "executor": None, + } + } + # Should not raise + scheduler._cancel_existing("test_exp___role___job") + + +# ── _get_job_dirs: deserialization failure ──────────────────────────────────── + + +def 
test_get_job_dirs_skips_corrupt_entries(tmp_path): + from nemo_run.config import set_nemorun_home + + set_nemorun_home(str(tmp_path)) + + import json + + jobs_file = tmp_path / ".kubeflow_jobs.json" + jobs_file.write_text( + json.dumps( + { + "bad_app": { + "job_status": "Created", + "job_name": "bad-job", + "executor": "not-valid-base64-zlib", + } + } + ) + ) + + from nemo_run.run.torchx_backend.schedulers.kubeflow import _get_job_dirs + + # Should not raise; corrupt entry is skipped (executor stays as string or is absent) + result = _get_job_dirs() + assert "bad_app" in result From fc6cb44ad5b8cc4a56b2b650582facbd80a0c437 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Fri, 13 Mar 2026 16:47:55 +0000 Subject: [PATCH 04/16] fix: address code review comments (autoescape note, empty except comment, test cleanup) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add explanatory comment to empty AttributeError except in _get_job_dirs (backwards-compat field migration — absence is expected and handled) - Add noqa + comment to Jinja2 Environment for shell-script template (autoescape intentionally disabled for .sh/.j2; no XSS risk) - Remove unused _raise_on_read helper in test_fetch_logs_stream_handles_exception - Use sys.modules lookup instead of duplicate import in test_import_error_when_kubernetes_unavailable Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: oliver könig --- nemo_run/core/execution/kubeflow.py | 4 +++- nemo_run/run/torchx_backend/schedulers/kubeflow.py | 5 +++-- test/core/execution/test_kubeflow.py | 6 ++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/nemo_run/core/execution/kubeflow.py b/nemo_run/core/execution/kubeflow.py index ebaf49f5..9da5c7d0 100644 --- a/nemo_run/core/execution/kubeflow.py +++ b/nemo_run/core/execution/kubeflow.py @@ -640,7 +640,9 @@ def _rsync_from_pod(self, pod_name: str, remote_path: str, local_path: str) -> N def materialize_launch_script(self, 
cmd: list[str], max_retries: int = 0) -> None: """Render kubeflow.sh.j2 with *cmd* as the training command and write it to ``{job_dir}/launch.sh`` so it can be synced to the pod.""" - env = Environment( + # Shell-script template: autoescape is intentionally disabled for non-HTML/XML + # extensions (.sh, .j2). There is no XSS risk — output is executed locally. + env = Environment( # noqa: S701 loader=PackageLoader("nemo_run", "core/execution/templates"), keep_trailing_newline=True, autoescape=select_autoescape(["html", "xml"]), diff --git a/nemo_run/run/torchx_backend/schedulers/kubeflow.py b/nemo_run/run/torchx_backend/schedulers/kubeflow.py index e385d1b8..27bfdaf1 100644 --- a/nemo_run/run/torchx_backend/schedulers/kubeflow.py +++ b/nemo_run/run/torchx_backend/schedulers/kubeflow.py @@ -263,13 +263,14 @@ def _get_job_dirs() -> dict[str, dict[str, Any]]: for app in data.values(): try: cfg = serializer.deserialize(app["executor"]) - # Backwards compat: migrate renamed field nproc_per_node → nprocs_per_node + # Backwards compat: migrate renamed field nproc_per_node → nprocs_per_node. + # AttributeError means the field doesn't exist so no migration is needed. 
try: val = cfg.nproc_per_node del cfg.nproc_per_node cfg.nprocs_per_node = val except AttributeError: - pass + pass # field absent — already using the new name, nothing to migrate app["executor"] = fdl.build(cfg) except Exception as e: log.debug("Failed to deserialize executor: %s", e) diff --git a/test/core/execution/test_kubeflow.py b/test/core/execution/test_kubeflow.py index fe268280..e433d5b3 100644 --- a/test/core/execution/test_kubeflow.py +++ b/test/core/execution/test_kubeflow.py @@ -751,8 +751,9 @@ def test_data_mover_pod_inherits_tolerations_affinity_pull_secrets( # ── ImportError when kubernetes unavailable ────────────────────────────── def test_import_error_when_kubernetes_unavailable(self): - import nemo_run.core.execution.kubeflow as kf_module + import sys + kf_module = sys.modules["nemo_run.core.execution.kubeflow"] original = kf_module._KUBERNETES_AVAILABLE try: kf_module._KUBERNETES_AVAILABLE = False @@ -852,9 +853,6 @@ def test_fetch_logs_stream_handles_exception(self, executor, mock_k8s_clients): mock_proc = MagicMock() - def _raise_on_read(_sentinel): - raise OSError("read error") - mock_proc.stdout.readline.side_effect = OSError("read error") mock_proc.poll.return_value = None From 446bead19e35779346f41bcf432f5e64de84aba9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 16 Mar 2026 16:18:22 +0000 Subject: [PATCH 05/16] feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- docs/guides/execution.md | 10 +++++++--- test/core/execution/test_kubeflow.py | 10 +++++----- test/run/torchx_backend/schedulers/test_kubeflow.py | 8 ++++---- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/docs/guides/execution.md b/docs/guides/execution.md index 8560759b..e1efc51f 100644 --- a/docs/guides/execution.md +++ b/docs/guides/execution.md @@ -311,13 +311,13 @@ Here's an example configuration: # PyTorchJob (default) executor = run.KubeflowExecutor( 
namespace="runai-nemo-ci", - image="nvcr.io/nvidian/nemo:nightly", + image="nvcr.io/nvidia/nemo:26.02", num_nodes=3, # total pods: 1 Master + (num_nodes-1) Workers gpus_per_node=8, # also sets nproc_per_node unless overridden explicitly cpu_requests="16", memory_requests="64Gi", volumes=[ - {"name": "model-cache", "persistentVolumeClaim": {"claimName": "nemo-ci-datasets-project-nkf5l"}} + {"name": "model-cache", "persistentVolumeClaim": {"claimName": "data-pvc"}} ], volume_mounts=[{"name": "model-cache", "mountPath": "/nemo-workspace"}], labels={"app": "nemo-ci-training"}, @@ -329,7 +329,7 @@ executor = run.KubeflowExecutor( job_kind="TrainJob", runtime_ref="torch-distributed", # name of the ClusterTrainingRuntime namespace="runai-nemo-ci", - image="nvcr.io/nvidian/nemo:nightly", + image="nvcr.io/nvidia/nemo:26.02", num_nodes=3, gpus_per_node=8, ) @@ -337,6 +337,10 @@ executor = run.KubeflowExecutor( `cancel(wait=True)` polls until both the CR and all associated pods are fully terminated before returning. +##### Limitations + +Attributes like `resourceClaims` are not [supported](https://github.com/kubeflow/trainer/issues/3264) by the Training Operator and must be injected by other means, such as a mutating admission webhook. + #### LeptonExecutor The `LeptonExecutor` integrates with an NVIDIA DGX Cloud Lepton cluster's Python SDK to launch distributed jobs. It uses API calls behind the Lepton SDK to authenticate, identify the target node group and resource shapes, and submit the job specification which will be launched as a batch job on the cluster.
diff --git a/test/core/execution/test_kubeflow.py b/test/core/execution/test_kubeflow.py index e433d5b3..d9bffe0b 100644 --- a/test/core/execution/test_kubeflow.py +++ b/test/core/execution/test_kubeflow.py @@ -34,7 +34,7 @@ def mock_k8s_clients(self): @pytest.fixture def executor(self, mock_k8s_clients): return KubeflowExecutor( - image="nvcr.io/nvidian/nemo:nightly", + image="nvcr.io/nvidia/nemo:26.02", num_nodes=3, gpus_per_node=8, ) @@ -209,7 +209,7 @@ def test_get_job_body_container_kwargs(self, mock_k8s_clients): def test_get_job_body_artifact(self, mock_k8s_clients): e = KubeflowExecutor( - image="nvcr.io/nvidian/nemo:nightly", + image="nvcr.io/nvidia/nemo:26.02", namespace="runai-nemo-ci", num_nodes=3, nprocs_per_node=8, @@ -234,7 +234,7 @@ def test_get_job_body_artifact(self, mock_k8s_clients): assert worker["replicas"] == 2 for replica in [master, worker]: container = replica["template"]["spec"]["containers"][0] - assert container["image"] == "nvcr.io/nvidian/nemo:nightly" + assert container["image"] == "nvcr.io/nvidia/nemo:26.02" assert container["resources"]["limits"]["nvidia.com/gpu"] == "8" assert container["resources"]["requests"]["cpu"] == "16" assert container["resources"]["requests"]["memory"] == "64Gi" @@ -243,7 +243,7 @@ def test_get_job_body_artifact(self, mock_k8s_clients): def test_get_trainjob_body_structure(self, mock_k8s_clients): e = KubeflowExecutor( - image="nvcr.io/nvidian/nemo:nightly", + image="nvcr.io/nvidia/nemo:26.02", job_kind="TrainJob", num_nodes=2, gpus_per_node=8, @@ -257,7 +257,7 @@ def test_get_trainjob_body_structure(self, mock_k8s_clients): trainer = spec["trainer"] assert trainer["numNodes"] == 2 assert trainer["numProcPerNode"] == 8 # defaults to gpus_per_node, int not str - assert trainer["image"] == "nvcr.io/nvidian/nemo:nightly" + assert trainer["image"] == "nvcr.io/nvidia/nemo:26.02" assert trainer["command"] == ["python", "train.py"] def test_get_trainjob_body_resources(self, mock_k8s_clients): diff --git 
a/test/run/torchx_backend/schedulers/test_kubeflow.py b/test/run/torchx_backend/schedulers/test_kubeflow.py index 2260d920..c8398dfa 100644 --- a/test/run/torchx_backend/schedulers/test_kubeflow.py +++ b/test/run/torchx_backend/schedulers/test_kubeflow.py @@ -41,7 +41,7 @@ def mock_k8s(): @pytest.fixture def executor(mock_k8s, tmp_path): e = KubeflowExecutor( - image="nvcr.io/nvidian/nemo:nightly", + image="nvcr.io/nvidia/nemo:26.02", num_nodes=3, gpus_per_node=8, ) @@ -64,7 +64,7 @@ def mock_app_def(): roles=[ Role( name="test_role", - image="nvcr.io/nvidian/nemo:nightly", + image="nvcr.io/nvidia/nemo:26.02", entrypoint="python", args=["train.py"], ) @@ -315,7 +315,7 @@ def test_submit_dryrun_applies_macro_values(scheduler, mock_app_def, executor): def test_submit_dryrun_with_workdir_pvc_and_local_path(scheduler, mock_app_def, mock_k8s, tmp_path): local_path = str(tmp_path / "scripts") e = KubeflowExecutor( - image="nvcr.io/nvidian/nemo:nightly", + image="nvcr.io/nvidia/nemo:26.02", workdir_pvc="my-pvc", workdir_local_path=local_path, ) @@ -346,7 +346,7 @@ def test_submit_dryrun_with_workdir_pvc_and_local_path(scheduler, mock_app_def, def test_submit_dryrun_with_workdir_pvc_no_local_path(scheduler, mock_app_def, mock_k8s, tmp_path): e = KubeflowExecutor( - image="nvcr.io/nvidian/nemo:nightly", + image="nvcr.io/nvidia/nemo:26.02", workdir_pvc="my-pvc", ) e.experiment_id = "test_exp" From e10da5ee124b079d6c5e0e73afc468d36c172440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 16 Mar 2026 16:20:13 +0000 Subject: [PATCH 06/16] num_nodes: int = 1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- nemo_run/core/execution/kubeflow.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nemo_run/core/execution/kubeflow.py b/nemo_run/core/execution/kubeflow.py index 9da5c7d0..df7e0c09 100644 --- a/nemo_run/core/execution/kubeflow.py +++ 
b/nemo_run/core/execution/kubeflow.py @@ -18,12 +18,12 @@ import os import subprocess import time - -from jinja2 import Environment, PackageLoader, select_autoescape from dataclasses import dataclass, field from enum import Enum from typing import Any, Iterable, Optional +from jinja2 import Environment, PackageLoader, select_autoescape + try: from kubernetes import client, config, watch from kubernetes.client.rest import ApiException @@ -77,7 +77,7 @@ class KubeflowExecutor(Executor): runtime_ref: str = "torch-distributed" namespace: str = "default" image: str = "" - num_nodes: int = 2 + num_nodes: int = 1 nprocs_per_node: Optional[int] = None # defaults to gpus_per_node when not set gpus_per_node: Optional[int] = None cpu_requests: Optional[str] = None From ff80ab6db503de9accc34c5a08cc9a8f69fcbf27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 16 Mar 2026 16:23:45 +0000 Subject: [PATCH 07/16] alpine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- nemo_run/core/execution/kubeflow.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nemo_run/core/execution/kubeflow.py b/nemo_run/core/execution/kubeflow.py index df7e0c09..ca614229 100644 --- a/nemo_run/core/execution/kubeflow.py +++ b/nemo_run/core/execution/kubeflow.py @@ -96,6 +96,7 @@ class KubeflowExecutor(Executor): # pod_spec_overrides merges extra fields into the pod spec (PyTorchJob) or # podTemplateOverrides[].spec (TrainJob) — e.g. {"resourceClaims": [...]}. 
pod_spec_overrides: dict[str, Any] = field(default_factory=dict) + data_mover_image: str = "alpine:3.19" restart_policy: str = "OnFailure" image_pull_secrets: list[str] = field(default_factory=list) spec_kwargs: dict[str, Any] = field(default_factory=dict) @@ -538,7 +539,7 @@ def _start_data_mover_pod(self, pod_name: str, timeout: int = 120) -> None: "containers": [ { "name": "mover", - "image": "alpine:3.19", + "image": self.data_mover_image, "command": ["sleep", "infinity"], "volumeMounts": [{"name": vol_name, "mountPath": self.workdir_pvc_path}], } From 7cac07280b8180f74a29373965bf47db10be9e25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 16 Mar 2026 16:48:21 +0000 Subject: [PATCH 08/16] refactor: remove PyTorchJob support from KubeflowExecutor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PyTorchJob (Training Operator v1) is deprecated in favour of TrainJob (Training Operator v2). Simplify the executor to support TrainJob only: - Remove PyTorchJob constants, `job_kind` field, `_get_pytorchjob_body`, and all PyTorchJob branches in `_group`, `_version`, `_plural`, `_pod_label_selector`, `get_job_body`, and `status`. - Inline the trivial `_group()`, `_version()`, `_plural()`, and `_pod_label_selector()` helpers; callers now reference the `_TRAINJOB_*` constants and the label-selector format string directly. - Rename `_get_trainjob_body` → `get_job_body` (drop the one-line wrapper). - Remove backwards-compat `nproc_per_node → nprocs_per_node` migration block in `schedulers/kubeflow.py` (only relevant for legacy PyTorchJob persisted state). - Add docstrings to all public methods that lacked them. - Update tests: remove PyTorchJob-specific tests, drop `job_kind="TrainJob"` params (now the only kind), fix status/launch-wait fixtures to use the TrainJob `jobsStatus` format. 
Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: oliver könig --- nemo_run/core/execution/kubeflow.py | 207 +++++--------- .../run/torchx_backend/schedulers/kubeflow.py | 8 - test/core/execution/test_kubeflow.py | 260 +----------------- 3 files changed, 76 insertions(+), 399 deletions(-) diff --git a/nemo_run/core/execution/kubeflow.py b/nemo_run/core/execution/kubeflow.py index ca614229..decb8321 100644 --- a/nemo_run/core/execution/kubeflow.py +++ b/nemo_run/core/execution/kubeflow.py @@ -37,12 +37,6 @@ logger = logging.getLogger(__name__) -# PyTorchJob (Kubeflow Training Operator v1) -_PYTORCHJOB_GROUP = "kubeflow.org" -_PYTORCHJOB_VERSION = "v1" -_PYTORCHJOB_PLURAL = "pytorchjobs" -_PYTORCHJOB_KIND = "PyTorchJob" - # TrainJob (Kubeflow Training Operator v2) _TRAINJOB_GROUP = "trainer.kubeflow.org" _TRAINJOB_VERSION = "v1alpha1" @@ -63,17 +57,13 @@ class KubeflowExecutor(Executor): """ Dataclass to configure a Kubeflow Executor for the Kubeflow Training Operator on Kubernetes. - Supports both PyTorchJob (Training Operator v1) and TrainJob (Training Operator v2) via - the ``job_kind`` parameter. Kubernetes configuration is loaded automatically (local kubeconfig - with in-cluster fallback). + Uses TrainJob (Training Operator v2). Kubernetes configuration is loaded automatically + (local kubeconfig with in-cluster fallback). Args: - job_kind: ``"PyTorchJob"`` (default) or ``"TrainJob"``. runtime_ref: ``ClusterTrainingRuntime`` name used by TrainJob (e.g. ``"torch-distributed"``). - Ignored for PyTorchJob. """ - job_kind: str = "PyTorchJob" runtime_ref: str = "torch-distributed" namespace: str = "default" image: str = "" @@ -93,8 +83,7 @@ class KubeflowExecutor(Executor): # env_list accepts full env var dicts (e.g. valueFrom/secretKeyRef). # Simple key=value pairs should use the inherited env_vars dict instead. 
env_list: list[dict[str, Any]] = field(default_factory=list) - # pod_spec_overrides merges extra fields into the pod spec (PyTorchJob) or - # podTemplateOverrides[].spec (TrainJob) — e.g. {"resourceClaims": [...]}. + # pod_spec_overrides merges extra fields into podTemplateOverrides[].spec — e.g. {"resourceClaims": [...]}. pod_spec_overrides: dict[str, Any] = field(default_factory=dict) data_mover_image: str = "alpine:3.19" restart_policy: str = "OnFailure" @@ -116,8 +105,6 @@ def __post_init__(self): "kubernetes package is required for KubeflowExecutor. " "Install it with: pip install nemo-run[kubeflow]" ) - if self.job_kind not in (_PYTORCHJOB_KIND, _TRAINJOB_KIND): - raise ValueError(f"job_kind must be 'PyTorchJob' or 'TrainJob', got {self.job_kind!r}") try: config.load_kube_config() except Exception as original_exc: @@ -128,32 +115,17 @@ def __post_init__(self): self._custom_objects_api = client.CustomObjectsApi() self._core_v1_api = client.CoreV1Api() - # ── K8s API coordinates ─────────────────────────────────────────────────── - - def _group(self) -> str: - return _PYTORCHJOB_GROUP if self.job_kind == _PYTORCHJOB_KIND else _TRAINJOB_GROUP - - def _version(self) -> str: - return _PYTORCHJOB_VERSION if self.job_kind == _PYTORCHJOB_KIND else _TRAINJOB_VERSION - - def _plural(self) -> str: - return _PYTORCHJOB_PLURAL if self.job_kind == _PYTORCHJOB_KIND else _TRAINJOB_PLURAL - - def _pod_label_selector(self, job_name: str) -> str: - if self.job_kind == _PYTORCHJOB_KIND: - return f"training.kubeflow.org/job-name={job_name}" - # TrainJob delegates to JobSet; pods carry the jobset label - return f"jobset.sigs.k8s.io/jobset-name={job_name}" - # ── Executor interface ──────────────────────────────────────────────────── def assign(self, exp_id: str, exp_dir: str, task_id: str, task_dir: str) -> None: + """Bind this executor to a specific experiment task, setting job identity and directories.""" self.experiment_id = exp_id self.experiment_dir = exp_dir self.job_name = 
task_id self.job_dir = os.path.join(exp_dir, task_dir) def nnodes(self) -> int: + """Return the total number of nodes requested.""" return self.num_nodes @property @@ -166,6 +138,7 @@ def code_dir(self) -> str: return f"{self.workdir_pvc_path.rstrip('/')}/{getpass.getuser()}/code" def nproc_per_node(self) -> int: + """Return processes per node: nprocs_per_node → gpus_per_node → 1.""" if self.nprocs_per_node is not None: return self.nprocs_per_node if self.gpus_per_node is not None: @@ -174,12 +147,6 @@ def nproc_per_node(self) -> int: # ── Manifest builders ───────────────────────────────────────────────────── - def get_job_body(self, name: str, command: list[str]) -> dict: - """Build the CRD manifest dict for the configured ``job_kind``.""" - if self.job_kind == _PYTORCHJOB_KIND: - return self._get_pytorchjob_body(name, command) - return self._get_trainjob_body(name, command) - def _build_resources(self) -> dict[str, Any]: limits: dict[str, Any] = {} requests: dict[str, Any] = {} @@ -201,69 +168,8 @@ def _build_resources(self) -> dict[str, Any]: resources["requests"] = requests return resources - def _get_pytorchjob_body(self, name: str, command: list[str]) -> dict: - resources = self._build_resources() - env = [{"name": k, "value": v} for k, v in self.env_vars.items()] + self.env_list - - container: dict[str, Any] = { - "name": "pytorch", - "image": self.image, - "command": command, - "env": env, - } - if self.volume_mounts: - container["volumeMounts"] = self.volume_mounts - if resources: - container["resources"] = resources - container.update(self.container_kwargs) - - pod_spec: dict[str, Any] = {"containers": [container]} - if self.volumes: - pod_spec["volumes"] = self.volumes - if self.image_pull_secrets: - pod_spec["imagePullSecrets"] = [{"name": s} for s in self.image_pull_secrets] - if self.tolerations: - pod_spec["tolerations"] = self.tolerations - if self.affinity: - pod_spec["affinity"] = self.affinity - pod_spec.update(self.pod_spec_overrides) - - 
template_metadata: dict[str, Any] = {} - if self.labels: - template_metadata["labels"] = self.labels - if self.annotations: - template_metadata["annotations"] = self.annotations - - replica_spec: dict[str, Any] = { - "restartPolicy": self.restart_policy, - "template": { - "metadata": template_metadata, - "spec": pod_spec, - }, - } - - spec: dict[str, Any] = { - "nprocPerNode": str(self.nproc_per_node()), - "pytorchReplicaSpecs": { - "Master": {"replicas": 1, **replica_spec}, - "Worker": {"replicas": self.num_nodes - 1, **replica_spec}, - }, - **self.spec_kwargs, - } - - return { - "apiVersion": f"{_PYTORCHJOB_GROUP}/{_PYTORCHJOB_VERSION}", - "kind": _PYTORCHJOB_KIND, - "metadata": { - "name": name, - "namespace": self.namespace, - "labels": self.labels, - "annotations": self.annotations, - }, - "spec": spec, - } - - def _get_trainjob_body(self, name: str, command: list[str]) -> dict: + def get_job_body(self, name: str, command: list[str]) -> dict: + """Build and return the TrainJob CRD manifest dict.""" resources = self._build_resources() env = [{"name": k, "value": v} for k, v in self.env_vars.items()] + self.env_list @@ -332,24 +238,30 @@ def launch( timeout: int = 300, poll_interval: int = 10, ) -> tuple[str, KubeflowJobState]: + """Submit a TrainJob and optionally wait until it reaches a terminal or running state. + + Returns ``(job_name, state)`` where state is ``CREATED`` when not waiting, or the + observed ``RUNNING``, ``SUCCEEDED``, or ``FAILED`` state when *wait* is ``True``. + Raises ``RuntimeError`` if the job already exists or *timeout* expires. 
+ """ name = name.replace("_", "-").replace(".", "-").lower() job_body = self.get_job_body(name, cmd) try: self._custom_objects_api.create_namespaced_custom_object( - group=self._group(), - version=self._version(), + group=_TRAINJOB_GROUP, + version=_TRAINJOB_VERSION, namespace=self.namespace, - plural=self._plural(), + plural=_TRAINJOB_PLURAL, body=job_body, ) except ApiException as e: if e.status == 409: raise RuntimeError( - f"{self.job_kind} {name} already exists in namespace {self.namespace}" + f"{_TRAINJOB_KIND} {name} already exists in namespace {self.namespace}" ) from e raise - logger.info("Submitted %s %s to namespace %s", self.job_kind, name, self.namespace) + logger.info("Submitted %s %s to namespace %s", _TRAINJOB_KIND, name, self.namespace) if not wait: return name, KubeflowJobState.CREATED @@ -360,7 +272,7 @@ def launch( while time.time() < deadline: state = self.status(name) or KubeflowJobState.UNKNOWN if state != last_logged_state: - logger.info("%s %s: %s", self.job_kind, name, state.value) + logger.info("%s %s: %s", _TRAINJOB_KIND, name, state.value) last_logged_state = state if state == KubeflowJobState.RUNNING: return name, state @@ -369,16 +281,17 @@ def launch( time.sleep(poll_interval) raise RuntimeError( - f"{self.job_kind} {name} did not reach RUNNING within {timeout}s, last state: {state}" + f"{_TRAINJOB_KIND} {name} did not reach RUNNING within {timeout}s, last state: {state}" ) def status(self, job_name: str) -> Optional[KubeflowJobState]: + """Return the current state of *job_name*, or ``None`` if it no longer exists.""" try: resp = self._custom_objects_api.get_namespaced_custom_object( - group=self._group(), - version=self._version(), + group=_TRAINJOB_GROUP, + version=_TRAINJOB_VERSION, namespace=self.namespace, - plural=self._plural(), + plural=_TRAINJOB_PLURAL, name=job_name, ) except ApiException as e: @@ -389,29 +302,16 @@ def status(self, job_name: str) -> Optional[KubeflowJobState]: job_status = resp.get("status", {}) - if 
self.job_kind == _TRAINJOB_KIND: - # TrainJob (v2) uses status.jobsStatus[].{active,ready,succeeded,failed} - jobs_status = job_status.get("jobsStatus", []) - if any(js.get("failed", 0) > 0 for js in jobs_status): - return KubeflowJobState.FAILED - if jobs_status and all( - js.get("succeeded", 0) > 0 and js.get("active", 0) == 0 for js in jobs_status - ): - return KubeflowJobState.SUCCEEDED - if any(js.get("active", 0) > 0 or js.get("ready", 0) > 0 for js in jobs_status): - return KubeflowJobState.RUNNING - return KubeflowJobState.UNKNOWN - - # PyTorchJob (v1) uses status.conditions[].{type,status} - conditions = job_status.get("conditions", []) - state_map = { - "Running": KubeflowJobState.RUNNING, - "Succeeded": KubeflowJobState.SUCCEEDED, - "Failed": KubeflowJobState.FAILED, - } - for cond in reversed(conditions): - if cond.get("status") == "True" and cond.get("type") in state_map: - return state_map[cond["type"]] + # TrainJob (v2) uses status.jobsStatus[].{active,ready,succeeded,failed} + jobs_status = job_status.get("jobsStatus", []) + if any(js.get("failed", 0) > 0 for js in jobs_status): + return KubeflowJobState.FAILED + if jobs_status and all( + js.get("succeeded", 0) > 0 and js.get("active", 0) == 0 for js in jobs_status + ): + return KubeflowJobState.SUCCEEDED + if any(js.get("active", 0) > 0 or js.get("ready", 0) > 0 for js in jobs_status): + return KubeflowJobState.RUNNING return KubeflowJobState.UNKNOWN def fetch_logs( @@ -421,7 +321,13 @@ def fetch_logs( lines: int = 100, timeout: int = 60, ) -> Iterable[str]: - label_selector = self._pod_label_selector(job_name) + """Yield log lines from all pods of *job_name* via ``kubectl logs``. + + When *stream* is ``True`` the method follows the log stream and retries + until pods are running (up to 10 minutes). Otherwise it returns the last + *lines* lines from a single ``kubectl logs`` call. 
+ """ + label_selector = f"jobset.sigs.k8s.io/jobset-name={job_name}" cmd = [ "kubectl", "logs", @@ -473,24 +379,30 @@ def cancel( timeout: int = 300, poll_interval: int = 5, ) -> Optional[bool]: + """Delete *job_name* from the cluster. + + When *wait* is ``True``, blocks until both the CR and its pods are gone, + returning ``True`` on success or ``False`` if *timeout* expires. + Returns ``None`` when not waiting or when the job was already absent. + """ try: self._custom_objects_api.delete_namespaced_custom_object( - group=self._group(), - version=self._version(), + group=_TRAINJOB_GROUP, + version=_TRAINJOB_VERSION, namespace=self.namespace, - plural=self._plural(), + plural=_TRAINJOB_PLURAL, name=job_name, ) except ApiException as e: if e.status == 404: - logger.info("%s %s already deleted", self.job_kind, job_name) + logger.info("%s %s already deleted", _TRAINJOB_KIND, job_name) return None raise if not wait: return None - label_selector = self._pod_label_selector(job_name) + label_selector = f"jobset.sigs.k8s.io/jobset-name={job_name}" deadline = time.time() + timeout while time.time() < deadline: @@ -499,10 +411,10 @@ def cancel( # Check if CR is gone try: self._custom_objects_api.get_namespaced_custom_object( - group=self._group(), - version=self._version(), + group=_TRAINJOB_GROUP, + version=_TRAINJOB_VERSION, namespace=self.namespace, - plural=self._plural(), + plural=_TRAINJOB_PLURAL, name=job_name, ) continue # CR still present @@ -663,6 +575,12 @@ def materialize_launch_script(self, cmd: list[str], max_retries: int = 0) -> Non logger.info("Wrote launch script to %s", launch_script_path) def package(self, packager: Packager, job_name: str) -> None: + """Sync job_dir to the workdir PVC via a temporary data-mover pod before launch. + + Does nothing when ``workdir_pvc`` is unset. If ``workdir_local_path`` is set, + its contents are first rsynced into ``job_dir`` so hand-written scripts are + included alongside generated files such as ``launch.sh``. 
+ """ if not self.workdir_pvc: return # Merge extra local files (e.g. training scripts) into job_dir so they @@ -767,6 +685,7 @@ def _lookup_job_dir(self, job_name: str) -> str: return "" def macro_values(self) -> Optional[ExecutorMacros]: + """Return the torchrun environment variable names injected by the Training Operator.""" return ExecutorMacros( head_node_ip_var="PET_MASTER_ADDR", nproc_per_node_var="PET_NPROC_PER_NODE", diff --git a/nemo_run/run/torchx_backend/schedulers/kubeflow.py b/nemo_run/run/torchx_backend/schedulers/kubeflow.py index 27bfdaf1..be6e9db8 100644 --- a/nemo_run/run/torchx_backend/schedulers/kubeflow.py +++ b/nemo_run/run/torchx_backend/schedulers/kubeflow.py @@ -263,14 +263,6 @@ def _get_job_dirs() -> dict[str, dict[str, Any]]: for app in data.values(): try: cfg = serializer.deserialize(app["executor"]) - # Backwards compat: migrate renamed field nproc_per_node → nprocs_per_node. - # AttributeError means the field doesn't exist so no migration is needed. - try: - val = cfg.nproc_per_node - del cfg.nproc_per_node - cfg.nprocs_per_node = val - except AttributeError: - pass # field absent — already using the new name, nothing to migrate app["executor"] = fdl.build(cfg) except Exception as e: log.debug("Failed to deserialize executor: %s", e) diff --git a/test/core/execution/test_kubeflow.py b/test/core/execution/test_kubeflow.py index d9bffe0b..9b313f95 100644 --- a/test/core/execution/test_kubeflow.py +++ b/test/core/execution/test_kubeflow.py @@ -45,11 +45,6 @@ def test_executor_defaults(self, executor): assert executor.namespace == "default" assert executor.restart_policy == "OnFailure" assert executor.nprocs_per_node is None # unset; resolved at manifest build time - assert executor.job_kind == "PyTorchJob" - - def test_invalid_job_kind(self, mock_k8s_clients): - with pytest.raises(ValueError, match="job_kind must be"): - KubeflowExecutor(image="test:latest", job_kind="InvalidKind") def test_kubeconfig_fallback_to_incluster(self): with ( 
@@ -99,152 +94,11 @@ def test_assign(self, executor): assert executor.experiment_dir == "/tmp/exp" assert executor.job_dir == "/tmp/exp/task-0" - # ── PyTorchJob manifest generation ────────────────────────────────────────── - - def test_get_job_body_structure(self, executor): - body = executor.get_job_body("my-job", ["/bin/bash", "-c", "echo hi"]) - assert body["apiVersion"] == "kubeflow.org/v1" - assert body["kind"] == "PyTorchJob" - assert body["metadata"]["name"] == "my-job" - spec = body["spec"] - assert spec["nprocPerNode"] == "8" # defaults to gpus_per_node - assert "Master" in spec["pytorchReplicaSpecs"] - assert "Worker" in spec["pytorchReplicaSpecs"] - assert spec["pytorchReplicaSpecs"]["Master"]["replicas"] == 1 - assert spec["pytorchReplicaSpecs"]["Worker"]["replicas"] == 2 - - def test_get_job_body_resources(self, executor): - executor.cpu_requests = "16" - executor.memory_requests = "64Gi" - body = executor.get_job_body("my-job", ["python", "train.py"]) - container = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"]["containers"][ - 0 - ] - resources = container["resources"] - assert resources["limits"]["nvidia.com/gpu"] == "8" - assert resources["requests"]["cpu"] == "16" - assert resources["requests"]["memory"] == "64Gi" - - def test_get_job_body_no_gpu(self, mock_k8s_clients): - e = KubeflowExecutor(image="test:latest", gpus_per_node=None) - body = e.get_job_body("cpu-job", ["python", "train.py"]) - container = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"]["containers"][ - 0 - ] - resources = container.get("resources", {}) - limits = resources.get("limits", {}) - requests = resources.get("requests", {}) - assert "nvidia.com/gpu" not in limits - assert "nvidia.com/gpu" not in requests - - def test_get_job_body_volumes(self, mock_k8s_clients): - e = KubeflowExecutor( - image="test:latest", - volumes=[{"name": "data", "persistentVolumeClaim": {"claimName": "my-pvc"}}], - volume_mounts=[{"name": "data", "mountPath": 
"/data"}], - ) - body = e.get_job_body("vol-job", ["echo", "hi"]) - spec = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"] - assert spec["volumes"] == [ - {"name": "data", "persistentVolumeClaim": {"claimName": "my-pvc"}} - ] - container = spec["containers"][0] - assert container["volumeMounts"] == [{"name": "data", "mountPath": "/data"}] - - def test_get_job_body_env_vars(self, mock_k8s_clients): - e = KubeflowExecutor( - image="test:latest", - env_vars={"MY_VAR": "hello", "OTHER": "world"}, - ) - body = e.get_job_body("env-job", ["echo"]) - container = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"]["containers"][ - 0 - ] - env_names = {item["name"]: item["value"] for item in container["env"]} - assert env_names["MY_VAR"] == "hello" - assert env_names["OTHER"] == "world" - - def test_get_job_body_labels_annotations(self, mock_k8s_clients): - e = KubeflowExecutor( - image="test:latest", - labels={"app": "my-app"}, - annotations={"note": "test"}, - ) - body = e.get_job_body("labeled-job", ["echo"]) - assert body["metadata"]["labels"] == {"app": "my-app"} - assert body["metadata"]["annotations"] == {"note": "test"} - pod_meta = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["metadata"] - assert pod_meta["labels"] == {"app": "my-app"} - - def test_get_job_body_image_pull_secrets(self, mock_k8s_clients): - e = KubeflowExecutor( - image="test:latest", - image_pull_secrets=["my-secret", "other-secret"], - ) - body = e.get_job_body("secret-job", ["echo"]) - pod_spec = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"] - assert pod_spec["imagePullSecrets"] == [ - {"name": "my-secret"}, - {"name": "other-secret"}, - ] - - def test_get_job_body_spec_kwargs(self, mock_k8s_clients): - e = KubeflowExecutor( - image="test:latest", - spec_kwargs={"elasticPolicy": {"maxRestarts": 3}}, - ) - body = e.get_job_body("spec-job", ["echo"]) - assert body["spec"]["elasticPolicy"] == {"maxRestarts": 3} - - def 
test_get_job_body_container_kwargs(self, mock_k8s_clients): - e = KubeflowExecutor( - image="test:latest", - container_kwargs={"securityContext": {"runAsUser": 1000}}, - ) - body = e.get_job_body("ckwargs-job", ["echo"]) - container = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"]["containers"][ - 0 - ] - assert container["securityContext"] == {"runAsUser": 1000} - - def test_get_job_body_artifact(self, mock_k8s_clients): - e = KubeflowExecutor( - image="nvcr.io/nvidia/nemo:26.02", - namespace="runai-nemo-ci", - num_nodes=3, - nprocs_per_node=8, - gpus_per_node=8, - cpu_requests="16", - memory_requests="64Gi", - volumes=[{"name": "model-cache", "persistentVolumeClaim": {"claimName": "my-pvc"}}], - volume_mounts=[{"name": "model-cache", "mountPath": "/nemo-workspace"}], - labels={"app": "nemo-ci-training"}, - ) - body = e.get_job_body("nemo-ci-training", ["/bin/bash", "-c", "echo hi"]) - - assert body["apiVersion"] == "kubeflow.org/v1" - assert body["kind"] == "PyTorchJob" - assert body["metadata"]["name"] == "nemo-ci-training" - assert body["metadata"]["namespace"] == "runai-nemo-ci" - spec = body["spec"] - assert spec["nprocPerNode"] == "8" - master = spec["pytorchReplicaSpecs"]["Master"] - worker = spec["pytorchReplicaSpecs"]["Worker"] - assert master["replicas"] == 1 - assert worker["replicas"] == 2 - for replica in [master, worker]: - container = replica["template"]["spec"]["containers"][0] - assert container["image"] == "nvcr.io/nvidia/nemo:26.02" - assert container["resources"]["limits"]["nvidia.com/gpu"] == "8" - assert container["resources"]["requests"]["cpu"] == "16" - assert container["resources"]["requests"]["memory"] == "64Gi" - # ── TrainJob manifest generation ───────────────────────────────────────────── def test_get_trainjob_body_structure(self, mock_k8s_clients): e = KubeflowExecutor( image="nvcr.io/nvidia/nemo:26.02", - job_kind="TrainJob", num_nodes=2, gpus_per_node=8, ) @@ -263,7 +117,6 @@ def 
test_get_trainjob_body_structure(self, mock_k8s_clients): def test_get_trainjob_body_resources(self, mock_k8s_clients): e = KubeflowExecutor( image="test:latest", - job_kind="TrainJob", gpus_per_node=4, cpu_requests="8", memory_requests="32Gi", @@ -277,21 +130,19 @@ def test_get_trainjob_body_resources(self, mock_k8s_clients): def test_get_trainjob_body_custom_runtime_ref(self, mock_k8s_clients): e = KubeflowExecutor( image="test:latest", - job_kind="TrainJob", runtime_ref="my-custom-runtime", ) body = e.get_job_body("rt-job", ["echo"]) assert body["spec"]["runtimeRef"] == {"name": "my-custom-runtime"} def test_get_trainjob_body_no_resources_when_no_gpu(self, mock_k8s_clients): - e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") + e = KubeflowExecutor(image="test:latest") body = e.get_job_body("cpu-job", ["echo"]) assert "resourcesPerNode" not in body["spec"]["trainer"] def test_get_trainjob_body_volumes_via_pod_template_overrides(self, mock_k8s_clients): e = KubeflowExecutor( image="test:latest", - job_kind="TrainJob", volumes=[{"name": "data", "persistentVolumeClaim": {"claimName": "my-pvc"}}], volume_mounts=[{"name": "data", "mountPath": "/data"}], ) @@ -312,7 +163,6 @@ def test_get_trainjob_body_image_pull_secrets_via_pod_template_overrides( ): e = KubeflowExecutor( image="test:latest", - job_kind="TrainJob", image_pull_secrets=["my-secret"], ) body = e.get_job_body("secret-job", ["echo"]) @@ -320,14 +170,13 @@ def test_get_trainjob_body_image_pull_secrets_via_pod_template_overrides( assert pod_spec["imagePullSecrets"] == [{"name": "my-secret"}] def test_get_trainjob_body_no_overrides_when_no_volumes(self, mock_k8s_clients): - e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") + e = KubeflowExecutor(image="test:latest") body = e.get_job_body("plain-job", ["echo"]) assert "podTemplateOverrides" not in body["spec"] def test_get_trainjob_body_tolerations_and_affinity(self, mock_k8s_clients): e = KubeflowExecutor( image="test:latest", - 
job_kind="TrainJob", tolerations=[{"key": "nvidia.com/gpu", "operator": "Exists", "effect": "NoSchedule"}], affinity={"nodeAffinity": {"requiredDuringSchedulingIgnoredDuringExecution": {}}}, ) @@ -341,7 +190,6 @@ def test_get_trainjob_body_tolerations_and_affinity(self, mock_k8s_clients): def test_get_trainjob_body_env_list(self, mock_k8s_clients): e = KubeflowExecutor( image="test:latest", - job_kind="TrainJob", env_vars={"SIMPLE": "value"}, env_list=[ { @@ -359,7 +207,6 @@ def test_get_trainjob_body_env_list(self, mock_k8s_clients): def test_get_trainjob_body_pod_spec_overrides(self, mock_k8s_clients): e = KubeflowExecutor( image="test:latest", - job_kind="TrainJob", pod_spec_overrides={ "resourceClaims": [ {"name": "imex-channel", "resourceClaimTemplateName": "my-template"} @@ -375,7 +222,6 @@ def test_get_trainjob_body_all_overrides_in_single_entry(self, mock_k8s_clients) # must all land in ONE podTemplateOverrides entry, not multiple. e = KubeflowExecutor( image="test:latest", - job_kind="TrainJob", volumes=[{"name": "data", "emptyDir": {}}], tolerations=[{"key": "gpu", "operator": "Exists"}], image_pull_secrets=["my-secret"], @@ -390,34 +236,6 @@ def test_get_trainjob_body_all_overrides_in_single_entry(self, mock_k8s_clients) assert "imagePullSecrets" in pod_spec assert "resourceClaims" in pod_spec - def test_get_pytorchjob_body_tolerations_and_affinity(self, mock_k8s_clients): - e = KubeflowExecutor( - image="test:latest", - tolerations=[{"key": "nvidia.com/gpu", "operator": "Exists"}], - affinity={"nodeAffinity": {}}, - ) - body = e.get_job_body("tol-job", ["echo"]) - pod_spec = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"] - assert pod_spec["tolerations"] == [{"key": "nvidia.com/gpu", "operator": "Exists"}] - assert "nodeAffinity" in pod_spec["affinity"] - - def test_get_pytorchjob_body_env_list(self, mock_k8s_clients): - e = KubeflowExecutor( - image="test:latest", - env_list=[ - { - "name": "SECRET", - "valueFrom": {"secretKeyRef": 
{"name": "s", "key": "k"}}, - } - ], - ) - body = e.get_job_body("env-job", ["echo"]) - container = body["spec"]["pytorchReplicaSpecs"]["Master"]["template"]["spec"]["containers"][ - 0 - ] - env_by_name = {e["name"]: e for e in container["env"]} - assert "valueFrom" in env_by_name["SECRET"] - # ── Launch / status / cancel ───────────────────────────────────────────────── def test_launch_success(self, executor, mock_k8s_clients): @@ -433,8 +251,8 @@ def test_launch_wait_until_running(self, executor, mock_k8s_clients): mock_custom, _ = mock_k8s_clients mock_custom.create_namespaced_custom_object.return_value = {} mock_custom.get_namespaced_custom_object.side_effect = [ - {"status": {"conditions": [{"type": "Created", "status": "True"}]}}, - {"status": {"conditions": [{"type": "Running", "status": "True"}]}}, + {"status": {"jobsStatus": [{"active": 0, "ready": 0, "succeeded": 0, "failed": 0}]}}, + {"status": {"jobsStatus": [{"active": 2, "ready": 2, "succeeded": 0, "failed": 0}]}}, ] with patch("time.sleep"): @@ -447,7 +265,7 @@ def test_launch_wait_timeout(self, executor, mock_k8s_clients): mock_custom, _ = mock_k8s_clients mock_custom.create_namespaced_custom_object.return_value = {} mock_custom.get_namespaced_custom_object.return_value = { - "status": {"conditions": [{"type": "Created", "status": "True"}]} + "status": {"jobsStatus": [{"active": 0, "ready": 0, "succeeded": 0, "failed": 0}]} } with patch("time.sleep"): @@ -464,36 +282,21 @@ def test_launch_conflict(self, executor, mock_k8s_clients): def test_status_running(self, executor, mock_k8s_clients): mock_custom, _ = mock_k8s_clients mock_custom.get_namespaced_custom_object.return_value = { - "status": { - "conditions": [ - {"type": "Created", "status": "True"}, - {"type": "Running", "status": "True"}, - ] - } + "status": {"jobsStatus": [{"active": 2, "ready": 2, "succeeded": 0, "failed": 0}]} } assert executor.status("test-job") == KubeflowJobState.RUNNING def test_status_succeeded(self, executor, 
mock_k8s_clients): mock_custom, _ = mock_k8s_clients mock_custom.get_namespaced_custom_object.return_value = { - "status": { - "conditions": [ - {"type": "Running", "status": "False"}, - {"type": "Succeeded", "status": "True"}, - ] - } + "status": {"jobsStatus": [{"active": 0, "ready": 0, "succeeded": 3, "failed": 0}]} } assert executor.status("test-job") == KubeflowJobState.SUCCEEDED def test_status_failed(self, executor, mock_k8s_clients): mock_custom, _ = mock_k8s_clients mock_custom.get_namespaced_custom_object.return_value = { - "status": { - "conditions": [ - {"type": "Running", "status": "False"}, - {"type": "Failed", "status": "True"}, - ] - } + "status": {"jobsStatus": [{"active": 0, "ready": 0, "succeeded": 0, "failed": 1}]} } assert executor.status("test-job") == KubeflowJobState.FAILED @@ -553,7 +356,7 @@ def test_fetch_logs_no_follow(self, executor, mock_k8s_clients): assert "--tail" in called_cmd assert "50" in called_cmd label_arg = " ".join(called_cmd) - assert "training.kubeflow.org/job-name=my-job" in label_arg + assert "jobset.sigs.k8s.io/jobset-name=my-job" in label_arg assert "-f" not in called_cmd assert lines == ["line1", "line2"] @@ -572,45 +375,9 @@ def test_fetch_logs_follow(self, executor, mock_k8s_clients): assert "-f" in called_cmd assert lines == ["line1\n", "line2\n"] - def test_fetch_logs_trainjob_label_selector(self, mock_k8s_clients): - e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") - with patch("subprocess.run") as mock_run: - mock_run.return_value = MagicMock(stdout="") - list(e.fetch_logs("my-trainjob", stream=False)) - - called_cmd = mock_run.call_args[0][0] - label_arg = " ".join(called_cmd) - assert "jobset.sigs.k8s.io/jobset-name=my-trainjob" in label_arg - - # ── TrainJob status (jobsStatus-based) ──────────────────────────────────── - - def test_trainjob_status_running(self, mock_k8s_clients): - mock_custom, _ = mock_k8s_clients - e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") - 
mock_custom.get_namespaced_custom_object.return_value = { - "status": {"jobsStatus": [{"active": 2, "ready": 2, "succeeded": 0, "failed": 0}]} - } - assert e.status("test-job") == KubeflowJobState.RUNNING - - def test_trainjob_status_succeeded(self, mock_k8s_clients): + def test_status_unknown_when_empty(self, mock_k8s_clients): mock_custom, _ = mock_k8s_clients - e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") - mock_custom.get_namespaced_custom_object.return_value = { - "status": {"jobsStatus": [{"active": 0, "ready": 0, "succeeded": 3, "failed": 0}]} - } - assert e.status("test-job") == KubeflowJobState.SUCCEEDED - - def test_trainjob_status_failed(self, mock_k8s_clients): - mock_custom, _ = mock_k8s_clients - e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") - mock_custom.get_namespaced_custom_object.return_value = { - "status": {"jobsStatus": [{"active": 0, "ready": 0, "succeeded": 0, "failed": 1}]} - } - assert e.status("test-job") == KubeflowJobState.FAILED - - def test_trainjob_status_unknown_when_empty(self, mock_k8s_clients): - mock_custom, _ = mock_k8s_clients - e = KubeflowExecutor(image="test:latest", job_kind="TrainJob") + e = KubeflowExecutor(image="test:latest") mock_custom.get_namespaced_custom_object.return_value = {"status": {}} assert e.status("test-job") == KubeflowJobState.UNKNOWN @@ -784,7 +551,6 @@ def test_build_resources_with_cpu_and_memory_limits(self, mock_k8s_clients): def test_get_trainjob_body_labels_and_annotations(self, mock_k8s_clients): e = KubeflowExecutor( image="test:latest", - job_kind="TrainJob", labels={"team": "nemo"}, annotations={"owner": "ci"}, ) @@ -806,7 +572,7 @@ def test_launch_wait_exits_on_succeeded(self, executor, mock_k8s_clients): mock_custom, _ = mock_k8s_clients mock_custom.create_namespaced_custom_object.return_value = {} mock_custom.get_namespaced_custom_object.return_value = { - "status": {"conditions": [{"type": "Succeeded", "status": "True"}]} + "status": {"jobsStatus": 
[{"active": 0, "ready": 0, "succeeded": 3, "failed": 0}]} } with patch("time.sleep"): _, state = executor.launch("test-job", ["echo"], wait=True, timeout=30) @@ -816,7 +582,7 @@ def test_launch_wait_exits_on_failed(self, executor, mock_k8s_clients): mock_custom, _ = mock_k8s_clients mock_custom.create_namespaced_custom_object.return_value = {} mock_custom.get_namespaced_custom_object.return_value = { - "status": {"conditions": [{"type": "Failed", "status": "True"}]} + "status": {"jobsStatus": [{"active": 0, "ready": 0, "succeeded": 0, "failed": 1}]} } with patch("time.sleep"): _, state = executor.launch("test-job", ["echo"], wait=True, timeout=30) From e8b68afb995fa944b48e962f254ff8526a331465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 16 Mar 2026 17:21:29 +0000 Subject: [PATCH 09/16] fix: handle legacy job_kind field when deserializing persisted executor state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Persisted entries in ~/.nemo_run/.kubeflow_jobs.json written before PyTorchJob was removed still carry job_kind in their serialized Fiddle config. Strip it before fdl.build() to avoid a TypeError on status polling and log fetching for those old runs. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: oliver könig --- nemo_run/run/torchx_backend/schedulers/kubeflow.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nemo_run/run/torchx_backend/schedulers/kubeflow.py b/nemo_run/run/torchx_backend/schedulers/kubeflow.py index be6e9db8..99edabda 100644 --- a/nemo_run/run/torchx_backend/schedulers/kubeflow.py +++ b/nemo_run/run/torchx_backend/schedulers/kubeflow.py @@ -263,6 +263,11 @@ def _get_job_dirs() -> dict[str, dict[str, Any]]: for app in data.values(): try: cfg = serializer.deserialize(app["executor"]) + # Backwards compat: drop removed field job_kind (PyTorchJob was removed). 
+ try: + del cfg.job_kind + except AttributeError: + pass app["executor"] = fdl.build(cfg) except Exception as e: log.debug("Failed to deserialize executor: %s", e) From d675e8dbcf8b41a80ffc1f59e2177299250974a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 16 Mar 2026 18:26:37 +0000 Subject: [PATCH 10/16] fix: also migrate legacy nproc_per_node field when deserializing executor state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Entries written before the nproc_per_node → nprocs_per_node rename still exist in ~/.nemo_run/.kubeflow_jobs.json. Migrate the value and drop the old key alongside the existing job_kind removal so both old field names are handled in one place. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: oliver könig --- nemo_run/run/torchx_backend/schedulers/kubeflow.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/nemo_run/run/torchx_backend/schedulers/kubeflow.py b/nemo_run/run/torchx_backend/schedulers/kubeflow.py index 99edabda..7afea619 100644 --- a/nemo_run/run/torchx_backend/schedulers/kubeflow.py +++ b/nemo_run/run/torchx_backend/schedulers/kubeflow.py @@ -263,9 +263,16 @@ def _get_job_dirs() -> dict[str, dict[str, Any]]: for app in data.values(): try: cfg = serializer.deserialize(app["executor"]) - # Backwards compat: drop removed field job_kind (PyTorchJob was removed). + # Backwards compat: drop/migrate fields removed or renamed in past versions. + for removed in ("job_kind",): + try: + delattr(cfg, removed) + except AttributeError: + pass + # nproc_per_node was renamed to nprocs_per_node; migrate if present. 
try: - del cfg.job_kind + cfg.nprocs_per_node = cfg.nproc_per_node + del cfg.nproc_per_node except AttributeError: pass app["executor"] = fdl.build(cfg) From 4e1a307ab0ce4a0a0fac6f5f3ddd1ac4b9c06a3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 16 Mar 2026 18:43:56 +0000 Subject: [PATCH 11/16] chore: remove backwards compat shims for job_kind and nproc_per_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: oliver könig --- nemo_run/run/torchx_backend/schedulers/kubeflow.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/nemo_run/run/torchx_backend/schedulers/kubeflow.py b/nemo_run/run/torchx_backend/schedulers/kubeflow.py index 7afea619..be6e9db8 100644 --- a/nemo_run/run/torchx_backend/schedulers/kubeflow.py +++ b/nemo_run/run/torchx_backend/schedulers/kubeflow.py @@ -263,18 +263,6 @@ def _get_job_dirs() -> dict[str, dict[str, Any]]: for app in data.values(): try: cfg = serializer.deserialize(app["executor"]) - # Backwards compat: drop/migrate fields removed or renamed in past versions. - for removed in ("job_kind",): - try: - delattr(cfg, removed) - except AttributeError: - pass - # nproc_per_node was renamed to nprocs_per_node; migrate if present. - try: - cfg.nprocs_per_node = cfg.nproc_per_node - del cfg.nproc_per_node - except AttributeError: - pass app["executor"] = fdl.build(cfg) except Exception as e: log.debug("Failed to deserialize executor: %s", e) From cdf90335a634b7de483e3b35e28b7ea2713caa45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 16 Mar 2026 20:14:40 +0000 Subject: [PATCH 12/16] fix: add --max-log-requests to kubectl logs for num_nodes > 5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kubectl enforces a default max of 5 concurrent log requests when using a label selector. 
Pass --max-log-requests=num_nodes so fetch_logs works correctly for larger jobs. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: oliver könig --- nemo_run/core/execution/kubeflow.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nemo_run/core/execution/kubeflow.py b/nemo_run/core/execution/kubeflow.py index decb8321..5196883c 100644 --- a/nemo_run/core/execution/kubeflow.py +++ b/nemo_run/core/execution/kubeflow.py @@ -337,6 +337,8 @@ def fetch_logs( self.namespace, "--tail", str(lines), + "--max-log-requests", + str(self.num_nodes), ] if stream: cmd.append("-f") From 495a7ad476124c17e29ec443b125d65133e0224f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 17 Mar 2026 21:31:15 +0000 Subject: [PATCH 13/16] comment on chmod MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- nemo_run/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nemo_run/config.py b/nemo_run/config.py index 8f20cc26..9c3c2b8e 100644 --- a/nemo_run/config.py +++ b/nemo_run/config.py @@ -495,6 +495,7 @@ def to_command( ) with open(filename, "w") as f: f.write("#!/usr/bin/bash\n" + inline_content) + # chmod with mimimal +x permissions os.chmod(filename, os.stat(filename).st_mode | 0o755) if is_local: From bc629c02f2f3b28da259fbcbe189680c5ae6bb26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 17 Mar 2026 21:35:50 +0000 Subject: [PATCH 14/16] typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- nemo_run/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_run/config.py b/nemo_run/config.py index 9c3c2b8e..a4babd9c 100644 --- a/nemo_run/config.py +++ b/nemo_run/config.py @@ -495,7 +495,7 @@ def to_command( ) with open(filename, "w") as f: f.write("#!/usr/bin/bash\n" + inline_content) - # chmod with mimimal +x permissions + # chmod with minimal +x permissions 
os.chmod(filename, os.stat(filename).st_mode | 0o755) if is_local: From 39523c439a68cd62fe78fde65a6b50e33d08e7b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 18 Mar 2026 08:35:40 +0000 Subject: [PATCH 15/16] fix: make fetch_logs streaming resilient to flaky kubectl exits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the brittle `lines_yielded > 0` and 10-minute deadline heuristics with `status()`-based termination: the retry loop now runs until the job reaches SUCCEEDED or FAILED, handling slow container pulls, mid-stream crashes, and transient network failures correctly. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: oliver könig --- nemo_run/core/execution/kubeflow.py | 23 ++++++++++++++--------- test/core/execution/test_kubeflow.py | 24 +++++++++++++++--------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/nemo_run/core/execution/kubeflow.py b/nemo_run/core/execution/kubeflow.py index 5196883c..28f40cf2 100644 --- a/nemo_run/core/execution/kubeflow.py +++ b/nemo_run/core/execution/kubeflow.py @@ -342,10 +342,9 @@ def fetch_logs( ] if stream: cmd.append("-f") - # Pods may not be running yet when the log thread starts. Retry - # kubectl logs -f until we get output (or 10 minutes pass). - deadline = time.time() + 600 - while time.time() < deadline: + # Retry kubectl logs -f until the job reaches a terminal state. + # This handles both pods not yet running and transient mid-stream failures. 
+ while True: proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, bufsize=1 ) @@ -362,14 +361,20 @@ def fetch_logs( yield remaining break except Exception as e: - logger.error("Error streaming logs: %s", e) - break + logger.warning("Error streaming logs: %s; retrying", e) finally: proc.terminate() proc.wait(timeout=2) - if lines_yielded > 0: - break # kubectl exited after producing output — job done - time.sleep(5) # no pods running yet, retry + state = self.status(job_name) + if state in (KubeflowJobState.SUCCEEDED, KubeflowJobState.FAILED): + break # job reached a terminal state, stop streaming + logger.warning( + "kubectl logs exited (rc=%d, lines=%d, state=%s); retrying", + proc.returncode, + lines_yielded, + state, + ) + time.sleep(5) else: result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout) yield from result.stdout.splitlines() diff --git a/test/core/execution/test_kubeflow.py b/test/core/execution/test_kubeflow.py index 9b313f95..af0866b2 100644 --- a/test/core/execution/test_kubeflow.py +++ b/test/core/execution/test_kubeflow.py @@ -588,45 +588,51 @@ def test_launch_wait_exits_on_failed(self, executor, mock_k8s_clients): _, state = executor.launch("test-job", ["echo"], wait=True, timeout=30) assert state == KubeflowJobState.FAILED - # ── fetch_logs streaming: retry when no lines yielded ──────────────────── + # ── fetch_logs streaming: retry until terminal state ───────────────────── - def test_fetch_logs_stream_retries_when_no_output_then_succeeds( + def test_fetch_logs_stream_retries_until_terminal_state( self, executor, mock_k8s_clients ): - """First Popen yields nothing; second yields a line — loop exits after output.""" + """First Popen yields nothing and job is RUNNING; second yields a line and job is + SUCCEEDED — loop exits on terminal status.""" import io empty_proc = MagicMock() empty_proc.stdout = io.StringIO("") empty_proc.poll.return_value = None + empty_proc.returncode = 1 
output_proc = MagicMock() output_proc.stdout = io.StringIO("some output\n") output_proc.poll.return_value = None - - procs = [empty_proc, output_proc] + output_proc.returncode = 0 with ( - patch("subprocess.Popen", side_effect=procs), + patch("subprocess.Popen", side_effect=[empty_proc, output_proc]), patch("time.sleep"), + patch.object( + executor, + "status", + side_effect=[KubeflowJobState.RUNNING, KubeflowJobState.SUCCEEDED], + ), ): lines = list(executor.fetch_logs("my-job", stream=True)) assert "some output\n" in lines def test_fetch_logs_stream_handles_exception(self, executor, mock_k8s_clients): - """Exception inside the readline loop is caught; generator terminates cleanly.""" + """Exception inside the readline loop is caught; loop exits when job is terminal.""" mock_proc = MagicMock() - mock_proc.stdout.readline.side_effect = OSError("read error") mock_proc.poll.return_value = None + mock_proc.returncode = 1 with ( patch("subprocess.Popen", return_value=mock_proc), patch("time.sleep"), + patch.object(executor, "status", return_value=KubeflowJobState.FAILED), ): - # Should not raise; returns empty (error path) lines = list(executor.fetch_logs("my-job", stream=True)) assert lines == [] From 7474d3b202e3b4ccd7bec1608b8cbbf0e1e20e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 18 Mar 2026 09:44:21 +0000 Subject: [PATCH 16/16] chore: ruff format test_kubeflow.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: oliver könig --- test/core/execution/test_kubeflow.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/core/execution/test_kubeflow.py b/test/core/execution/test_kubeflow.py index af0866b2..eb406fa9 100644 --- a/test/core/execution/test_kubeflow.py +++ b/test/core/execution/test_kubeflow.py @@ -590,9 +590,7 @@ def test_launch_wait_exits_on_failed(self, executor, mock_k8s_clients): # ── fetch_logs streaming: retry 
until terminal state ───────────────────── - def test_fetch_logs_stream_retries_until_terminal_state( - self, executor, mock_k8s_clients - ): + def test_fetch_logs_stream_retries_until_terminal_state(self, executor, mock_k8s_clients): """First Popen yields nothing and job is RUNNING; second yields a line and job is SUCCEEDED — loop exits on terminal status.""" import io