diff --git a/docs/source/jobs.rst b/docs/source/jobs.rst index 97e23887..9129a8d9 100644 --- a/docs/source/jobs.rst +++ b/docs/source/jobs.rst @@ -25,6 +25,7 @@ instance to the :attr:`osekit.public.project.Project.job_builder` attribute: job_config = JobConfig( nb_nodes=1, # Number of nodes on which the job runs ncpus=28, # Number of total cores used per node + ngpus=1, # Number of total GPUs used per node mem="60gb", # Maximum amount of physical memory used by the job walltime=Timedelta(hours=5), # Maximum amount of real itime during which the job can be running venv_name=os.environ["CONDA_DEFAULT_ENV"], # Works only for conda venvs diff --git a/src/osekit/utils/job.py b/src/osekit/utils/job.py index 778290ec..edb78931 100644 --- a/src/osekit/utils/job.py +++ b/src/osekit/utils/job.py @@ -46,6 +46,8 @@ class JobConfig: Number of nodes on which the job runs. ncpus: int Number of total cores used per node. + ngpus: int | None + Number of total GPUs used per node. mem: str Maximum amount of physical memory used by the job. 
walltime: str | Timedelta @@ -59,9 +61,10 @@ class JobConfig: nb_nodes: int = 1 ncpus: int = 2 + ngpus: int | None = None mem: str = "8gb" walltime: str | Timedelta = "01:00:00" - venv_name: str = "osmose" + venv_name: str = "osekit" queue: Literal["omp", "mpi"] = "omp" @@ -97,6 +100,7 @@ def __init__( self.script_args = script_args if script_args else {} self.nb_nodes = config.nb_nodes self.ncpus = config.ncpus + self.ngpus = config.ngpus self.mem = config.mem self.walltime = config.walltime self.venv_name = config.venv_name @@ -144,6 +148,15 @@ def ncpus(self) -> int: def ncpus(self, ncpus: int) -> None: self._ncpus = ncpus + @property + def ngpus(self) -> int: + """Number of total GPUs used per node.""" + return self._ngpus + + @ngpus.setter + def ngpus(self, ngpus: int) -> None: + self._ngpus = ngpus + @property def mem(self) -> str: """Maximum amount of physical memory used by the job.""" @@ -283,11 +296,21 @@ def write_pbs(self, path: Path) -> None: """ preamble = "#!/bin/bash" + + select_parts = { + "select": self.nb_nodes, + "ncpus": self.ncpus, + "mem": self.mem, + } + if self.ngpus is not None: + select_parts["ngpus"] = self.ngpus + select_str = ":".join(f"{k}={v}" for k, v in select_parts.items()) + request = { "-N": self.name, "-q": self.queue, "-l": [ - f"select={self.nb_nodes}:ncpus={self.ncpus}:mem={self.mem}", + select_str, f"walltime={self.walltime_str}", ], "-o": f"{self.output_folder}/{self.name}.out" diff --git a/tests/test_job.py b/tests/test_job.py index b94ef841..76633663 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -70,6 +70,7 @@ def test_properties_and_venv_activation() -> None: assert job.script_args == {"purple": "bottle"} assert job.nb_nodes == nb_nodes assert job.ncpus == ncpus + assert job.ngpus is None assert job.mem == "16gb" assert job.walltime == Timedelta(hours=2) assert job.venv_name == "merriweather" @@ -138,7 +139,7 @@ def test_write_pbs(tmp_path: Path) -> None: ) assert ( - ". 
/appli/anaconda/latest/etc/profile.d/conda.sh; conda activate osmose" + ". /appli/anaconda/latest/etc/profile.d/conda.sh; conda activate osekit" in content ) last = content[-1] @@ -152,6 +153,29 @@ def test_write_pbs(tmp_path: Path) -> None: assert job.status == JobStatus.PREPARED +def test_write_pbs_job_with_gpu(tmp_path: Path) -> None: + script = tmp_path / "deville.py" + script.write_text("print('cruella')") + output_dir = tmp_path / "output" + output_dir.mkdir() + + job = Job( + script_path=script, + script_args={"cruelle": "diablesse"}, + name="penny", + config=JobConfig(ngpus=2), + output_folder=output_dir, + ) + pbs_path = tmp_path / "patch.pbs" + job.write_pbs(pbs_path) + + content = pbs_path.read_text().splitlines() + assert any("select=1:ncpus=2:mem=8gb:ngpus=2" in line for line in content) + last = content[-1] + assert last.startswith(f"python {script}") + assert "--cruelle diablesse" in last + + def test_submit_pbs_without_write_raises() -> None: job = Job(Path("script.py")) with pytest.raises(