Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/jobs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ instance to the :attr:`osekit.public.project.Project.job_builder` attribute:
job_config = JobConfig(
nb_nodes=1, # Number of nodes on which the job runs
ncpus=28, # Number of total cores used per node
ngpus=1, # Number of total GPUs used per node
mem="60gb", # Maximum amount of physical memory used by the job
walltime=Timedelta(hours=5), # Maximum amount of real time during which the job can be running
venv_name=os.environ["CONDA_DEFAULT_ENV"], # Works only for conda venvs
Expand Down
27 changes: 25 additions & 2 deletions src/osekit/utils/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class JobConfig:
Number of nodes on which the job runs.
ncpus: int
Number of total cores used per node.
ngpus: int | None
Number of total GPUs used per node. If None, no GPU is requested.
mem: str
Maximum amount of physical memory used by the job.
walltime: str | Timedelta
Expand All @@ -59,9 +61,10 @@ class JobConfig:

nb_nodes: int = 1
ncpus: int = 2
ngpus: int | None = None
mem: str = "8gb"
walltime: str | Timedelta = "01:00:00"
venv_name: str = "osmose"
venv_name: str = "osekit"
queue: Literal["omp", "mpi"] = "omp"


Expand Down Expand Up @@ -97,6 +100,7 @@ def __init__(
self.script_args = script_args if script_args else {}
self.nb_nodes = config.nb_nodes
self.ncpus = config.ncpus
self.ngpus = config.ngpus
self.mem = config.mem
self.walltime = config.walltime
self.venv_name = config.venv_name
Expand Down Expand Up @@ -144,6 +148,15 @@ def ncpus(self) -> int:
def ncpus(self, ncpus: int) -> None:
self._ncpus = ncpus

@property
def ngpus(self) -> int:
"""Number of total GPU used per node."""
return self._ngpus

@ngpus.setter
def ngpus(self, ngpus: int) -> None:
self._ngpus = ngpus

@property
def mem(self) -> str:
"""Maximum amount of physical memory used by the job."""
Expand Down Expand Up @@ -283,11 +296,21 @@ def write_pbs(self, path: Path) -> None:

"""
preamble = "#!/bin/bash"

select_parts = {
"select": self.nb_nodes,
"ncpus": self.ncpus,
"mem": self.mem,
}
if self.ngpus is not None:
select_parts["ngpus"] = self.ngpus
select_str = ":".join(f"{k}={v}" for k, v in select_parts.items())

request = {
"-N": self.name,
"-q": self.queue,
"-l": [
f"select={self.nb_nodes}:ncpus={self.ncpus}:mem={self.mem}",
select_str,
f"walltime={self.walltime_str}",
],
"-o": f"{self.output_folder}/{self.name}.out"
Expand Down
26 changes: 25 additions & 1 deletion tests/test_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def test_properties_and_venv_activation() -> None:
assert job.script_args == {"purple": "bottle"}
assert job.nb_nodes == nb_nodes
assert job.ncpus == ncpus
assert job.ngpus is None
assert job.mem == "16gb"
assert job.walltime == Timedelta(hours=2)
assert job.venv_name == "merriweather"
Expand Down Expand Up @@ -138,7 +139,7 @@ def test_write_pbs(tmp_path: Path) -> None:
)

assert (
". /appli/anaconda/latest/etc/profile.d/conda.sh; conda activate osmose"
". /appli/anaconda/latest/etc/profile.d/conda.sh; conda activate osekit"
in content
)
last = content[-1]
Expand All @@ -152,6 +153,29 @@ def test_write_pbs(tmp_path: Path) -> None:
assert job.status == JobStatus.PREPARED


def test_write_pbs_job_with_gpu(tmp_path: Path) -> None:
    """Check that a job configured with GPUs requests them in its PBS file."""
    # Minimal script and output folder for the job to point at.
    script_file = tmp_path / "deville.py"
    script_file.write_text("print('cruella')")

    out_folder = tmp_path / "output"
    out_folder.mkdir()

    gpu_job = Job(
        script_path=script_file,
        script_args={"cruelle": "diablesse"},
        name="penny",
        config=JobConfig(ngpus=2),
        output_folder=out_folder,
    )

    pbs_file = tmp_path / "patch.pbs"
    gpu_job.write_pbs(pbs_file)
    pbs_lines = pbs_file.read_text().splitlines()

    # The select statement must carry the GPU request after the default resources.
    expected_select = "select=1:ncpus=2:mem=8gb:ngpus=2"
    assert any(expected_select in pbs_line for pbs_line in pbs_lines)

    # The final line of the PBS file is the python command running the script.
    command_line = pbs_lines[-1]
    assert command_line.startswith(f"python {script_file}")
    assert "--cruelle diablesse" in command_line


def test_submit_pbs_without_write_raises() -> None:
job = Job(Path("script.py"))
with pytest.raises(
Expand Down