From 29c250b9abd7a9b22997c5f587a4c8376222313f Mon Sep 17 00:00:00 2001 From: one Date: Fri, 27 Mar 2026 10:32:08 +0800 Subject: [PATCH 1/4] Runner: make MPI bind-to configurable --- superbench/runner/runner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/superbench/runner/runner.py b/superbench/runner/runner.py index 5787274c7..4d836b195 100644 --- a/superbench/runner/runner.py +++ b/superbench/runner/runner.py @@ -91,6 +91,8 @@ def __validate_sb_config(self): # noqa: C901 'btl_tcp_if_exclude': 'lo,docker0', 'coll_hcoll_enable': 0, } + if 'bind_to' not in mode: + self._sb_benchmarks[name].modes[idx].bind_to = 'numa' for key in ['PATH', 'LD_LIBRARY_PATH', 'SB_MICRO_PATH', 'SB_WORKSPACE']: self._sb_benchmarks[name].modes[idx].env.setdefault(key, None) if 'pattern' in mode: @@ -182,13 +184,14 @@ def __get_mode_command(self, benchmark_name, mode, timeout=None): '-tag-output ' # tag mpi output with [jobid,rank] prefix '-allow-run-as-root ' # allow mpirun to run when executed by root user '{host_list} ' # use prepared hostfile or specify nodes and launch {proc_num} processes on each node - '-bind-to numa ' # bind processes to numa + '-bind-to {bind_to} ' # bind processes according to mode config '{mca_list} {env_list} {command}' ).format( trace=trace_command, host_list=f'-host localhost:{mode.proc_num}' if 'node_num' in mode and mode.node_num == 1 else f'-hostfile hostfile -map-by ppr:{mode.proc_num}:node' if 'host_list' not in mode else '-host ' + ','.join(f'{host}:{mode.proc_num}' for host in mode.host_list), + bind_to=mode.bind_to, mca_list=' '.join(f'-mca {k} {v}' for k, v in mode.mca.items()), env_list=' '.join( f'-x {k}={str(v).format(proc_rank=mode.proc_rank, proc_num=mode.proc_num)}' From a2e59d1d3eb156cf6235c5c370354e0e2e7c2f86 Mon Sep 17 00:00:00 2001 From: one Date: Fri, 27 Mar 2026 10:40:55 +0800 Subject: [PATCH 2/4] Runner: update docs --- docs/superbench-config.mdx | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git 
a/docs/superbench-config.mdx b/docs/superbench-config.mdx index feebef6d6..2d3454ee6 100644 --- a/docs/superbench-config.mdx +++ b/docs/superbench-config.mdx @@ -370,6 +370,7 @@ proc_num: int node_num: int env: dict mca: dict +bind_to: str prefix: str parallel: bool ``` @@ -403,6 +404,7 @@ Some attributes may only be suitable for specific mode. | `prefix` | ✓ | ✘ | ✘ | | `env` | ✓ | ✓ | ✓ | | `mca` | ✘ | ✘ | ✓ | +| `bind_to` | ✘ | ✘ | ✓ | | `parallel` | ✓ | ✘ | ✘ | | `pattern` | ✘ | ✘ | ✓ | @@ -452,6 +454,16 @@ MCA (Modular Component Architecture) frameworks, components, or modules to use i in a flatten key-value dictionary. Only available for `mpi` mode. +### `bind_to` + +Process binding policy passed to `mpirun -bind-to`; must be one of `slot`, `hwthread`, `core`, `l1cache`, `l2cache`, `l3cache`, `package`, `numa`, or `none`. +Only available for `mpi` mode. + +Use this option when a benchmark needs to override the runner's default MPI binding behavior, +for example when the benchmark implements its own topology-aware CPU/NUMA affinity logic. + +* default value: `numa` + ### `parallel` Whether run benchmarks in parallel (all ranks at the same time) or in sequence (one rank at a time). 
From 9304f0dd1a330352f3255083fb603445b114ea13 Mon Sep 17 00:00:00 2001 From: one Date: Sat, 18 Apr 2026 10:55:18 +0800 Subject: [PATCH 3/4] Runner: validate MPI bind-to option and cover configurable bind-to in tests --- superbench/runner/runner.py | 10 +++++++ tests/runner/test_runner.py | 59 +++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/superbench/runner/runner.py b/superbench/runner/runner.py index 777f1b5b3..be9d340fc 100644 --- a/superbench/runner/runner.py +++ b/superbench/runner/runner.py @@ -60,6 +60,14 @@ def __set_logger(self, filename): """ SuperBenchLogger.add_handler(logger.logger, filename=str(self._output_path / filename)) + def __validate_mpi_bind_to(self, bind_to): + """Validate mpi bind_to option.""" + valid_mpi_bind_to = {'slot', 'hwthread', 'core', 'l1cache', 'l2cache', 'l3cache', 'package', 'numa', 'none'} + if bind_to not in valid_mpi_bind_to: + raise ValueError( + 'Invalid bind_to value {}. Must be one of: {}'.format(bind_to, sorted(valid_mpi_bind_to)) + ) + def __validate_sb_config(self): # noqa: C901 """Validate SuperBench config object. 
@@ -93,6 +101,8 @@ def __validate_sb_config(self): # noqa: C901 } if 'bind_to' not in mode: self._sb_benchmarks[name].modes[idx].bind_to = 'numa' + else: + self.__validate_mpi_bind_to(mode.bind_to) for key in ['PATH', 'LD_LIBRARY_PATH', 'SB_MICRO_PATH', 'SB_WORKSPACE']: self._sb_benchmarks[name].modes[idx].env.setdefault(key, None) if 'pattern' in mode: diff --git a/tests/runner/test_runner.py b/tests/runner/test_runner.py index fd45ae0a8..81a3d44c9 100644 --- a/tests/runner/test_runner.py +++ b/tests/runner/test_runner.py @@ -56,6 +56,8 @@ def test_validate_sb_config(self): self.assertIn('proc_num', mode) if mode.name == 'mpi': self.assertIn('mca', mode) + self.assertIn('bind_to', mode) + self.assertEqual('numa', mode.bind_to) def test_get_failure_count(self): """Test get_failure_count.""" @@ -153,6 +155,7 @@ def test_get_mode_command(self): 'name': 'mpi', 'proc_num': 8, 'proc_rank': 1, + 'bind_to': 'numa', 'mca': {}, 'env': { 'PATH': None, @@ -172,6 +175,7 @@ def test_get_mode_command(self): 'name': 'mpi', 'proc_num': 8, 'proc_rank': 2, + 'bind_to': 'numa', 'mca': { 'coll_hcoll_enable': 0, }, @@ -196,6 +200,7 @@ def test_get_mode_command(self): 'node_num': 1, 'proc_num': 8, 'proc_rank': 2, + 'bind_to': 'numa', 'mca': { 'coll_hcoll_enable': 0, }, @@ -219,6 +224,7 @@ def test_get_mode_command(self): 'name': 'mpi', 'proc_num': 8, 'proc_rank': 1, + 'bind_to': 'numa', 'mca': {}, 'pattern': { 'type': 'all-nodes', @@ -234,6 +240,44 @@ def test_get_mode_command(self): f'sb exec --output-dir {self.sb_output_dir} -c sb.config.yaml -C superbench.enable=foo' ), }, + { + 'benchmark_name': + 'foo', + 'mode': { + 'name': 'mpi', + 'proc_num': 8, + 'proc_rank': 0, + 'bind_to': 'core', + 'mca': {}, + 'env': { + 'PATH': None, + }, + }, + 'expected_command': ( + 'mpirun -tag-output -allow-run-as-root -hostfile hostfile -map-by ppr:8:node -bind-to core ' + ' -x PATH ' + f'sb exec --output-dir {self.sb_output_dir} -c sb.config.yaml -C superbench.enable=foo' + ), + }, + { + 
'benchmark_name': + 'foo', + 'mode': { + 'name': 'mpi', + 'proc_num': 8, + 'proc_rank': 0, + 'bind_to': 'none', + 'mca': {}, + 'env': { + 'PATH': None, + }, + }, + 'expected_command': ( + 'mpirun -tag-output -allow-run-as-root -hostfile hostfile -map-by ppr:8:node -bind-to none ' + ' -x PATH ' + f'sb exec --output-dir {self.sb_output_dir} -c sb.config.yaml -C superbench.enable=foo' + ), + }, ] for test_case in test_cases: @@ -264,6 +308,21 @@ def test_get_mode_command(self): ), expected_command ) + def test_validate_sb_config_invalid_mpi_bind_to(self): + """Test validate_sb_config rejects unsupported mpi bind_to values.""" + test_config_file = Path(__file__).parent / '../../tests/data/test.yaml' + with test_config_file.open() as fp: + invalid_config = OmegaConf.create(yaml.load(fp, Loader=yaml.SafeLoader)) + invalid_config.superbench.benchmarks['nccl-bw:all-nodes'].modes[0].bind_to = 'socket' + + with self.assertRaisesRegex(ValueError, 'Invalid bind_to value'): + SuperBenchRunner( + invalid_config, + OmegaConf.create({}), + OmegaConf.create({}), + self.sb_output_dir, + ) + def test_run_empty_benchmarks(self): """Test run empty benchmarks, nothing should happen.""" self.runner._sb_enabled_benchmarks = [] From 13604f8674492653ec56d37ffd0f06b2dcf1b208 Mon Sep 17 00:00:00 2001 From: one Date: Sat, 18 Apr 2026 11:06:04 +0800 Subject: [PATCH 4/4] Format python code --- superbench/runner/runner.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/superbench/runner/runner.py b/superbench/runner/runner.py index be9d340fc..afa7c5f7f 100644 --- a/superbench/runner/runner.py +++ b/superbench/runner/runner.py @@ -64,9 +64,7 @@ def __validate_mpi_bind_to(self, bind_to): """Validate mpi bind_to option.""" valid_mpi_bind_to = {'slot', 'hwthread', 'core', 'l1cache', 'l2cache', 'l3cache', 'package', 'numa', 'none'} if bind_to not in valid_mpi_bind_to: - raise ValueError( - 'Invalid bind_to value {}. 
Must be one of: {}'.format(bind_to, sorted(valid_mpi_bind_to)) - ) + raise ValueError('Invalid bind_to value {}. Must be one of: {}'.format(bind_to, sorted(valid_mpi_bind_to))) def __validate_sb_config(self): # noqa: C901 """Validate SuperBench config object.