Skip to content

Commit b396284

Browse files
committed
Random port initialization logic for single-process and multi-process tests; correct the prerequisites for the L2 distributed tests
1 parent d92f040 commit b396284

File tree

2 files changed

+13
-4
lines changed

2 files changed

+13
-4
lines changed

.github/workflows/build-test-linux-x86_64.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ jobs:
536536
537537
L2-dynamo-distributed-tests:
538538
name: L2 dynamo distributed tests
539-
needs: [filter-matrix, build]
539+
needs: [filter-matrix, build, L1-dynamo-core-tests, L1-dynamo-compile-tests, L1-torch-compile-tests, L1-torchscript-tests]
540540
if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }}
541541
strategy:
542542
fail-fast: false

tests/py/dynamo/distributed/distributed_utils.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# The two functions below set the environment variables for single-process and multi-process pytest runs.
1313
# This is for the GitHub CI, where we use pytest.
1414
def set_environment_variables_pytest_single_process():
15+
# A random port avoids conflicts when multiple single-process pytest sessions run in parallel (useful for local runs).
1516
port = 29500 + random.randint(1, 1000)
1617
os.environ["WORLD_SIZE"] = str(1)
1718
os.environ["RANK"] = str(0)
@@ -22,10 +23,18 @@ def set_environment_variables_pytest_single_process():
2223
def set_environment_variables_pytest_multi_process(
2324
rank: int = 0, world_size: int = 1
2425
) -> None:
25-
# Use existing MASTER_PORT if set, otherwise generate random one
26+
# Multi-process tests require MASTER_PORT to be set before mpirun
27+
# so all ranks connect to the same rendezvous point
28+
# CI uses a fixed port
2629
if "MASTER_PORT" not in os.environ:
27-
port = 29500 + random.randint(1, 1000)
28-
os.environ["MASTER_PORT"] = str(port)
30+
raise RuntimeError(
31+
"MASTER_PORT must be set before mpirun to ensure all ranks use the same port.\n"
32+
"\n"
33+
"For local testing (random port avoids 'Address already in use' errors):\n"
34+
" export MASTER_PORT=$((29500 + RANDOM % 1000))\n"
35+
" mpirun -n 2 python -m pytest distributed/test_nccl_ops.py\n"
36+
"\n"
37+
)
2938

3039
# these variables are set by mpirun -n 2
3140
local_rank = int(

0 commit comments

Comments
 (0)