From c4a18b81167bb20a190a821e9796d6159a8ef7e4 Mon Sep 17 00:00:00 2001 From: zeke <40004347+KAJdev@users.noreply.github.com> Date: Wed, 11 Mar 2026 15:22:22 -0700 Subject: [PATCH 1/2] feat: add datacenter selection example and update volume examples --- .../02_datacenters/README.md | 76 +++++++++++++++++++ .../02_datacenters/cpu_worker.py | 29 +++++++ .../02_datacenters/gpu_worker.py | 39 ++++++++++ .../01_network_volumes/cpu_worker.py | 5 +- .../01_network_volumes/gpu_worker.py | 4 +- 5 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 04_scaling_performance/02_datacenters/README.md create mode 100644 04_scaling_performance/02_datacenters/cpu_worker.py create mode 100644 04_scaling_performance/02_datacenters/gpu_worker.py diff --git a/04_scaling_performance/02_datacenters/README.md b/04_scaling_performance/02_datacenters/README.md new file mode 100644 index 0000000..2ecb9c7 --- /dev/null +++ b/04_scaling_performance/02_datacenters/README.md @@ -0,0 +1,76 @@ +# 02_datacenters + +Pin endpoints to specific RunPod data centers for latency, compliance, or availability reasons. + +## Overview + +By default, endpoints deploy across all available data centers. The `datacenter` parameter restricts placement to one or more specific DCs. CPU endpoints are limited to a subset of DCs that support CPU serverless (see `CPU_DATACENTERS`). + +## Quick Start + +```bash +pip install -r requirements.txt +flash run +``` + +## What You'll Learn + +- How to pin a GPU endpoint to a single datacenter +- How to deploy across multiple datacenters +- How CPU datacenter restrictions work + +## Available Data Centers + +| ID | Location | +|----|----------| +| `US-GA-1` | US - Georgia | +| `US-KS-1` | US - Kansas | +| `US-TX-1` | US - Texas | +| `US-OR-1` | US - Oregon | +| `CA-MTL-1` | Canada - Montreal | +| `EU-NL-1` | Europe - Netherlands | +| `EU-CZ-1` | Europe - Czech Republic | +| `EU-RO-1` | Europe - Romania | +| `EU-NO-1` | Europe - Norway | +| `EU-SE-1` | Europe - Sweden | + +CPU endpoints support: `EU-RO-1`, `US-TX-1`, `EU-SE-1`. + +## Examples + +**Single datacenter:** + +```python +@Endpoint(name="us-worker", gpu=GpuGroup.ANY, datacenter=DataCenter.US_GA_1) +async def inference(data: dict) -> dict: + ... +``` + +**Multiple datacenters:** + +```python +@Endpoint( + name="global-worker", + gpu=GpuGroup.ANY, + datacenter=[DataCenter.US_GA_1, DataCenter.EU_RO_1], +) +async def inference(data: dict) -> dict: + ... +``` + +**No datacenter (default, all DCs):** + +```python +@Endpoint(name="anywhere", gpu=GpuGroup.ANY) +async def inference(data: dict) -> dict: + ... +``` + +## Project Structure + +``` +02_datacenters/ +├── gpu_worker.py # single-DC and multi-DC GPU endpoints +├── cpu_worker.py # CPU endpoint in a supported DC +└── README.md +``` diff --git a/04_scaling_performance/02_datacenters/cpu_worker.py b/04_scaling_performance/02_datacenters/cpu_worker.py new file mode 100644 index 0000000..142a3de --- /dev/null +++ b/04_scaling_performance/02_datacenters/cpu_worker.py @@ -0,0 +1,29 @@ +# cpu worker pinned to a cpu-supported datacenter. +# cpu endpoints are only available in a subset of datacenters +# (see CPU_DATACENTERS). selecting an unsupported DC raises an error. +# run with: flash run +from runpod_flash import Endpoint, DataCenter + +api = Endpoint( + name="04_02_cpu_eu", + cpu="cpu3c-2-4", + workers=(0, 2), + datacenter=DataCenter.EU_RO_1, +) + + +@api.post("/process") +async def process(data: dict) -> dict: + """CPU processing pinned to EU-RO-1.""" + return {"datacenter": "EU-RO-1", "result": data} + + +@api.get("/health") +async def health(): + return {"status": "ok"} + + +if __name__ == "__main__": + import asyncio + + print(asyncio.run(process({"text": "hello"}))) diff --git a/04_scaling_performance/02_datacenters/gpu_worker.py b/04_scaling_performance/02_datacenters/gpu_worker.py new file mode 100644 index 0000000..4723c77 --- /dev/null +++ b/04_scaling_performance/02_datacenters/gpu_worker.py @@ -0,0 +1,39 @@ +# gpu workers pinned to specific datacenters. +# run with: flash run +from runpod_flash import Endpoint, GpuGroup, DataCenter + + +# pin to a single datacenter +@Endpoint( + name="04_02_gpu_us", + gpu=GpuGroup.ANY, + workers=(0, 3), + datacenter=DataCenter.US_GA_1, +) +async def us_inference(payload: dict) -> dict: + """GPU inference pinned to US-GA-1.""" + return {"datacenter": "US-GA-1", "result": payload} + + +# deploy across multiple datacenters for broader availability +@Endpoint( + name="04_02_gpu_multi", + gpu=GpuGroup.ANY, + workers=(0, 3), + datacenter=[DataCenter.US_GA_1, DataCenter.EU_RO_1], +) +async def multi_dc_inference(payload: dict) -> dict: + """GPU inference available in US-GA-1 and EU-RO-1.""" + return {"result": payload} + + +if __name__ == "__main__": + import asyncio + + async def test(): + print("=== US datacenter ===") + print(await us_inference({"prompt": "hello"})) + print("\n=== Multi-DC ===") + print(await multi_dc_inference({"prompt": "hello"})) + + asyncio.run(test()) diff --git a/05_data_workflows/01_network_volumes/cpu_worker.py b/05_data_workflows/01_network_volumes/cpu_worker.py index 5d1dad4..07d81db 100644 --- a/05_data_workflows/01_network_volumes/cpu_worker.py +++ b/05_data_workflows/01_network_volumes/cpu_worker.py @@ -1,11 +1,13 @@ # cpu worker with network volume for listing and serving generated images. # run with: flash run # test directly: python cpu_worker.py -from runpod_flash import Endpoint, NetworkVolume +from runpod_flash import Endpoint, DataCenter, NetworkVolume +# same volume as gpu_worker.py -- must match name and datacenter volume = NetworkVolume( name="flash-05-volume", size=50, + dataCenterId=DataCenter.EU_RO_1, ) api = Endpoint( @@ -13,6 +15,7 @@ cpu="cpu3c-1-2", workers=(1, 3), idle_timeout=120, + datacenter=DataCenter.EU_RO_1, volume=volume, ) diff --git a/05_data_workflows/01_network_volumes/gpu_worker.py b/05_data_workflows/01_network_volumes/gpu_worker.py index fd4c7b2..7a2de17 100644 --- a/05_data_workflows/01_network_volumes/gpu_worker.py +++ b/05_data_workflows/01_network_volumes/gpu_worker.py @@ -3,7 +3,7 @@ # test directly: python gpu_worker.py import logging -from runpod_flash import Endpoint, GpuType, NetworkVolume +from runpod_flash import Endpoint, GpuType, DataCenter, NetworkVolume logger = logging.getLogger(__name__) @@ -12,6 +12,7 @@ volume = NetworkVolume( name="flash-05-volume", size=50, + dataCenterId=DataCenter.EU_RO_1, ) @@ -20,6 +21,7 @@ gpu=GpuType.NVIDIA_GEFORCE_RTX_5090, workers=(0, 3), idle_timeout=300, + datacenter=DataCenter.EU_RO_1, volume=volume, env={"HF_HUB_CACHE": MODEL_PATH, "MODEL_PATH": MODEL_PATH}, dependencies=["torch", "diffusers", "transformers", "accelerate"], From 537855c862882784e73ca941555a3182d5b5c0de Mon Sep 17 00:00:00 2001 From: zeke <40004347+KAJdev@users.noreply.github.com> Date: Wed, 11 Mar 2026 15:29:14 -0700 Subject: [PATCH 2/2] fix: use datacenter= alias on NetworkVolume --- 05_data_workflows/01_network_volumes/cpu_worker.py | 2 +- 05_data_workflows/01_network_volumes/gpu_worker.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/05_data_workflows/01_network_volumes/cpu_worker.py b/05_data_workflows/01_network_volumes/cpu_worker.py index 07d81db..d9e0408 100644 --- a/05_data_workflows/01_network_volumes/cpu_worker.py +++ b/05_data_workflows/01_network_volumes/cpu_worker.py @@ -7,7 +7,7 @@ volume = NetworkVolume( name="flash-05-volume", size=50, - dataCenterId=DataCenter.EU_RO_1, + datacenter=DataCenter.EU_RO_1, ) api = Endpoint( diff --git a/05_data_workflows/01_network_volumes/gpu_worker.py b/05_data_workflows/01_network_volumes/gpu_worker.py index 7a2de17..34fb339 100644 --- a/05_data_workflows/01_network_volumes/gpu_worker.py +++ b/05_data_workflows/01_network_volumes/gpu_worker.py @@ -12,7 +12,7 @@ volume = NetworkVolume( name="flash-05-volume", size=50, - dataCenterId=DataCenter.EU_RO_1, + datacenter=DataCenter.EU_RO_1, )