From 93e231bfcaa2c5cfc8650a0d1c2c83304eebaee0 Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Wed, 27 Aug 2025 18:20:35 -0700 Subject: [PATCH 01/10] add main content Signed-off-by: Aydin Abiar --- 05_deploy_deepseek_r1/Dockerfile | 12 +++++ 05_deploy_deepseek_r1/README.md | 58 ++++++++++++++++++++++ 05_deploy_deepseek_r1/query.py | 26 ++++++++++ 05_deploy_deepseek_r1/serve_deepseek_r1.py | 27 ++++++++++ 05_deploy_deepseek_r1/service.yaml | 41 +++++++++++++++ 5 files changed, 164 insertions(+) create mode 100644 05_deploy_deepseek_r1/Dockerfile create mode 100644 05_deploy_deepseek_r1/README.md create mode 100644 05_deploy_deepseek_r1/query.py create mode 100644 05_deploy_deepseek_r1/serve_deepseek_r1.py create mode 100644 05_deploy_deepseek_r1/service.yaml diff --git a/05_deploy_deepseek_r1/Dockerfile b/05_deploy_deepseek_r1/Dockerfile new file mode 100644 index 0000000..f53b5a0 --- /dev/null +++ b/05_deploy_deepseek_r1/Dockerfile @@ -0,0 +1,12 @@ +FROM anyscale/ray:2.48.0-slim-py312-cu128 + +# C compiler for Triton’s runtime build step (vLLM V1 engine) +# https://github.com/vllm-project/vllm/issues/2997 +RUN sudo apt-get update && \ + sudo apt-get install -y --no-install-recommends build-essential + +RUN curl -LsSf https://astral.sh/uv/install.sh | sh + +RUN uv pip install --system vllm==0.9.2 +# Avoid https://github.com/vllm-project/vllm-ascend/issues/2046 with transformers < 4.54.0 +RUN uv pip install --system transformers==4.53.3 diff --git a/05_deploy_deepseek_r1/README.md b/05_deploy_deepseek_r1/README.md new file mode 100644 index 0000000..9f9b742 --- /dev/null +++ b/05_deploy_deepseek_r1/README.md @@ -0,0 +1,58 @@ +--- +description: "Deploy DeepSeek R1 with Ray Serve LLM." +--- + +# Deploy DeepSeek R1 + +This example uses Ray Serve along with vLLM to deploy a DeepSeek R1 model as an Anyscale service. + +## Install the Anyscale CLI + +```bash +pip install -U anyscale +anyscale login +``` + +## Deploy the service + +Clone the example from GitHub. 
+ +```bash +git clone https://github.com/anyscale/examples.git +cd examples/05_deploy_deepseek_r1 +``` + +Deploy the service. +```bash +export HF_TOKEN=*** +anyscale service deploy -f service.yaml +``` + +## Understanding the example + +- The [application code](https://github.com/anyscale/examples/blob/main/05_deploy_deepseek_r1/serve_deepseek_r1.py) sets the required accelerator type with `accelerator_type="H100"`. To use a different accelerator, replace `"H100"` with the desired name. See the [list of supported accelerators](https://docs.ray.io/en/latest/ray-core/accelerator-types.html#accelerator-types) for available options. +- Ray Serve automatically autoscales the number of model replicas between `min_replicas` and `max_replicas`. Ray Serve adapts the number of replicas by monitoring queue sizes. For more information on configuring autoscaling, see the [AutoscalingConfig documentation](https://docs.ray.io/en/latest/serve/api/doc/ray.serve.config.AutoscalingConfig.html). +- This example uses vLLM, and the [Dockerfile](https://github.com/anyscale/examples/blob/main/05_deploy_deepseek_r1/Dockerfile) defines the service’s dependencies. When you run `anyscale service deploy`, the build process adds these dependencies on top of an Anyscale-provided base image. +- To configure vLLM, modify the `engine_kwargs` dictionary. See [Ray documentation for the `LLMConfig` object](https://docs.ray.io/en/latest/serve/api/doc/ray.serve.llm.LLMConfig.html#ray.serve.llm.LLMConfig). + + +## Query the service + +The `anyscale service deploy` command outputs a line that looks like +```text +curl -H "Authorization: Bearer <SERVICE_TOKEN>" <BASE_URL> +``` + +From the output, you can extract the service token and base URL. Open [query.py](https://github.com/anyscale/examples/blob/main/05_deploy_deepseek_r1/query.py) and add them to the appropriate fields. 
+```python +token = <SERVICE_TOKEN> +base_url = <BASE_URL> +``` + +Query the model +```bash +pip install openai +python query.py +``` + +View the service in the [services tab](https://console.anyscale.com/services) of the Anyscale console. \ No newline at end of file diff --git a/05_deploy_deepseek_r1/query.py b/05_deploy_deepseek_r1/query.py new file mode 100644 index 0000000..518c357 --- /dev/null +++ b/05_deploy_deepseek_r1/query.py @@ -0,0 +1,26 @@ +from urllib.parse import urljoin +from openai import OpenAI + +# The "anyscale service deploy" script outputs a line that looks like +# +# curl -H "Authorization: Bearer <SERVICE_TOKEN>" <BASE_URL> +# +# From this, you can parse out the service token and base URL. +token = <SERVICE_TOKEN> # Fill this in. If deploying and querying locally, use token = "FAKE_KEY" +base_url = <BASE_URL> # Fill this in. If deploying and querying locally, use base_url = "http://localhost:8000" + +client = OpenAI(base_url= urljoin(base_url, "v1"), api_key=token) + +response = client.chat.completions.create( + model="my-deepseek-r1", + messages=[ + {"role": "user", "content": "What's the capital of France?"} + ], + stream=True +) + +# Stream and print JSON +for chunk in response: + data = chunk.choices[0].delta.content + if data: + print(data, end="", flush=True) \ No newline at end of file diff --git a/05_deploy_deepseek_r1/serve_deepseek_r1.py b/05_deploy_deepseek_r1/serve_deepseek_r1.py new file mode 100644 index 0000000..1a9852e --- /dev/null +++ b/05_deploy_deepseek_r1/serve_deepseek_r1.py @@ -0,0 +1,27 @@ +#serve_deepseek_r1.py +from ray import serve +from ray.serve.llm import LLMConfig, build_openai_app + +llm_config = LLMConfig( + model_loading_config=dict( + model_id="my-deepseek-r1", + model_source="deepseek-ai/DeepSeek-R1", + ), + accelerator_type="H100", + deployment_config=dict( + autoscaling_config=dict( + min_replicas=1, max_replicas=1, + ) + ), + engine_kwargs=dict( + max_model_len=16384, + # Split weights among 8 GPUs in each node + tensor_parallel_size=8, + pipeline_parallel_size=2 + ) +) + +app = 
build_openai_app({"llm_configs": [llm_config]}) + +# Uncomment the below line to run the service locally with Python. +# serve.run(app, blocking=True) diff --git a/05_deploy_deepseek_r1/service.yaml b/05_deploy_deepseek_r1/service.yaml new file mode 100644 index 0000000..f6d5fdb --- /dev/null +++ b/05_deploy_deepseek_r1/service.yaml @@ -0,0 +1,41 @@ +# View the docs https://docs.anyscale.com/reference/service-api#serviceconfig. + +name: deploy-deepseek-r1 + +# When empty, use the default image. This can be an Anyscale-provided base image +# like anyscale/ray:2.43.0-slim-py312-cu125, a user-provided base image (provided +# that it meets certain specs), or you can build new images using the Anyscale +# image builder at https://console.anyscale.com/v2/container-images. + +containerfile: ./Dockerfile + +# When empty, Anyscale will auto-select the instance types. You can also specify +# minimum and maximum resources. +compute_config: +# head_node: +# instance_type: m5.2xlarge +# worker_nodes: +# - instance_type: m5.16xlarge +# min_nodes: 0 +# max_nodes: 100 +# - instance_type: m7a.24xlarge +# min_nodes: 0 +# max_nodes: 100 +# market_type: PREFER_SPOT # Defaults to ON_DEMAND +# - instance_type: g4dn.2xlarge +# min_nodes: 0 +# max_nodes: 100 +# market_type: PREFER_SPOT # Defaults to ON_DEMAND + auto_select_worker_config: true + +# Path to a local directory or a remote URI to a .zip file (S3, GS, HTTP) that +# will be the working directory for the job. The files in the directory will be +# automatically uploaded to the job environment in Anyscale. +working_dir: . + +# When empty, this uses the default Anyscale Cloud in your organization. +cloud: + +# Specify the Ray Serve app to deploy. 
+applications: +- import_path: serve_deepseek_r1:app \ No newline at end of file From 7553c7d37df2d7dade036c76a28f4927a65c7a58 Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Sun, 31 Aug 2025 21:10:55 -0700 Subject: [PATCH 02/10] dockerfile with ray 2.49 Signed-off-by: Aydin Abiar --- 05_deploy_deepseek_r1/Dockerfile | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/05_deploy_deepseek_r1/Dockerfile b/05_deploy_deepseek_r1/Dockerfile index f53b5a0..a241239 100644 --- a/05_deploy_deepseek_r1/Dockerfile +++ b/05_deploy_deepseek_r1/Dockerfile @@ -1,12 +1,8 @@ -FROM anyscale/ray:2.48.0-slim-py312-cu128 +FROM anyscale/ray:2.49.0-slim-py312-cu128 # C compiler for Triton’s runtime build step (vLLM V1 engine) # https://github.com/vllm-project/vllm/issues/2997 RUN sudo apt-get update && \ sudo apt-get install -y --no-install-recommends build-essential -RUN curl -LsSf https://astral.sh/uv/install.sh | sh - -RUN uv pip install --system vllm==0.9.2 -# Avoid https://github.com/vllm-project/vllm-ascend/issues/2046 with transformers < 4.54.0 -RUN uv pip install --system transformers==4.53.3 +RUN pip install vllm==0.10.0 \ No newline at end of file From e3c621af2eb8dfda68520a5dc4e49c8b8d870a2b Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Sun, 31 Aug 2025 21:11:13 -0700 Subject: [PATCH 03/10] remove export HF_token command Signed-off-by: Aydin Abiar --- 05_deploy_deepseek_r1/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/05_deploy_deepseek_r1/README.md b/05_deploy_deepseek_r1/README.md index 9f9b742..41e12c8 100644 --- a/05_deploy_deepseek_r1/README.md +++ b/05_deploy_deepseek_r1/README.md @@ -24,7 +24,6 @@ cd examples/05_deploy_deepseek_r1 Deploy the service. 
```bash -export HF_TOKEN=*** anyscale service deploy -f service.yaml ``` From 2666aec894bfa516fe482ddf68df2abed7a77bad Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Mon, 1 Sep 2025 13:56:59 -0700 Subject: [PATCH 04/10] add terminate command Signed-off-by: Aydin Abiar --- 05_deploy_deepseek_r1/README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/05_deploy_deepseek_r1/README.md b/05_deploy_deepseek_r1/README.md index 41e12c8..f22970b 100644 --- a/05_deploy_deepseek_r1/README.md +++ b/05_deploy_deepseek_r1/README.md @@ -54,4 +54,11 @@ pip install openai python query.py ``` -View the service in the [services tab](https://console.anyscale.com/services) of the Anyscale console. \ No newline at end of file +View the service in the [services tab](https://console.anyscale.com/services) of the Anyscale console. + +## Shutdown + +Shutdown your Anyscale Service: +```bash +anyscale service terminate -n deploy-deepseek-r1 +``` \ No newline at end of file From 306e861559282e3ad966a04b0d99d0b6988d1a46 Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Mon, 1 Sep 2025 13:57:20 -0700 Subject: [PATCH 05/10] stream response now Signed-off-by: Aydin Abiar --- 05_deploy_deepseek_r1/query.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/05_deploy_deepseek_r1/query.py b/05_deploy_deepseek_r1/query.py index 518c357..2034e60 100644 --- a/05_deploy_deepseek_r1/query.py +++ b/05_deploy_deepseek_r1/query.py @@ -21,6 +21,13 @@ # Stream and print JSON for chunk in response: - data = chunk.choices[0].delta.content - if data: - print(data, end="", flush=True) \ No newline at end of file + # Stream reasoning content + if hasattr(chunk.choices[0].delta, "reasoning_content"): + data_reasoning = chunk.choices[0].delta.reasoning_content + if data_reasoning: + print(data_reasoning, end="", flush=True) + # Later, stream the final answer + if hasattr(chunk.choices[0].delta, "content"): + data_content = chunk.choices[0].delta.content + if 
data_content: + print(data_content, end="", flush=True) \ No newline at end of file From 25b24e42780df024571cd80d2c354a5d13d9f5b4 Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Mon, 1 Sep 2025 13:57:35 -0700 Subject: [PATCH 06/10] add reasoning parser Signed-off-by: Aydin Abiar --- 05_deploy_deepseek_r1/serve_deepseek_r1.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/05_deploy_deepseek_r1/serve_deepseek_r1.py b/05_deploy_deepseek_r1/serve_deepseek_r1.py index 1a9852e..ad67735 100644 --- a/05_deploy_deepseek_r1/serve_deepseek_r1.py +++ b/05_deploy_deepseek_r1/serve_deepseek_r1.py @@ -17,8 +17,9 @@ max_model_len=16384, # Split weights among 8 GPUs in each node tensor_parallel_size=8, - pipeline_parallel_size=2 - ) + pipeline_parallel_size=2, + reasoning_parser="deepseek_r1", + ), ) app = build_openai_app({"llm_configs": [llm_config]}) From 0cb4f969cb6a902dd9b8386a8fde2517809510ae Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Fri, 26 Sep 2025 17:48:09 -0700 Subject: [PATCH 07/10] update to be consistent with anyscale template Signed-off-by: Aydin Abiar --- 05_deploy_deepseek_r1/README.md | 6 +----- 05_deploy_deepseek_r1/serve_deepseek_r1.py | 11 +++++++---- 05_deploy_deepseek_r1/service.yaml | 18 ++++++++++++++++++ 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/05_deploy_deepseek_r1/README.md b/05_deploy_deepseek_r1/README.md index f22970b..a57160f 100644 --- a/05_deploy_deepseek_r1/README.md +++ b/05_deploy_deepseek_r1/README.md @@ -1,7 +1,3 @@ ---- -description: "Deploy DeepSeek R1 with Ray Serve LLM." ---- - # Deploy DeepSeek R1 This example uses Ray Serve along with vLLM to deploy a DeepSeek R1 model as an Anyscale service. 
@@ -60,5 +56,5 @@ View the service in the [services tab](https://console.anyscale.com/services) of Shutdown your Anyscale Service: ```bash -anyscale service terminate -n deploy-deepseek-r1 +anyscale service terminate -n deploy-llama-3-1-70b ``` \ No newline at end of file diff --git a/05_deploy_deepseek_r1/serve_deepseek_r1.py b/05_deploy_deepseek_r1/serve_deepseek_r1.py index ad67735..7057331 100644 --- a/05_deploy_deepseek_r1/serve_deepseek_r1.py +++ b/05_deploy_deepseek_r1/serve_deepseek_r1.py @@ -1,4 +1,4 @@ -#serve_deepseek_r1.py +# serve_deepseek_r1.py from ray import serve from ray.serve.llm import LLMConfig, build_openai_app @@ -10,15 +10,18 @@ accelerator_type="H100", deployment_config=dict( autoscaling_config=dict( - min_replicas=1, max_replicas=1, + min_replicas=1, + max_replicas=1, ) ), + ### Uncomment if your model is gated and needs your Hugging Face token to access it. You can also pass the token to your Anyscale Service with `--env HF_TOKEN=$HF_TOKEN` + # runtime_env=dict(env_vars={"HF_TOKEN": os.environ.get("HF_TOKEN")}), engine_kwargs=dict( max_model_len=16384, - # Split weights among 8 GPUs in each node + # Split weights among 8 GPUs in the node tensor_parallel_size=8, pipeline_parallel_size=2, - reasoning_parser="deepseek_r1", + reasoning_parser="deepseek_r1", # Optional: separate reasoning content from the final answer ), ) diff --git a/05_deploy_deepseek_r1/service.yaml b/05_deploy_deepseek_r1/service.yaml index f6d5fdb..edaf67c 100644 --- a/05_deploy_deepseek_r1/service.yaml +++ b/05_deploy_deepseek_r1/service.yaml @@ -27,6 +27,24 @@ compute_config: # max_nodes: 100 # market_type: PREFER_SPOT # Defaults to ON_DEMAND auto_select_worker_config: true + # Change default disk size to 1000GB + advanced_instance_config: + ## AWS ## + BlockDeviceMappings: + - Ebs: + - VolumeSize: 1000 + VolumeType: gp3 + DeleteOnTermination: true + DeviceName: "/dev/sda1" + ######### + ## GCP ## + #instanceProperties: + # disks: + # - boot: true + # auto_delete: true 
+ # initialize_params: + # - disk_size_gb: 1000 + ######### # Path to a local directory or a remote URI to a .zip file (S3, GS, HTTP) that # will be the working directory for the job. The files in the directory will be From e8590f88b4561679855bde5032c37c86280be6c0 Mon Sep 17 00:00:00 2001 From: Aydin Abiar Date: Sun, 28 Sep 2025 21:08:12 -0700 Subject: [PATCH 08/10] fix typo Signed-off-by: Aydin Abiar --- 05_deploy_deepseek_r1/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/05_deploy_deepseek_r1/README.md b/05_deploy_deepseek_r1/README.md index a57160f..0393e88 100644 --- a/05_deploy_deepseek_r1/README.md +++ b/05_deploy_deepseek_r1/README.md @@ -56,5 +56,5 @@ View the service in the [services tab](https://console.anyscale.com/services) of Shutdown your Anyscale Service: ```bash -anyscale service terminate -n deploy-llama-3-1-70b +anyscale service terminate -n deploy-deepseek-r1 ``` \ No newline at end of file From 57057e5bf7f2ee485926cb7c2b82162436f6791f Mon Sep 17 00:00:00 2001 From: Robert Nishihara Date: Sun, 12 Oct 2025 22:43:46 -0700 Subject: [PATCH 09/10] Minor --- {05_deploy_deepseek_r1 => deploy_deepseek_r1}/Dockerfile | 4 ++-- {05_deploy_deepseek_r1 => deploy_deepseek_r1}/README.md | 0 {05_deploy_deepseek_r1 => deploy_deepseek_r1}/query.py | 0 .../serve_deepseek_r1.py | 0 {05_deploy_deepseek_r1 => deploy_deepseek_r1}/service.yaml | 0 5 files changed, 2 insertions(+), 2 deletions(-) rename {05_deploy_deepseek_r1 => deploy_deepseek_r1}/Dockerfile (75%) rename {05_deploy_deepseek_r1 => deploy_deepseek_r1}/README.md (100%) rename {05_deploy_deepseek_r1 => deploy_deepseek_r1}/query.py (100%) rename {05_deploy_deepseek_r1 => deploy_deepseek_r1}/serve_deepseek_r1.py (100%) rename {05_deploy_deepseek_r1 => deploy_deepseek_r1}/service.yaml (100%) diff --git a/05_deploy_deepseek_r1/Dockerfile b/deploy_deepseek_r1/Dockerfile similarity index 75% rename from 05_deploy_deepseek_r1/Dockerfile rename to deploy_deepseek_r1/Dockerfile index 
a241239..ea0023c 100644 --- a/05_deploy_deepseek_r1/Dockerfile +++ b/deploy_deepseek_r1/Dockerfile @@ -1,8 +1,8 @@ -FROM anyscale/ray:2.49.0-slim-py312-cu128 +FROM anyscale/ray:2.50.0-slim-py312-cu128 # C compiler for Triton’s runtime build step (vLLM V1 engine) # https://github.com/vllm-project/vllm/issues/2997 RUN sudo apt-get update && \ sudo apt-get install -y --no-install-recommends build-essential -RUN pip install vllm==0.10.0 \ No newline at end of file +RUN pip install vllm==0.11.0 diff --git a/05_deploy_deepseek_r1/README.md b/deploy_deepseek_r1/README.md similarity index 100% rename from 05_deploy_deepseek_r1/README.md rename to deploy_deepseek_r1/README.md diff --git a/05_deploy_deepseek_r1/query.py b/deploy_deepseek_r1/query.py similarity index 100% rename from 05_deploy_deepseek_r1/query.py rename to deploy_deepseek_r1/query.py diff --git a/05_deploy_deepseek_r1/serve_deepseek_r1.py b/deploy_deepseek_r1/serve_deepseek_r1.py similarity index 100% rename from 05_deploy_deepseek_r1/serve_deepseek_r1.py rename to deploy_deepseek_r1/serve_deepseek_r1.py diff --git a/05_deploy_deepseek_r1/service.yaml b/deploy_deepseek_r1/service.yaml similarity index 100% rename from 05_deploy_deepseek_r1/service.yaml rename to deploy_deepseek_r1/service.yaml From 2a5e53f9b9675f71554894b397f32f9e5f03745b Mon Sep 17 00:00:00 2001 From: Robert Nishihara Date: Sun, 12 Oct 2025 23:01:39 -0700 Subject: [PATCH 10/10] minor --- deploy_deepseek_r1/README.md | 8 ++++---- deploy_deepseek_r1/serve_deepseek_r1.py | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/deploy_deepseek_r1/README.md b/deploy_deepseek_r1/README.md index 0393e88..db4036f 100644 --- a/deploy_deepseek_r1/README.md +++ b/deploy_deepseek_r1/README.md @@ -15,7 +15,7 @@ Clone the example from GitHub. ```bash git clone https://github.com/anyscale/examples.git -cd examples/05_deploy_deepseek_r1 +cd examples/deploy_deepseek_r1 ``` Deploy the service. 
@@ -25,9 +25,9 @@ anyscale service deploy -f service.yaml ## Understanding the example -- The [application code](https://github.com/anyscale/examples/blob/main/05_deploy_deepseek_r1/serve_deepseek_r1.py) sets the required accelerator type with `accelerator_type="H100"`. To use a different accelerator, replace `"H100"` with the desired name. See the [list of supported accelerators](https://docs.ray.io/en/latest/ray-core/accelerator-types.html#accelerator-types) for available options. +- The [application code](https://github.com/anyscale/examples/blob/main/deploy_deepseek_r1/serve_deepseek_r1.py) sets the required accelerator type with `accelerator_type="H100"`. To use a different accelerator, replace `"H100"` with the desired name. See the [list of supported accelerators](https://docs.ray.io/en/latest/ray-core/accelerator-types.html#accelerator-types) for available options. - Ray Serve automatically autoscales the number of model replicas between `min_replicas` and `max_replicas`. Ray Serve adapts the number of replicas by monitoring queue sizes. For more information on configuring autoscaling, see the [AutoscalingConfig documentation](https://docs.ray.io/en/latest/serve/api/doc/ray.serve.config.AutoscalingConfig.html). -- This example uses vLLM, and the [Dockerfile](https://github.com/anyscale/examples/blob/main/05_deploy_deepseek_r1/Dockerfile) defines the service’s dependencies. When you run `anyscale service deploy`, the build process adds these dependencies on top of an Anyscale-provided base image. +- This example uses vLLM, and the [Dockerfile](https://github.com/anyscale/examples/blob/main/deploy_deepseek_r1/Dockerfile) defines the service’s dependencies. When you run `anyscale service deploy`, the build process adds these dependencies on top of an Anyscale-provided base image. - To configure vLLM, modify the `engine_kwargs` dictionary. 
See [Ray documentation for the `LLMConfig` object](https://docs.ray.io/en/latest/serve/api/doc/ray.serve.llm.LLMConfig.html#ray.serve.llm.LLMConfig). @@ -38,7 +38,7 @@ The `anyscale service deploy` command outputs a line that looks like curl -H "Authorization: Bearer <SERVICE_TOKEN>" <BASE_URL> ``` -From the output, you can extract the service token and base URL. Open [query.py](https://github.com/anyscale/examples/blob/main/05_deploy_deepseek_r1/query.py) and add them to the appropriate fields. +From the output, you can extract the service token and base URL. Open [query.py](https://github.com/anyscale/examples/blob/main/deploy_deepseek_r1/query.py) and add them to the appropriate fields. ```python token = <SERVICE_TOKEN> base_url = <BASE_URL> diff --git a/deploy_deepseek_r1/serve_deepseek_r1.py b/deploy_deepseek_r1/serve_deepseek_r1.py index 7057331..a521467 100644 --- a/deploy_deepseek_r1/serve_deepseek_r1.py +++ b/deploy_deepseek_r1/serve_deepseek_r1.py @@ -14,8 +14,6 @@ max_replicas=1, ) ), - ### Uncomment if your model is gated and needs your Hugging Face token to access it. You can also pass the token to your Anyscale Service with `--env HF_TOKEN=$HF_TOKEN` - # runtime_env=dict(env_vars={"HF_TOKEN": os.environ.get("HF_TOKEN")}), engine_kwargs=dict( max_model_len=16384, # Split weights among 8 GPUs in the node