From 93e231bfcaa2c5cfc8650a0d1c2c83304eebaee0 Mon Sep 17 00:00:00 2001
From: Aydin Abiar <aydin@anyscale.com>
Date: Wed, 27 Aug 2025 18:20:35 -0700
Subject: [PATCH 01/10] add main content

Signed-off-by: Aydin Abiar <aydin@anyscale.com>
---
 05_deploy_deepseek_r1/Dockerfile           | 12 +++++
 05_deploy_deepseek_r1/README.md            | 58 ++++++++++++++++++++++
 05_deploy_deepseek_r1/query.py             | 26 ++++++++++
 05_deploy_deepseek_r1/serve_deepseek_r1.py | 27 ++++++++++
 05_deploy_deepseek_r1/service.yaml         | 41 +++++++++++++++
 5 files changed, 164 insertions(+)
 create mode 100644 05_deploy_deepseek_r1/Dockerfile
 create mode 100644 05_deploy_deepseek_r1/README.md
 create mode 100644 05_deploy_deepseek_r1/query.py
 create mode 100644 05_deploy_deepseek_r1/serve_deepseek_r1.py
 create mode 100644 05_deploy_deepseek_r1/service.yaml

diff --git a/05_deploy_deepseek_r1/Dockerfile b/05_deploy_deepseek_r1/Dockerfile
new file mode 100644
index 0000000..f53b5a0
--- /dev/null
+++ b/05_deploy_deepseek_r1/Dockerfile
@@ -0,0 +1,12 @@
+FROM anyscale/ray:2.48.0-slim-py312-cu128
+
+# C compiler for Triton’s runtime build step (vLLM V1 engine)
+# https://github.com/vllm-project/vllm/issues/2997
+RUN sudo apt-get update && \
+    sudo apt-get install -y --no-install-recommends build-essential
+
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+
+RUN uv pip install --system vllm==0.9.2
+# Avoid https://github.com/vllm-project/vllm-ascend/issues/2046 with transformers < 4.54.0
+RUN uv pip install --system transformers==4.53.3
diff --git a/05_deploy_deepseek_r1/README.md b/05_deploy_deepseek_r1/README.md
new file mode 100644
index 0000000..9f9b742
--- /dev/null
+++ b/05_deploy_deepseek_r1/README.md
@@ -0,0 +1,58 @@
+---
+description: "Deploy DeepSeek R1 with Ray Serve LLM."
+---
+
+# Deploy DeepSeek R1
+
+This example uses Ray Serve along with vLLM to deploy a DeepSeek R1 model as an Anyscale service.
+
+## Install the Anyscale CLI
+
+```bash
+pip install -U anyscale
+anyscale login
+```
+
+## Deploy the service
+
+Clone the example from GitHub.
+
+```bash
+git clone https://github.com/anyscale/examples.git
+cd examples/05_deploy_deepseek_r1
+```
+
+Deploy the service. 
+```bash
+export HF_TOKEN=***
+anyscale service deploy -f service.yaml
+```
+
+## Understanding the example
+
+- The [application code](https://github.com/anyscale/examples/blob/main/05_deploy_deepseek_r1/serve_deepseek_r1.py) sets the required accelerator type with `accelerator_type="H100"`. To use a different accelerator, replace `"H100"` with the desired name. See the [list of supported accelerators](https://docs.ray.io/en/latest/ray-core/accelerator-types.html#accelerator-types) for available options.
+- Ray Serve automatically autoscales the number of model replicas between `min_replicas` and `max_replicas`. Ray Serve adapts the number of replicas by monitoring queue sizes. For more information on configuring autoscaling, see the [AutoscalingConfig documentation](https://docs.ray.io/en/latest/serve/api/doc/ray.serve.config.AutoscalingConfig.html).
+- This example uses vLLM, and the [Dockerfile](https://github.com/anyscale/examples/blob/main/05_deploy_deepseek_r1/Dockerfile) defines the service’s dependencies. When you run `anyscale service deploy`, the build process adds these dependencies on top of an Anyscale-provided base image.
+- To configure vLLM, modify the `engine_kwargs` dictionary. See [Ray documentation for the `LLMConfig` object](https://docs.ray.io/en/latest/serve/api/doc/ray.serve.llm.LLMConfig.html#ray.serve.llm.LLMConfig).
+
+
+## Query the service
+
+The `anyscale service deploy` command outputs a line that looks like  
+```text
+curl -H "Authorization: Bearer <SERVICE_TOKEN>" <BASE_URL>
+```
+
+From the output, you can extract the service token and base URL. Open [query.py](https://github.com/anyscale/examples/blob/main/05_deploy_deepseek_r1/query.py) and add them to the appropriate fields.
+```python
+token = <SERVICE_TOKEN> 
+base_url = <BASE_URL> 
+```
+
+Query the model  
+```bash
+pip install openai
+python query.py
+```
+
+View the service in the [services tab](https://console.anyscale.com/services) of the Anyscale console.
\ No newline at end of file
diff --git a/05_deploy_deepseek_r1/query.py b/05_deploy_deepseek_r1/query.py
new file mode 100644
index 0000000..518c357
--- /dev/null
+++ b/05_deploy_deepseek_r1/query.py
@@ -0,0 +1,26 @@
+from urllib.parse import urljoin
+from openai import OpenAI
+
+# The "anyscale service deploy" script outputs a line that looks like
+# 
+#     curl -H "Authorization: Bearer <SERVICE_TOKEN>" <BASE_URL>
+# 
+# From this, you can parse out the service token and base URL.
+token = <SERVICE_TOKEN>  # Fill this in. If deploying and querying locally, use token = "FAKE_KEY"
+base_url = <BASE_URL>  # Fill this in. If deploying and querying locally, use base_url = "http://localhost:8000"
+
+client = OpenAI(base_url= urljoin(base_url, "v1"), api_key=token)
+
+response = client.chat.completions.create(
+    model="my-deepseek-r1",
+    messages=[
+        {"role": "user", "content": "What's the capital of France?"}
+    ],
+    stream=True
+)
+
+# Stream and print JSON
+for chunk in response:
+    data = chunk.choices[0].delta.content
+    if data:
+        print(data, end="", flush=True)
\ No newline at end of file
diff --git a/05_deploy_deepseek_r1/serve_deepseek_r1.py b/05_deploy_deepseek_r1/serve_deepseek_r1.py
new file mode 100644
index 0000000..1a9852e
--- /dev/null
+++ b/05_deploy_deepseek_r1/serve_deepseek_r1.py
@@ -0,0 +1,27 @@
+#serve_deepseek_r1.py
+from ray import serve
+from ray.serve.llm import LLMConfig, build_openai_app
+
+llm_config = LLMConfig(
+    model_loading_config=dict(
+        model_id="my-deepseek-r1",
+        model_source="deepseek-ai/DeepSeek-R1",
+    ),
+    accelerator_type="H100",
+    deployment_config=dict(
+        autoscaling_config=dict(
+            min_replicas=1, max_replicas=1,
+        )
+    ),
+    engine_kwargs=dict(
+        max_model_len=16384,
+        # Split weights among 8 GPUs in each node
+        tensor_parallel_size=8,
+        pipeline_parallel_size=2
+    )
+)
+
+app = build_openai_app({"llm_configs": [llm_config]})
+
+# Uncomment the below line to run the service locally with Python.
+# serve.run(app, blocking=True)
diff --git a/05_deploy_deepseek_r1/service.yaml b/05_deploy_deepseek_r1/service.yaml
new file mode 100644
index 0000000..f6d5fdb
--- /dev/null
+++ b/05_deploy_deepseek_r1/service.yaml
@@ -0,0 +1,41 @@
+# View the docs https://docs.anyscale.com/reference/service-api#serviceconfig.
+
+name: deploy-deepseek-r1
+
+# When empty, use the default image. This can be an Anyscale-provided base image
+# like anyscale/ray:2.43.0-slim-py312-cu125, a user-provided base image (provided
+# that it meets certain specs), or you can build new images using the Anyscale
+# image builder at https://console.anyscale.com/v2/container-images.
+
+containerfile: ./Dockerfile
+
+# When empty, Anyscale will auto-select the instance types. You can also specify
+# minimum and maximum resources.
+compute_config:
+#   head_node:
+#     instance_type: m5.2xlarge
+#   worker_nodes:
+#     - instance_type: m5.16xlarge
+#       min_nodes: 0
+#       max_nodes: 100
+#     - instance_type: m7a.24xlarge
+#       min_nodes: 0
+#       max_nodes: 100
+#       market_type: PREFER_SPOT # Defaults to ON_DEMAND
+#     - instance_type: g4dn.2xlarge
+#       min_nodes: 0
+#       max_nodes: 100
+#       market_type: PREFER_SPOT # Defaults to ON_DEMAND
+  auto_select_worker_config: true
+
+# Path to a local directory or a remote URI to a .zip file (S3, GS, HTTP) that
+# will be the working directory for the job. The files in the directory will be
+# automatically uploaded to the job environment in Anyscale.
+working_dir: .
+
+# When empty, this uses the default Anyscale Cloud in your organization.
+cloud:
+
+# Specify the Ray Serve app to deploy.
+applications:
+- import_path: serve_deepseek_r1:app
\ No newline at end of file

From 7553c7d37df2d7dade036c76a28f4927a65c7a58 Mon Sep 17 00:00:00 2001
From: Aydin Abiar <aydin@anyscale.com>
Date: Sun, 31 Aug 2025 21:10:55 -0700
Subject: [PATCH 02/10] dockerfile with ray 2.49

Signed-off-by: Aydin Abiar <aydin@anyscale.com>
---
 05_deploy_deepseek_r1/Dockerfile | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/05_deploy_deepseek_r1/Dockerfile b/05_deploy_deepseek_r1/Dockerfile
index f53b5a0..a241239 100644
--- a/05_deploy_deepseek_r1/Dockerfile
+++ b/05_deploy_deepseek_r1/Dockerfile
@@ -1,12 +1,8 @@
-FROM anyscale/ray:2.48.0-slim-py312-cu128
+FROM anyscale/ray:2.49.0-slim-py312-cu128
 
 # C compiler for Triton’s runtime build step (vLLM V1 engine)
 # https://github.com/vllm-project/vllm/issues/2997
 RUN sudo apt-get update && \
     sudo apt-get install -y --no-install-recommends build-essential
 
-RUN curl -LsSf https://astral.sh/uv/install.sh | sh
-
-RUN uv pip install --system vllm==0.9.2
-# Avoid https://github.com/vllm-project/vllm-ascend/issues/2046 with transformers < 4.54.0
-RUN uv pip install --system transformers==4.53.3
+RUN pip install vllm==0.10.0
\ No newline at end of file

From e3c621af2eb8dfda68520a5dc4e49c8b8d870a2b Mon Sep 17 00:00:00 2001
From: Aydin Abiar <aydin@anyscale.com>
Date: Sun, 31 Aug 2025 21:11:13 -0700
Subject: [PATCH 03/10] remove export HF_token command

Signed-off-by: Aydin Abiar <aydin@anyscale.com>
---
 05_deploy_deepseek_r1/README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/05_deploy_deepseek_r1/README.md b/05_deploy_deepseek_r1/README.md
index 9f9b742..41e12c8 100644
--- a/05_deploy_deepseek_r1/README.md
+++ b/05_deploy_deepseek_r1/README.md
@@ -24,7 +24,6 @@ cd examples/05_deploy_deepseek_r1
 
 Deploy the service. 
 ```bash
-export HF_TOKEN=***
 anyscale service deploy -f service.yaml
 ```
 

From 2666aec894bfa516fe482ddf68df2abed7a77bad Mon Sep 17 00:00:00 2001
From: Aydin Abiar <aydin@anyscale.com>
Date: Mon, 1 Sep 2025 13:56:59 -0700
Subject: [PATCH 04/10] add terminate command

Signed-off-by: Aydin Abiar <aydin@anyscale.com>
---
 05_deploy_deepseek_r1/README.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/05_deploy_deepseek_r1/README.md b/05_deploy_deepseek_r1/README.md
index 41e12c8..f22970b 100644
--- a/05_deploy_deepseek_r1/README.md
+++ b/05_deploy_deepseek_r1/README.md
@@ -54,4 +54,11 @@ pip install openai
 python query.py
 ```
 
-View the service in the [services tab](https://console.anyscale.com/services) of the Anyscale console.
\ No newline at end of file
+View the service in the [services tab](https://console.anyscale.com/services) of the Anyscale console.
+
+## Shutdown 
+ 
+Shutdown your Anyscale Service:
+```bash
+anyscale service terminate -n deploy-deepseek-r1
+```
\ No newline at end of file

From 306e861559282e3ad966a04b0d99d0b6988d1a46 Mon Sep 17 00:00:00 2001
From: Aydin Abiar <aydin@anyscale.com>
Date: Mon, 1 Sep 2025 13:57:20 -0700
Subject: [PATCH 05/10] stream response now

Signed-off-by: Aydin Abiar <aydin@anyscale.com>
---
 05_deploy_deepseek_r1/query.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/05_deploy_deepseek_r1/query.py b/05_deploy_deepseek_r1/query.py
index 518c357..2034e60 100644
--- a/05_deploy_deepseek_r1/query.py
+++ b/05_deploy_deepseek_r1/query.py
@@ -21,6 +21,13 @@
 
 # Stream and print JSON
 for chunk in response:
-    data = chunk.choices[0].delta.content
-    if data:
-        print(data, end="", flush=True)
\ No newline at end of file
+    # Stream reasoning content
+    if hasattr(chunk.choices[0].delta, "reasoning_content"):
+        data_reasoning = chunk.choices[0].delta.reasoning_content
+        if data_reasoning:
+            print(data_reasoning, end="", flush=True)
+    # Later, stream the final answer
+    if hasattr(chunk.choices[0].delta, "content"):
+        data_content = chunk.choices[0].delta.content
+        if data_content:
+            print(data_content, end="", flush=True)
\ No newline at end of file

From 25b24e42780df024571cd80d2c354a5d13d9f5b4 Mon Sep 17 00:00:00 2001
From: Aydin Abiar <aydin@anyscale.com>
Date: Mon, 1 Sep 2025 13:57:35 -0700
Subject: [PATCH 06/10] add reasoning parser

Signed-off-by: Aydin Abiar <aydin@anyscale.com>
---
 05_deploy_deepseek_r1/serve_deepseek_r1.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/05_deploy_deepseek_r1/serve_deepseek_r1.py b/05_deploy_deepseek_r1/serve_deepseek_r1.py
index 1a9852e..ad67735 100644
--- a/05_deploy_deepseek_r1/serve_deepseek_r1.py
+++ b/05_deploy_deepseek_r1/serve_deepseek_r1.py
@@ -17,8 +17,9 @@
         max_model_len=16384,
         # Split weights among 8 GPUs in each node
         tensor_parallel_size=8,
-        pipeline_parallel_size=2
-    )
+        pipeline_parallel_size=2,
+        reasoning_parser="deepseek_r1",
+    ),
 )
 
 app = build_openai_app({"llm_configs": [llm_config]})

From 0cb4f969cb6a902dd9b8386a8fde2517809510ae Mon Sep 17 00:00:00 2001
From: Aydin Abiar <aydin@anyscale.com>
Date: Fri, 26 Sep 2025 17:48:09 -0700
Subject: [PATCH 07/10] update to be consistent with anyscale template

Signed-off-by: Aydin Abiar <aydin@anyscale.com>
---
 05_deploy_deepseek_r1/README.md            |  6 +-----
 05_deploy_deepseek_r1/serve_deepseek_r1.py | 11 +++++++----
 05_deploy_deepseek_r1/service.yaml         | 18 ++++++++++++++++++
 3 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/05_deploy_deepseek_r1/README.md b/05_deploy_deepseek_r1/README.md
index f22970b..a57160f 100644
--- a/05_deploy_deepseek_r1/README.md
+++ b/05_deploy_deepseek_r1/README.md
@@ -1,7 +1,3 @@
----
-description: "Deploy DeepSeek R1 with Ray Serve LLM."
----
-
 # Deploy DeepSeek R1
 
 This example uses Ray Serve along with vLLM to deploy a DeepSeek R1 model as an Anyscale service.
@@ -60,5 +56,5 @@ View the service in the [services tab](https://console.anyscale.com/services) of
  
 Shutdown your Anyscale Service:
 ```bash
-anyscale service terminate -n deploy-deepseek-r1
+anyscale service terminate -n deploy-llama-3-1-70b
 ```
\ No newline at end of file
diff --git a/05_deploy_deepseek_r1/serve_deepseek_r1.py b/05_deploy_deepseek_r1/serve_deepseek_r1.py
index ad67735..7057331 100644
--- a/05_deploy_deepseek_r1/serve_deepseek_r1.py
+++ b/05_deploy_deepseek_r1/serve_deepseek_r1.py
@@ -1,4 +1,4 @@
-#serve_deepseek_r1.py
+# serve_deepseek_r1.py
 from ray import serve
 from ray.serve.llm import LLMConfig, build_openai_app
 
@@ -10,15 +10,18 @@
     accelerator_type="H100",
     deployment_config=dict(
         autoscaling_config=dict(
-            min_replicas=1, max_replicas=1,
+            min_replicas=1,
+            max_replicas=1,
         )
     ),
+    ### Uncomment if your model is gated and needs your Hugging Face token to access it. You can also pass the token to your Anyscale Service with `--env HF_TOKEN=$HF_TOKEN`
+    # runtime_env=dict(env_vars={"HF_TOKEN": os.environ.get("HF_TOKEN")}),
     engine_kwargs=dict(
         max_model_len=16384,
-        # Split weights among 8 GPUs in each node
+        # Split weights among 8 GPUs in the node
         tensor_parallel_size=8,
         pipeline_parallel_size=2,
-        reasoning_parser="deepseek_r1",
+        reasoning_parser="deepseek_r1",  # Optional: separate reasoning content from the final answer
     ),
 )
 
diff --git a/05_deploy_deepseek_r1/service.yaml b/05_deploy_deepseek_r1/service.yaml
index f6d5fdb..edaf67c 100644
--- a/05_deploy_deepseek_r1/service.yaml
+++ b/05_deploy_deepseek_r1/service.yaml
@@ -27,6 +27,24 @@ compute_config:
 #       max_nodes: 100
 #       market_type: PREFER_SPOT # Defaults to ON_DEMAND
   auto_select_worker_config: true
+  # Change default disk size to 1000GB
+  advanced_instance_config:
+    ## AWS ##
+    BlockDeviceMappings:
+      - Ebs:
+          VolumeSize: 1000
+          VolumeType: gp3
+          DeleteOnTermination: true
+        DeviceName: "/dev/sda1"
+    #########
+    ## GCP ##
+    #instanceProperties:
+    #  disks:
+    #    - boot: true
+    #      auto_delete: true
+    #      initialize_params:
+    #        - disk_size_gb: 1000
+    #########
 
 # Path to a local directory or a remote URI to a .zip file (S3, GS, HTTP) that
 # will be the working directory for the job. The files in the directory will be

From e8590f88b4561679855bde5032c37c86280be6c0 Mon Sep 17 00:00:00 2001
From: Aydin Abiar <aydin@anyscale.com>
Date: Sun, 28 Sep 2025 21:08:12 -0700
Subject: [PATCH 08/10] fix typo

Signed-off-by: Aydin Abiar <aydin@anyscale.com>
---
 05_deploy_deepseek_r1/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/05_deploy_deepseek_r1/README.md b/05_deploy_deepseek_r1/README.md
index a57160f..0393e88 100644
--- a/05_deploy_deepseek_r1/README.md
+++ b/05_deploy_deepseek_r1/README.md
@@ -56,5 +56,5 @@ View the service in the [services tab](https://console.anyscale.com/services) of
  
 Shutdown your Anyscale Service:
 ```bash
-anyscale service terminate -n deploy-llama-3-1-70b
+anyscale service terminate -n deploy-deepseek-r1
 ```
\ No newline at end of file

From 57057e5bf7f2ee485926cb7c2b82162436f6791f Mon Sep 17 00:00:00 2001
From: Robert Nishihara <rkn@anyscale.com>
Date: Sun, 12 Oct 2025 22:43:46 -0700
Subject: [PATCH 09/10] Minor

---
 {05_deploy_deepseek_r1 => deploy_deepseek_r1}/Dockerfile      | 4 ++--
 {05_deploy_deepseek_r1 => deploy_deepseek_r1}/README.md       | 0
 {05_deploy_deepseek_r1 => deploy_deepseek_r1}/query.py        | 0
 .../serve_deepseek_r1.py                                      | 0
 {05_deploy_deepseek_r1 => deploy_deepseek_r1}/service.yaml    | 0
 5 files changed, 2 insertions(+), 2 deletions(-)
 rename {05_deploy_deepseek_r1 => deploy_deepseek_r1}/Dockerfile (75%)
 rename {05_deploy_deepseek_r1 => deploy_deepseek_r1}/README.md (100%)
 rename {05_deploy_deepseek_r1 => deploy_deepseek_r1}/query.py (100%)
 rename {05_deploy_deepseek_r1 => deploy_deepseek_r1}/serve_deepseek_r1.py (100%)
 rename {05_deploy_deepseek_r1 => deploy_deepseek_r1}/service.yaml (100%)

diff --git a/05_deploy_deepseek_r1/Dockerfile b/deploy_deepseek_r1/Dockerfile
similarity index 75%
rename from 05_deploy_deepseek_r1/Dockerfile
rename to deploy_deepseek_r1/Dockerfile
index a241239..ea0023c 100644
--- a/05_deploy_deepseek_r1/Dockerfile
+++ b/deploy_deepseek_r1/Dockerfile
@@ -1,8 +1,8 @@
-FROM anyscale/ray:2.49.0-slim-py312-cu128
+FROM anyscale/ray:2.50.0-slim-py312-cu128
 
 # C compiler for Triton’s runtime build step (vLLM V1 engine)
 # https://github.com/vllm-project/vllm/issues/2997
 RUN sudo apt-get update && \
     sudo apt-get install -y --no-install-recommends build-essential
 
-RUN pip install vllm==0.10.0
\ No newline at end of file
+RUN pip install vllm==0.11.0
diff --git a/05_deploy_deepseek_r1/README.md b/deploy_deepseek_r1/README.md
similarity index 100%
rename from 05_deploy_deepseek_r1/README.md
rename to deploy_deepseek_r1/README.md
diff --git a/05_deploy_deepseek_r1/query.py b/deploy_deepseek_r1/query.py
similarity index 100%
rename from 05_deploy_deepseek_r1/query.py
rename to deploy_deepseek_r1/query.py
diff --git a/05_deploy_deepseek_r1/serve_deepseek_r1.py b/deploy_deepseek_r1/serve_deepseek_r1.py
similarity index 100%
rename from 05_deploy_deepseek_r1/serve_deepseek_r1.py
rename to deploy_deepseek_r1/serve_deepseek_r1.py
diff --git a/05_deploy_deepseek_r1/service.yaml b/deploy_deepseek_r1/service.yaml
similarity index 100%
rename from 05_deploy_deepseek_r1/service.yaml
rename to deploy_deepseek_r1/service.yaml

From 2a5e53f9b9675f71554894b397f32f9e5f03745b Mon Sep 17 00:00:00 2001
From: Robert Nishihara <rkn@anyscale.com>
Date: Sun, 12 Oct 2025 23:01:39 -0700
Subject: [PATCH 10/10] minor

---
 deploy_deepseek_r1/README.md            | 8 ++++----
 deploy_deepseek_r1/serve_deepseek_r1.py | 2 --
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/deploy_deepseek_r1/README.md b/deploy_deepseek_r1/README.md
index 0393e88..db4036f 100644
--- a/deploy_deepseek_r1/README.md
+++ b/deploy_deepseek_r1/README.md
@@ -15,7 +15,7 @@ Clone the example from GitHub.
 
 ```bash
 git clone https://github.com/anyscale/examples.git
-cd examples/05_deploy_deepseek_r1
+cd examples/deploy_deepseek_r1
 ```
 
 Deploy the service. 
@@ -25,9 +25,9 @@ anyscale service deploy -f service.yaml
 
 ## Understanding the example
 
-- The [application code](https://github.com/anyscale/examples/blob/main/05_deploy_deepseek_r1/serve_deepseek_r1.py) sets the required accelerator type with `accelerator_type="H100"`. To use a different accelerator, replace `"H100"` with the desired name. See the [list of supported accelerators](https://docs.ray.io/en/latest/ray-core/accelerator-types.html#accelerator-types) for available options.
+- The [application code](https://github.com/anyscale/examples/blob/main/deploy_deepseek_r1/serve_deepseek_r1.py) sets the required accelerator type with `accelerator_type="H100"`. To use a different accelerator, replace `"H100"` with the desired name. See the [list of supported accelerators](https://docs.ray.io/en/latest/ray-core/accelerator-types.html#accelerator-types) for available options.
 - Ray Serve automatically autoscales the number of model replicas between `min_replicas` and `max_replicas`. Ray Serve adapts the number of replicas by monitoring queue sizes. For more information on configuring autoscaling, see the [AutoscalingConfig documentation](https://docs.ray.io/en/latest/serve/api/doc/ray.serve.config.AutoscalingConfig.html).
-- This example uses vLLM, and the [Dockerfile](https://github.com/anyscale/examples/blob/main/05_deploy_deepseek_r1/Dockerfile) defines the service’s dependencies. When you run `anyscale service deploy`, the build process adds these dependencies on top of an Anyscale-provided base image.
+- This example uses vLLM, and the [Dockerfile](https://github.com/anyscale/examples/blob/main/deploy_deepseek_r1/Dockerfile) defines the service’s dependencies. When you run `anyscale service deploy`, the build process adds these dependencies on top of an Anyscale-provided base image.
 - To configure vLLM, modify the `engine_kwargs` dictionary. See [Ray documentation for the `LLMConfig` object](https://docs.ray.io/en/latest/serve/api/doc/ray.serve.llm.LLMConfig.html#ray.serve.llm.LLMConfig).
 
 
@@ -38,7 +38,7 @@ The `anyscale service deploy` command outputs a line that looks like
 curl -H "Authorization: Bearer <SERVICE_TOKEN>" <BASE_URL>
 ```
 
-From the output, you can extract the service token and base URL. Open [query.py](https://github.com/anyscale/examples/blob/main/05_deploy_deepseek_r1/query.py) and add them to the appropriate fields.
+From the output, you can extract the service token and base URL. Open [query.py](https://github.com/anyscale/examples/blob/main/deploy_deepseek_r1/query.py) and add them to the appropriate fields.
 ```python
 token = <SERVICE_TOKEN> 
 base_url = <BASE_URL> 
diff --git a/deploy_deepseek_r1/serve_deepseek_r1.py b/deploy_deepseek_r1/serve_deepseek_r1.py
index 7057331..a521467 100644
--- a/deploy_deepseek_r1/serve_deepseek_r1.py
+++ b/deploy_deepseek_r1/serve_deepseek_r1.py
@@ -14,8 +14,6 @@
             max_replicas=1,
         )
     ),
-    ### Uncomment if your model is gated and needs your Hugging Face token to access it. You can also pass the token to your Anyscale Service with `--env HF_TOKEN=$HF_TOKEN`
-    # runtime_env=dict(env_vars={"HF_TOKEN": os.environ.get("HF_TOKEN")}),
     engine_kwargs=dict(
         max_model_len=16384,
         # Split weights among 8 GPUs in the node