From f5cdf4b49a5dbe78950786f3c0aa987d4313961c Mon Sep 17 00:00:00 2001
From: "promptless[bot]" <179508745+promptless[bot]@users.noreply.github.com>
Date: Mon, 23 Feb 2026 14:47:28 +0000
Subject: [PATCH 1/8] Deprecate SDK docs and consolidate to serverless
documentation
---
docs.json | 83 +-
sdks/go/endpoints.mdx | 836 ------------------
sdks/go/overview.mdx | 70 --
sdks/javascript/endpoints.mdx | 661 --------------
sdks/javascript/overview.mdx | 52 --
sdks/python/apis.mdx | 238 -----
sdks/python/endpoints.mdx | 468 ----------
sdks/python/overview.mdx | 133 ---
serverless/endpoints/send-requests.mdx | 37 +
serverless/sdks.mdx | 125 +++
tutorials/sdks/python/101/aggregate.mdx | 240 -----
tutorials/sdks/python/101/async.mdx | 226 -----
tutorials/sdks/python/101/error.mdx | 296 -------
tutorials/sdks/python/101/generator.mdx | 190 ----
tutorials/sdks/python/101/hello.mdx | 116 ---
.../sdks/python/101/local-server-testing.mdx | 186 ----
.../sdks/python/102/huggingface-models.mdx | 167 ----
.../102/stable-diffusion-text-to-image.mdx | 218 -----
.../sdks/python/get-started/hello-world.mdx | 54 --
.../sdks/python/get-started/introduction.mdx | 101 ---
.../sdks/python/get-started/prerequisites.mdx | 106 ---
.../python/get-started/running-locally.mdx | 99 ---
22 files changed, 200 insertions(+), 4502 deletions(-)
create mode 100644 serverless/sdks.mdx
diff --git a/docs.json b/docs.json
index cfabedbb..440fe98f 100644
--- a/docs.json
+++ b/docs.json
@@ -40,7 +40,7 @@
"get-started",
"get-started/concepts",
"get-started/manage-accounts",
- "get-started/api-keys"
+ "get-started/api-keys"
]
},
{
@@ -48,6 +48,7 @@
"pages": [
"serverless/overview",
"serverless/quickstart",
+ "serverless/sdks",
"serverless/pricing",
{
"group": "Create handler functions",
@@ -401,50 +402,6 @@
}
]
},
- {
- "tab": "SDK",
- "groups": [
- {
- "group": "Python",
- "pages": [
- "sdks/python/overview",
- "sdks/python/apis",
- "sdks/python/endpoints",
- {
- "group": "Tutorials",
- "pages": [
- "tutorials/sdks/python/get-started/introduction",
- "tutorials/sdks/python/get-started/prerequisites",
- "tutorials/sdks/python/get-started/hello-world",
- "tutorials/sdks/python/get-started/running-locally",
- "tutorials/sdks/python/101/hello",
- "tutorials/sdks/python/101/local-server-testing",
- "tutorials/sdks/python/101/generator",
- "tutorials/sdks/python/101/async",
- "tutorials/sdks/python/101/error",
- "tutorials/sdks/python/101/aggregate",
- "tutorials/sdks/python/102/huggingface-models",
- "tutorials/sdks/python/102/stable-diffusion-text-to-image"
- ]
- }
- ]
- },
- {
- "group": "JavaScript",
- "pages": [
- "sdks/javascript/overview",
- "sdks/javascript/endpoints"
- ]
- },
- {
- "group": "Go",
- "pages": [
- "sdks/go/overview",
- "sdks/go/endpoints"
- ]
- }
- ]
- },
{
"tab": "CLI",
"groups": [
@@ -822,6 +779,42 @@
{
"source": "/hub/public-endpoint-reference",
"destination": "/public-endpoints/reference"
+ },
+ {
+ "source": "/sdks/python/overview",
+ "destination": "/serverless/sdks"
+ },
+ {
+ "source": "/sdks/python/endpoints",
+ "destination": "/serverless/endpoints/send-requests"
+ },
+ {
+ "source": "/sdks/python/apis",
+ "destination": "/api-reference/overview"
+ },
+ {
+ "source": "/sdks/javascript/overview",
+ "destination": "/serverless/sdks"
+ },
+ {
+ "source": "/sdks/javascript/endpoints",
+ "destination": "/serverless/endpoints/send-requests"
+ },
+ {
+ "source": "/sdks/go/overview",
+ "destination": "/serverless/sdks"
+ },
+ {
+ "source": "/sdks/go/endpoints",
+ "destination": "/serverless/endpoints/send-requests"
+ },
+ {
+ "source": "/tutorials/sdks/python/:slug*",
+ "destination": "/serverless/workers/handler-functions"
+ },
+ {
+ "source": "/get-started/install-sdks",
+ "destination": "/serverless/sdks"
}
]
}
diff --git a/sdks/go/endpoints.mdx b/sdks/go/endpoints.mdx
index 689cf434..e69de29b 100644
--- a/sdks/go/endpoints.mdx
+++ b/sdks/go/endpoints.mdx
@@ -1,836 +0,0 @@
----
-title: "Endpoints"
----
-
-Interacting with Runpod's Endpoints is a core feature of the SDK, enabling the execution of tasks and the retrieval of results. This section covers the synchronous and asynchronous execution methods, along with checking the status of operations.
-
-## Prerequisites
-
-Before using the Runpod Go SDK, ensure that you have:
-
-* [Installed the Runpod Go SDK](/sdks/go/overview#install).
-* Configured your API key.
-
-## Set your Endpoint Id
-
-Set your Runpod API key and your Endpoint Id as environment variables.
-
-```go
-package main
-
-import (
- "log"
- "os"
-
- "github.com/runpod/go-sdk/pkg/sdk"
- "github.com.runpod/go-sdk/pkg/sdk/config"
- rpEndpoint "github.com/runpod/go-sdk/pkg/sdk/endpoint"
-)
-
-func main() {
- // Retrieve the API key and base URL from environment variables
- apiKey := os.Getenv("RUNPOD_API_KEY")
- baseURL := os.Getenv("RUNPOD_BASE_URL")
-
- // Check if environment variables are set
- if apiKey == "" {
- log.Fatalf("Environment variable RUNPOD_API_KEY is not set")
- }
- if baseURL == "" {
- log.Fatalf("Environment variable RUNPOD_BASE_URL is not set")
- }
-
-
- // Use the endpoint object
- // ...
-}
-```
-
-This allows all calls to pass through your Endpoint Id with a valid API key.
-
-The following are actions you use on the
-
-* [RunSync](#run-sync)
-* [Run](#run-async)
-* [Stream](#stream)
-* [Health](#health-check)
-* [Status](#status)
-* [Cancel](#cancel)
-* [Purge Queue](#purge-queue)
-
-Here is the revised documentation based on the Go Sample:
-
-## Run the Endpoint
-
-Run the Endpoint using either the asynchronous `run` or synchronous `runSync` method.
-
-Choosing between asynchronous and synchronous execution hinges on your task's needs and application design.
-
-### Run synchronously
-
-To execute an endpoint synchronously and wait for the result, use the `runSync` method on your endpoint. This method blocks the execution until the endpoint run is complete or until it times out.
-
-
-
-```go
-package main
-
-import (
- "encoding/json"
- "fmt"
- "log"
- "os"
-
- "github.com/runpod/go-sdk/pkg/sdk"
- "github.com.runpod/go-sdk/pkg/sdk/config"
- rpEndpoint "github.com/runpod/go-sdk/pkg/sdk/endpoint"
-)
-
-func main() {
- apiKey := os.Getenv("RUNPOD_API_KEY")
- baseURL := os.Getenv("RUNPOD_BASE_URL")
-
- endpoint, err := rpEndpoint.New(
- &config.Config{ApiKey: &apiKey},
- &rpEndpoint.Option{EndpointId: &baseURL},
- )
- if err != nil {
- log.Fatalf("Failed to create endpoint: %v", err)
- }
-
- jobInput := rpEndpoint.RunSyncInput{
- JobInput: &rpEndpoint.JobInput{
- Input: map[string]interface{}{
- "prompt": "Hello World",
- },
- },
- Timeout: sdk.Int(120),
- }
-
- output, err := endpoint.RunSync(&jobInput)
- if err != nil {
- panic(err)
- }
-
- data, _ := json.Marshal(output)
- fmt.Printf("output: %s\n", data)
-}
-```
-
-
-
-
-```bash
-{
- "delayTime": 18,
- "executionTime": 36595,
- "id": "sync-d050a3f6-791a-4aff-857a-66c759db4a06-u1",
- "output": [
- {
- "choices": [],
- "usage": {}
- }
- ],
- "status": "COMPLETED",
- "started": true,
- "completed": true,
- "succeeded": true
-}
-```
-
-
-
-
-
-## Run asynchronously
-
-Asynchronous execution allows for non-blocking operations, enabling your code to perform other tasks while waiting for an operation to complete.
-
-For non-blocking operations, use the `run` method on the endpoint. This method allows you to start an endpoint run and then check its status or wait for its completion at a later time.
-
-
-
-```go
-package main
-
-import (
- "encoding/json"
- "fmt"
- "log"
- "os"
-
- "github.com/runpod/go-sdk/pkg/sdk"
- "github.com/runpod/go-sdk/pkg/sdk/config"
- rpEndpoint "github.com/runpod/go-sdk/pkg/sdk/endpoint"
-)
-
-func main() {
- apiKey := os.Getenv("RUNPOD_API_KEY")
- baseURL := os.Getenv("RUNPOD_BASE_URL")
-
- endpoint, err := rpEndpoint.New(
- &config.Config{ApiKey: &apiKey},
- &rpEndpoint.Option{EndpointId: &baseURL},
- )
- if err != nil {
- log.Fatalf("Failed to create endpoint: %v", err)
- }
-
- jobInput := rpEndpoint.RunInput{
- JobInput: &rpEndpoint.JobInput{
- Input: map[string]interface{}{
- "mock_delay": 95,
- },
- },
- RequestTimeout: sdk.Int(120),
- }
-
- output, err := endpoint.Run(&jobInput)
- if err != nil {
- panic(err)
- }
-
- data, _ := json.Marshal(output)
- fmt.Printf("output: %s\n", data)
-}
-```
-
-
-
-
-```bash
-{
- "id": "d4e960f6-073f-4219-af24-cbae6b532c31-u1",
- "status": "IN_QUEUE"
-}
-```
-
-
-
-
-
-### Get results from an asynchronous run
-
-The following example shows how to get the results of an asynchronous run.
-
-
-
-```go
-package main
-
-import (
- "encoding/json"
- "fmt"
- "log"
- "os"
- "time"
-
- "github.com/runpod/go-sdk/pkg/sdk"
- "github.com/runpod/go-sdk/pkg/sdk/config"
- rpEndpoint "github.com/runpod/go-sdk/pkg/sdk/endpoint"
-)
-
-func main() {
- apiKey := os.Getenv("RUNPOD_API_KEY")
- baseURL := os.Getenv("RUNPOD_BASE_URL")
-
- endpoint, err := rpEndpoint.New(
- &config.Config{ApiKey: &apiKey},
- &rpEndpoint.Option{EndpointId: &baseURL},
- )
- if err != nil {
- log.Fatalf("Failed to create endpoint: %v", err)
- }
-
- // Initiate the asynchronous run
- jobInput := rpEndpoint.RunInput{
- JobInput: &rpEndpoint.JobInput{
- Input: map[string]interface{}{"mock_delay": 95},
- },
- RequestTimeout: sdk.Int(120),
- }
- runOutput, err := endpoint.Run(&jobInput)
- if err != nil {
- log.Fatalf("Failed to initiate the run: %v", err)
- }
-
- // Extract the ID from the run output
- runID := *runOutput.Id
- fmt.Printf("Run ID: %s\n", runID)
-
- // Prepare the input for status polling
- statusInput := rpEndpoint.StatusInput{
- Id: sdk.String(runID),
- }
-
- // Poll the status until it completes or fails
- var statusOutput *rpEndpoint.StatusOutput
- for {
- statusOutput, err = endpoint.Status(&statusInput)
- if err != nil {
- log.Printf("Error checking status: %v", err)
- time.Sleep(5 * time.Second)
- continue
- }
-
- statusJSON, _ := json.Marshal(statusOutput)
- fmt.Printf("Status: %s\n", statusJSON)
-
- if *statusOutput.Status == "COMPLETED" || *statusOutput.Status == "FAILED" {
- break
- }
-
- time.Sleep(5 * time.Second)
- }
-
- // Retrieve the final result (assuming it's available in the status output)
- if *statusOutput.Status == "COMPLETED" {
- fmt.Println("Run completed successfully!")
- // Handle the completed run's output if needed
- } else {
- fmt.Println("Run failed!")
- // Handle the failed run if needed
- }
-}
-```
-
-
-
-
-```bash
-Run ID: 353b1e99-2f35-43a8-8a8b-001d59df8aa1-u1
-Status: {"id":"353b1e99-2f35-43a8-8a8b-001d59df8aa1-u1","status":"IN_QUEUE"}
-Status: {"delayTime":536,"executionTime":239,"id":"353b1e99-2f35-43a8-8a8b-001d59df8aa1-u1","output":"69.30.85.167","status":"COMPLETED"}
-Run completed successfully!
-```
-
-
-
-
-
-## Stream
-
-Stream allows you to stream the output of an Endpoint run. To enable streaming, your handler must support the `"return_aggregate_stream": True` option on the `start` method of your Handler. Once enabled, use the `stream` method to receive data as it becomes available.
-
-
-
-```go
-package main
-
-import (
- "encoding/json"
- "fmt"
-
- "github.com/runpod/go-sdk/pkg/sdk/config"
- rpEndpoint "github.com/runpod/go-sdk/pkg/sdk/endpoint"
-)
-
-func main() {
-
- apiKey := os.Getenv("RUNPOD_API_KEY")
- baseURL := os.Getenv("RUNPOD_BASE_URL")
-
- endpoint, err := rpEndpoint.New(
- &config.Config{ApiKey: &apiKey},
- &rpEndpoint.Option{EndpointId: &baseURL},
- )
- if err != nil {
- panic(err)
- }
-
- request, err := endpoint.Run(&rpEndpoint.RunInput{
- JobInput: &rpEndpoint.JobInput{
- Input: map[string]interface{}{
- "prompt": "Hello World",
- },
- },
- })
- if err != nil {
- panic(err)
- }
-
- streamChan := make(chan rpEndpoint.StreamResult, 100)
-
- err = endpoint.Stream(&rpEndpoint.StreamInput{Id: request.Id}, streamChan)
- if err != nil {
- // timeout reached, if we want to get the data that has been streamed
- if err.Error() == "ctx timeout reached" {
- for data := range streamChan {
- dt, _ := json.Marshal(data)
- fmt.Printf("output:%s\n", dt)
- }
- }
- panic(err)
- }
-
- for data := range streamChan {
- dt, _ := json.Marshal(data)
- fmt.Printf("output:%s\n", dt)
- }
-
-}
-```
-
-
-
-
-```bash
-{ id: 'cb68890e-436f-4234-955d-001db6afe972-u1', status: 'IN_QUEUE' }
-{
- "output": "H"
-}
-{
- "output": "e"
-}
-{
- "output": "l"
-}
-{
- "output": "l"
-}
-{
- "output": "o"
-}
-{
- "output": ","
-}
-{
- "output": " "
-}
-{
- "output": "W"
-}
-{
- "output": "o"
-}
-{
- "output": "r"
-}
-{
- "output": "l"
-}
-{
- "output": "d"
-}
-{
- "output": "!"
-}
-done streaming
-```
-
-
-
-
-You must define your handler to support the `"return_aggregate_stream": True` option on the `start` method.
-
-```python
-from time import sleep
-import runpod
-
-
-def handler(job):
- job_input = job["input"]["prompt"]
-
- for i in job_input:
- sleep(1) # sleep for 1 second for effect
- yield i
-
-
-runpod.serverless.start(
- {
- "handler": handler,
- "return_aggregate_stream": True, # Ensures aggregated results are streamed back
- }
-)
-```
-
-
-
-
-
-
-
-The maximum size for a payload that can be sent using yield to stream results is 1 MB.
-
-
-
-## Health check
-
-Monitor the health of an endpoint by checking its status, including jobs completed, failed, in progress, in queue, and retried, as well as the status of workers.
-
-
-
-```go
-package main
-
-import (
- "encoding/json"
- "fmt"
-
- "github.com/runpod/go-sdk/pkg/sdk"
- "github.com/runpod/go-sdk/pkg/sdk/config"
- rpEndpoint "github.com/runpod/go-sdk/pkg/sdk/endpoint"
-)
-
-func main() {
-
- apiKey := os.Getenv("RUNPOD_API_KEY")
- baseURL := os.Getenv("RUNPOD_BASE_URL")
-
- endpoint, err := rpEndpoint.New(
- &config.Config{ApiKey: &apiKey},
- &rpEndpoint.Option{EndpointId: &baseURL},
- )
- if err != nil {
- panic(err)
- }
-
- healthInput := rpEndpoint.StatusInput{
- Id: sdk.String("20aad8ef-9c86-4fcd-a349-579ce38e8bfd-u1"),
- }
- output, err := endpoint.Status(&healthInput)
- if err != nil {
- panic(err)
- }
-
- healthData, _ := json.Marshal(output)
- fmt.Printf("health output: %s\n", healthData)
-
-}
-```
-
-
-
-
-```bash
-{
- "jobs": {
- "completed": 72,
- "failed": 1,
- "inProgress": 6,
- "inQueue": 0,
- "retried": 1
- },
- "workers": {
- "idle": 4,
- "initializing": 0,
- "ready": 4,
- "running": 1,
- "throttled": 0
- }
-}
-```
-
-
-
-
-
-## Status
-
-Use the `status` method and specify the `id` of the run to get the status of a run.
-
-
-
-```go
-package main
-
-import (
- "encoding/json"
- "fmt"
- "log"
- "os"
-
- "github.com/runpod/go-sdk/pkg/sdk"
- "github.com/runpod/go-sdk/pkg/sdk/config"
- rpEndpoint "github.com/runpod/go-sdk/pkg/sdk/endpoint"
-)
-
-func main() {
-
- apiKey := os.Getenv("RUNPOD_API_KEY")
- baseURL := os.Getenv("RUNPOD_BASE_URL")
-
- endpoint, err := rpEndpoint.New(
- &config.Config{ApiKey: &apiKey},
- &rpEndpoint.Option{EndpointId: &baseURL},
- )
- if err != nil {
- log.Fatalf("Failed to create endpoint: %v", err)
- }
- input := rpEndpoint.StatusInput{
- Id: sdk.String("5efff030-686c-4179-85bb-31b9bf97b944-u1"),
- }
- output, err := endpoint.Status(&input)
- if err != nil {
- panic(err)
- }
- dt, _ := json.Marshal(output)
- fmt.Printf("output:%s\n", dt)
-}
-```
-
-
-
-
-```bash
-{
- "delayTime": 18,
- "id": "792b1497-b2c8-4c95-90bf-4e2a6a2a37ff-u1",
- "status": "IN_PROGRESS",
- "started": true,
- "completed": false,
- "succeeded": false
-}
-```
-
-
-
-
-
-## Cancel
-
-You can cancel a Job request by using the `cancel()` function on the run request. You might want to cancel a Job because it's stuck with a status of `IN_QUEUE` or `IN_PROGRESS`, or because you no longer need the result.
-
-
-
-```go
-package main
-
-import (
- "encoding/json"
- "fmt"
-
- "github.com/runpod/go-sdk/pkg/sdk"
- "github.com/runpod/go-sdk/pkg/sdk/config"
- rpEndpoint "github.com/runpod/go-sdk/pkg/sdk/endpoint"
-)
-
-func main() {
-
- apiKey := os.Getenv("RUNPOD_API_KEY")
- baseURL := os.Getenv("RUNPOD_BASE_URL")
-
- endpoint, err := rpEndpoint.New(
- &config.Config{ApiKey: &apiKey},
- &rpEndpoint.Option{EndpointId: &baseURL},
- )
- if err != nil {
- panic(err)
- }
-
- cancelInput := rpEndpoint.CancelInput{
- Id: sdk.String("00edfd03-8094-46da-82e3-ea47dd9566dc-u1"),
- }
- output, err := endpoint.Cancel(&cancelInput)
- if err != nil {
- panic(err)
- }
-
- healthData, _ := json.Marshal(output)
- fmt.Printf("health output: %s\n", healthData)
-
-}
-```
-
-
-
-
-```bash
-{
- "id": "5fb6a8db-a8fa-41a1-ad81-f5fad9755f9e-u1",
- "status": "CANCELLED"
-}
-```
-
-
-
-
-
-### Timeout
-
-You can set the maximum time to wait for a response from the endpoint using the `RequestTimeout` field in the `RunInput` struct.
-
-
-
-```go
-package main
-
-import (
- "encoding/json"
- "fmt"
- "log"
- "os"
-
- "github.com/runpod/go-sdk/pkg/sdk"
- "github.com/runpod/go-sdk/pkg/sdk/config"
- rpEndpoint "github.com/runpod/go-sdk/pkg/sdk/endpoint"
-)
-
-func main() {
- apiKey := os.Getenv("RUNPOD_API_KEY")
- baseURL := os.Getenv("RUNPOD_BASE_URL")
-
- endpoint, err := rpEndpoint.New(
- &config.Config{ApiKey: &apiKey},
- &rpEndpoint.Option{EndpointId: &baseURL},
- )
- if err != nil {
- log.Fatalf("Failed to create endpoint: %v", err)
- }
-
- jobInput := rpEndpoint.RunInput{
- JobInput: &rpEndpoint.JobInput{
- Input: map[string]interface{}
- RequestTimeout: sdk.Int(120),
- }
-
- output, err := endpoint.Run(&jobInput)
- if err != nil {
- panic(err)
- }
-
- data, _ := json.Marshal(output)
- fmt.Printf("output: %s\n", data)
-}
-```
-
-
-
-
-```bash
-{
- "id": "43309f93-0422-4eac-92cf-e385dee36e99-u1",
- "status": "IN_QUEUE"
-}
-```
-
-
-
-
-
-### Execution policy
-
-You can specify the TTL (Time-to-Live) and ExecutionTimeout values for the job using the `Input` map of the `JobInput` struct.
-
-
-
-```go
-package main
-
-import (
- "encoding/json"
- "fmt"
- "log"
- "os"
-
- "github.com/runpod/go-sdk/pkg/sdk"
- "github.com/runpod/go-sdk/pkg/sdk/config"
- rpEndpoint "github.com/runpod/go-sdk/pkg/sdk/endpoint"
-)
-
-func main() {
- apiKey := os.Getenv("RUNPOD_API_KEY")
- baseURL := os.Getenv("RUNPOD_BASE_URL")
-
- endpoint, err := rpEndpoint.New(
- &config.Config{ApiKey: &apiKey},
- &rpEndpoint.Option{EndpointId: &baseURL},
- )
- if err != nil {
- log.Fatalf("Failed to create endpoint: %v", err)
- }
-
- jobInput := rpEndpoint.RunInput{
- JobInput: &rpEndpoint.JobInput{
- Input: map[string]interface{}{
- "ttl": 3600, // Set the TTL value, e.g., 3600 seconds (1 hour)
- "execution_timeout": 300, // Set the ExecutionTimeout value, e.g., 300 seconds (5 minutes)
- },
- },
- RequestTimeout: sdk.Int(120),
- }
-
- output, err := endpoint.Run(&jobInput)
- if err != nil {
- panic(err)
- }
-
- data, _ := json.Marshal(output)
- fmt.Printf("output: %s\n", data)
-}
-```
-
-
-
-
-```bash
-{
- "id": "21bd3763-dcbf-4091-84ee-85b80907a020-u1",
- "status": "IN_QUEUE"
-}
-```
-
-
-
-
-
-For more information, see [Execution policy](/serverless/endpoints/send-requests#execution-policies).
-
-## Purge Queue
-
-Create an instance of the `PurgeQueueInput` struct and set the desired values. Call the `PurgeQueue` method of the Endpoint with the `PurgeQueueInput` instance.
-
-`PurgeQueue()` doesn't affect Jobs in progress.
-
-
-
-```go
-package main
-
-import (
- "fmt"
- "log"
- "os"
-
- "github.com/runpod/go-sdk/pkg/sdk"
- "github.com/runpod/go-sdk/pkg/sdk/config"
- rpEndpoint "github.com/runpod/go-sdk/pkg/sdk/endpoint"
-)
-
-func main() {
- apiKey := os.Getenv("RUNPOD_API_KEY")
- baseURL := os.Getenv("RUNPOD_BASE_URL")
-
- endpoint, err := rpEndpoint.New(
- &config.Config{ApiKey: &apiKey},
- &rpEndpoint.Option{EndpointId: &baseURL},
- )
- if err != nil {
- log.Fatalf("Failed to create endpoint: %v", err)
- }
-
- purgeQueueInput := rpEndpoint.PurgeQueueInput{
- RequestTimeout: sdk.Int(5), // Set the request timeout to 5 seconds
- }
-
- purgeQueueOutput, err := endpoint.PurgeQueue(&purgeQueueInput)
- if err != nil {
- panic(err)
- }
-
- fmt.Printf("Status: %s\n", *purgeQueueOutput.Status)
- fmt.Printf("Removed: %d\n", *purgeQueueOutput.Removed)
-}
-```
-
-
-
-
-```bash
-Status: completed
-Removed: 1
-```
-
-
-
-
diff --git a/sdks/go/overview.mdx b/sdks/go/overview.mdx
index 7072c61f..e69de29b 100644
--- a/sdks/go/overview.mdx
+++ b/sdks/go/overview.mdx
@@ -1,70 +0,0 @@
----
-title: "Overview"
-description: "Use the Runpod Go SDK to integrate Serverless endpoints with your Go projects."
----
-
-Get started with setting up your Runpod projects using Go. Whether you're building web applications, server-side implementations, or automating tasks, the Runpod Go SDK provides the tools you need. This guide outlines the steps to get your development environment ready and integrate Runpod into your Go projects.
-
-## Prerequisites
-
-Before you begin, ensure that you have the following:
-
-* Go installed on your machine (version 1.16 or later)
-* A Runpod account with an API key and Endpoint Id
-
-## Install the Runpod SDK
-
-Before integrating Runpod into your project, you'll need to install the SDK.
-
-To install the Runpod SDK, run the following `go get` command in your project directory.
-
-```sh
-go get github.com/runpod/go-sdk
-```
-
-This command installs the `runpod-sdk` package. Then run the following command to install the dependencies:
-
-```sh
-go mod tidy
-```
-
-For more details about the package, visit the [Go package page](https://pkg.go.dev/github.com/runpod/go-sdk/pkg/sdk) or the [GitHub repository](https://github.com/runpod/go-sdk).
-
-## Add your API key
-
-To use the Runpod SDK in your project, you first need to import it and configure it with your API key and endpoint ID. Ensure these values are securely stored, preferably as environment variables.
-
-Below is a basic example of how to initialize and use the Runpod SDK in your Go project.
-
-```go
-func main() {
- endpoint, err := rpEndpoint.New(
- &config.Config{ApiKey: sdk.String(os.Getenv("RUNPOD_API_KEY"))},
- &rpEndpoint.Option{EndpointId: sdk.String(os.Getenv("RUNPOD_BASE_URL"))},
- )
- if err != nil {
- panic(err)
- }
-
- // Use the endpoint object
- // ...
-}
-```
-
-This snippet demonstrates how to import the SDK, initialize it with your API key, and reference a specific endpoint using its ID.
-
-### Secure your API key
-
-When working with the Runpod SDK, it's essential to secure your API key. Storing the API key in environment variables is recommended, as shown in the initialization example. This method keeps your key out of your source code and reduces the risk of accidental exposure.
-
-
-
-Use environment variables or secure secrets management solutions to handle sensitive information like API keys.
-
-
-
-For more information, see the following:
-
-* [Runpod SDK Go Package](https://pkg.go.dev/github.com/runpod/go-sdk/pkg/sdk)
-* [Runpod GitHub Repository](https://github.com/runpod/go-sdk)
-* [Endpoints](/sdks/go/endpoints)
diff --git a/sdks/javascript/endpoints.mdx b/sdks/javascript/endpoints.mdx
index 9509d463..e69de29b 100644
--- a/sdks/javascript/endpoints.mdx
+++ b/sdks/javascript/endpoints.mdx
@@ -1,661 +0,0 @@
----
-title: "Endpoints"
----
-
-Interacting with Runpod's endpoints is a core feature of the SDK, enabling the execution of tasks and the retrieval of results. This section covers the synchronous and asynchronous execution methods, along with checking the status of operations.
-
-## Prerequisites
-
-Before using the Runpod JavaScript, ensure that you have:
-
-* Installed the Runpod JavaScript SDK.
-* Configured your API key.
-
-## Set your Endpoint Id
-
-Set your Runpod API key and your Endpoint Id as environment variables.
-
-```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
-import runpodSdk from "runpod-sdk";
-
-const runpod = runpodSdk(RUNPOD_API_KEY);
-const endpoint = runpod.endpoint(ENDPOINT_ID);
-```
-
-This allows all calls to pass through your Endpoint Id with a valid API key.
-
-In most situations, you'll set a variable name `endpoint` on the `Endpoint` class. This allows you to use the following methods or instances variables from the `Endpoint` class:
-
-* [health](#health-check)
-* [purge\_queue](#purge-queue)
-* [runSync](#run-synchronously)
-* [run](#run-asynchronously)
-
-## Run the Endpoint
-
-Run the Endpoint with the either the asynchronous `run` or synchronous `runSync` method.
-
-Choosing between asynchronous and synchronous execution hinges on your task's needs and application design.
-
-### Run synchronously
-
-To execute an endpoint synchronously and wait for the result, use the `runSync` method on your endpoint. This method blocks the execution until the endpoint run is complete or until it times out.
-
-
-
-```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
-import runpodSdk from "runpod-sdk";
-
-const runpod = runpodSdk(RUNPOD_API_KEY);
-const endpoint = runpod.endpoint(ENDPOINT_ID);
-const result = await endpoint.runSync({
- "input": {
- "prompt": "Hello, World!",
- },
-});
-
-console.log(result);
-```
-
-
-
-
-```json
-{
- delayTime: 18,
- executionTime: 36595,
- id: 'sync-d050a3f6-791a-4aff-857a-66c759db4a06-u1',
- output: [ { choices: [Array], usage: [Object] } ],
- status: 'COMPLETED',
- started: true,
- completed: true,
- succeeded: true
-}
-```
-
-
-
-
-
-## Run asynchronously
-
-Asynchronous execution allows for non-blocking operations, enabling your code to perform other tasks while waiting for an operation to complete.
-
-For non-blocking operations, use the `run` method on the endpoint. This method allows you to start an endpoint run and then check its status or wait for its completion at a later time.
-
-
-
-```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
-import runpodSdk from "runpod-sdk";
-
-const runpod = runpodSdk(RUNPOD_API_KEY);
-const endpoint = runpod.endpoint(ENDPOINT_ID);
-const result = await endpoint.run({
- "input": {
- "prompt": "Hello, World!",
- },
-});
-
-console.log(result);
-```
-
-
-
-
-```json
-{
- "id": "d4e960f6-073f-4219-af24-cbae6b532c31-u1",
- "status": "IN_QUEUE"
-}
-```
-
-
-
-
-
-### Get results from an asynchronous run
-
-The following example shows how to get the results of an asynchronous run.
-
-
-
-```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
-import runpodSdk from "runpod-sdk";
-
-const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
-
-async function main() {
- const runpod = runpodSdk(RUNPOD_API_KEY);
- const endpoint = runpod.endpoint(ENDPOINT_ID);
- const result = await endpoint.run({
- input: {
- prompt: "Hello, World!",
- },
- });
-
- console.log(result);
- console.log("run response");
- console.log(result);
-
- const { id } = result; // Extracting the operation ID from the initial run response
-
- // Check the status in a loop, similar to the working example
- for (let i = 0; i < 20; i++) {
- // Increase or decrease the loop count as necessary
- const statusResult = await endpoint.status(id);
- console.log("status response");
- console.log(statusResult);
-
- if (
- statusResult.status === "COMPLETED"
- || statusResult.status === "FAILED"
- ) {
- // Once completed or failed, log the final status and break the loop
- if (statusResult.status === "COMPLETED") {
- console.log("Operation completed successfully.");
- console.log(statusResult.output);
- } else {
- console.log("Operation failed.");
- console.log(statusResult);
- }
- break;
- }
-
- // Wait for a bit before checking the status again
- await sleep(5000);
- }
-}
-
-main();
-```
-
-
-
-
-```json
-run response
-{ id: 'c671a352-78e6-4eba-b2c8-2ea537c00897-u1', status: 'IN_QUEUE' }
-status response
-{
- delayTime: 19,
- id: 'c671a352-78e6-4eba-b2c8-2ea537c00897-u1',
- status: 'IN_PROGRESS',
- started: true,
- completed: false,
- succeeded: false
-}
-status response
-{
- delayTime: 19,
- executionTime: 539,
- id: 'c671a352-78e6-4eba-b2c8-2ea537c00897-u1',
- output: [ { choices: [Array], usage: [Object] } ],
- status: 'COMPLETED',
- started: true,
- completed: true,
- succeeded: true
-}
-Operation completed successfully.
-[ { choices: [ [Object] ], usage: { input: 5, output: 16 } } ]
-```
-
-
-
-
-
-### Poll the status of an asynchronous run
-
-Uses `await endpoint.status(id)` to check the status of the operation repeatedly until it either completes or fails. After each check, the function waits for 5 seconds (or any other suitable duration you choose) before checking the status again, using the sleep function. This approach ensures your application remains responsive and doesn't overwhelm the Runpod endpoint with status requests.
-
-
-
-```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
-import runpodSdk from "runpod-sdk";
-
-// Function to pause execution for a specified time
-const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
-
-async function main() {
- try {
- const runpod = runpodSdk(RUNPOD_API_KEY);
- const endpoint = runpod.endpoint(ENDPOINT_ID);
- const result = await endpoint.run({
- input: {
- prompt: "Hello, World!",
- },
- });
-
- const { id } = result;
- if (!id) {
- console.error("No ID returned from endpoint.run");
- return;
- }
-
- // Poll the status of the operation until it completes or fails
- let isComplete = false;
- while (!isComplete) {
- const status = await endpoint.status(id);
- console.log(`Current status: ${status.status}`);
-
- if (status.status === "COMPLETED" || status.status === "FAILED") {
- isComplete = true; // Exit the loop
- console.log(`Operation ${status.status.toLowerCase()}.`);
-
- if (status.status === "COMPLETED") {
- console.log("Output:", status.output);
- } else {
- console.error("Error details:", status.error);
- }
- } else {
- await sleep(5000); // Adjust the delay as needed
- }
- }
- } catch (error) {
- console.error("An error occurred:", error);
- }
-}
-
-main();
-```
-
-
-
-
-```json
-Current status: IN_QUEUE
-Current status: IN_PROGRESS
-Current status: COMPLETED
-Operation completed.
-Hello, World!
-```
-
-
-
-
-
-## Stream
-
-Stream allows you to stream the output of an Endpoint run. To enable streaming, your handler must support the `"return_aggregate_stream": True` option on the `start` method of your Handler. Once enabled, use the `stream` method to receive data as it becomes available.
-
-
-
-```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
-import runpodSdk from "runpod-sdk";
-
-async function main() {
- const runpod = runpodSdk(RUNPOD_API_KEY);
- const endpoint = runpod.endpoint(ENDPOINT_ID);
- const result = await endpoint.run({
- input: {
- prompt: "Hello, World!",
- },
- });
-
- console.log(result);
-
- const { id } = result;
- for await (const result of endpoint.stream(id)) {
- console.log(`${JSON.stringify(result, null, 2)}`);
- }
- console.log("done streaming");
-}
-
-main();
-```
-
-
-
-
-```json
-{ id: 'cb68890e-436f-4234-955d-001db6afe972-u1', status: 'IN_QUEUE' }
-{
- "output": "H"
-}
-{
- "output": "e"
-}
-{
- "output": "l"
-}
-{
- "output": "l"
-}
-{
- "output": "o"
-}
-{
- "output": ","
-}
-{
- "output": " "
-}
-{
- "output": "W"
-}
-{
- "output": "o"
-}
-{
- "output": "r"
-}
-{
- "output": "l"
-}
-{
- "output": "d"
-}
-{
- "output": "!"
-}
-done streaming
-```
-
-
-
-
-You must define your handler to support the `"return_aggregate_stream": True` option on the `start` method.
-
-```python
-from time import sleep
-import runpod
-
-
-def handler(job):
- job_input = job["input"]["prompt"]
-
- for i in job_input:
- sleep(1) # sleep for 1 second for effect
- yield i
-
-
-runpod.serverless.start(
- {
- "handler": handler,
- "return_aggregate_stream": True, # Ensures aggregated results are streamed back
- }
-)
-```
-
-
-
-
-
-
-
-The maximum size for a payload that can be sent using yield to stream results is 1 MB.
-
-
-
-## Health check
-
-Monitor the health of an endpoint by checking its status, including jobs completed, failed, in progress, in queue, and retried, as well as the status of workers.
-
-
-
-```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
-import runpodSdk from "runpod-sdk";
-
-const runpod = runpodSdk(RUNPOD_API_KEY);
-const endpoint = runpod.endpoint(ENDPOINT_ID);
-
-const health = await endpoint.health();
-console.log(health);
-```
-
-
-
-
-```json
-{
- "jobs": {
- "completed": 72,
- "failed": 1,
- "inProgress": 6,
- "inQueue": 0,
- "retried": 1
- },
- "workers": {
- "idle": 4,
- "initializing": 0,
- "ready": 4,
- "running": 1,
- "throttled": 0
- }
-}
-```
-
-
-
-
-
-## Status
-
-Use the `status` method and specify the `id` of the run to get the status of a run.
-
-
-
-```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
-import runpodSdk from "runpod-sdk";
-
-async function main() {
- try {
- const runpod = runpodSdk(RUNPOD_API_KEY);
- const endpoint = runpod.endpoint(ENDPOINT_ID);
- const result = await endpoint.run({
- input: {
- prompt: "Hello, World!",
- },
- });
-
- const { id } = result;
- if (!id) {
- console.error("No ID returned from endpoint.run");
- return;
- }
-
- const status = await endpoint.status(id);
- console.log(status);
- } catch (error) {
- console.error("An error occurred:", error);
- }
-}
-
-main();
-```
-
-
-
-
-```json
-{
- "delayTime": 18,
- "id": "792b1497-b2c8-4c95-90bf-4e2a6a2a37ff-u1",
- "status": "IN_PROGRESS",
- "started": true,
- "completed": false,
- "succeeded": false
-}
-```
-
-
-
-
-
-## Cancel
-
-You can cancel a Job request by using the `cancel()` function on the run request. You might want to cancel a Job because it's stuck with a status of `IN_QUEUE` or `IN_PROGRESS`, or because you no longer need the result.
-
-
-
-```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
-import runpodSdk from "runpod-sdk";
-
-async function main() {
- try {
- const runpod = runpodSdk(RUNPOD_API_KEY);
- const endpoint = runpod.endpoint(ENDPOINT_ID);
- const result = await endpoint.run({
- input: {
- prompt: "Hello, World!",
- },
- });
-
- const { id } = result;
- if (!id) {
- console.error("No ID returned from endpoint.run");
- return;
- }
-
- const cancel = await endpoint.cancel(id);
- console.log(cancel);
- } catch (error) {
- console.error("An error occurred:", error);
- }
-}
-
-main();
-```
-
-
-
-
-```json
-{
- "id": "5fb6a8db-a8fa-41a1-ad81-f5fad9755f9e-u1",
- "status": "CANCELLED"
-}
-```
-
-
-
-
-
-### Timeout
-
-To set a timeout on a run, pass a timeout value to the `run` method. Time is measured in milliseconds.
-
-
-
-```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
-import runpodSdk from "runpod-sdk";
-
-const runpod = runpodSdk(RUNPOD_API_KEY);
-const endpoint = runpod.endpoint(ENDPOINT_ID);
-const result = await endpoint.run({
- "input": {
- "prompt": "Hello, World!",
- },
-}, 5000);
-
-console.log(result);
-```
-
-
-
-
-```json
-{
- "id": "43309f93-0422-4eac-92cf-e385dee36e99-u1",
- "status": "IN_QUEUE"
-}
-```
-
-
-
-
-
-### Execution policy
-
-You can set the maximum time to wait for a response from the endpoint in the `policy` parameter.
-
-
-
-```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
-import runpodSdk from "runpod-sdk";
-
-const runpod = runpodSdk(RUNPOD_API_KEY);
-const endpoint = runpod.endpoint(ENDPOINT_ID);
-const result = await endpoint.run({
- "input": {
- "prompt": "Hello, World!",
- },
- policy: {
- executionTimeout: 5000,
- },
-});
-
-console.log(result);
-```
-
-
-
-
-```json
-{
- "id": "21bd3763-dcbf-4091-84ee-85b80907a020-u1",
- "status": "IN_QUEUE"
-}
-```
-
-
-
-
-
-For more information, see [Execution policy](/serverless/endpoints/operations).
-
-## Purge queue
-
-You can purge all jobs from a queue by using the `purgeQueue()` function.
-
-`purgeQueue()` doesn't affect Jobs in progress.
-
-
-
-```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
-import runpodSdk from "runpod-sdk";
-
-async function main() {
- try {
- const runpod = runpodSdk(RUNPOD_API_KEY);
- const endpoint = runpod.endpoint(ENDPOINT_ID);
- await endpoint.run({
- input: {
- prompt: "Hello, World!",
- },
- });
-
- const purgeQueue = await endpoint.purgeQueue();
- console.log(purgeQueue);
- } catch (error) {
- console.error("An error occurred:", error);
- }
-}
-
-main();
-```
-
-
-
-
-```json
-{
- "removed": 1,
- "status": "completed"
-}
-```
-
-
-
-
diff --git a/sdks/javascript/overview.mdx b/sdks/javascript/overview.mdx
index 34594307..e69de29b 100644
--- a/sdks/javascript/overview.mdx
+++ b/sdks/javascript/overview.mdx
@@ -1,52 +0,0 @@
----
-title: "Overview"
-description: "Use the Runpod JavaScript SDK to integrate Serverless endpoints with your JavaScript projects."
----
-
-Get started with setting up your Runpod projects using JavaScript. Whether you're building web applications, server-side implementations, or automating tasks, the Runpod JavaScript SDK provides the tools you need. This guide outlines the steps to get your development environment ready and integrate Runpod into your JavaScript projects.
-
-## Install the Runpod SDK
-
-Before integrating Runpod into your project, you'll need to install the SDK. Using Node.js and npm (Node Package Manager) simplifies this process. Ensure you have Node.js and npm installed on your system before proceeding.
-
-To install the Runpod SDK, run the following npm command in your project directory.
-
-```bash
-npm install --save runpod-sdk
-# or
-yarn add runpod-sdk
-```
-
-This command installs the `runpod-sdk` package and adds it to your project's `package.json` dependencies. For more details about the package, visit the [npm package page](https://www.npmjs.com/package/runpod-sdk) or the [GitHub repository](https://github.com/runpod/js-sdk).
-
-## Add your API key
-
-To use the Runpod SDK in your project, you first need to import it and configure it with your API key and endpoint ID. Ensure these values are securely stored, preferably as environment variables.
-
-Below is a basic example of how to initialize and use the Runpod SDK in your JavaScript project.
-
-```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
-import runpodSdk from "runpod-sdk";
-
-const runpod = runpodSdk(RUNPOD_API_KEY);
-const endpoint = runpod.endpoint(ENDPOINT_ID);
-```
-
-This snippet demonstrates how to import the SDK, initialize it with your API key, and reference a specific endpoint using its ID. Remember, the Runpod SDK uses the ES Module (ESM) system and supports asynchronous operations, making it compatible with modern JavaScript development practices.
-
-### Secure your API key
-
-When working with the Runpod SDK, it's essential to secure your API key. Storing the API key in environment variables is recommended, as shown in the initialization example. This method keeps your key out of your source code and reduces the risk of accidental exposure.
-
-
-
-Use environment variables or secure secrets management solutions to handle sensitive information like API keys.
-
-
-
-For more information, see the following:
-
-* [Runpod SDK npm Package](https://www.npmjs.com/package/runpod-sdk)
-* [Runpod GitHub Repository](https://github.com/runpod/js-sdk)
-* [Endpoints](/sdks/javascript/endpoints)
diff --git a/sdks/python/apis.mdx b/sdks/python/apis.mdx
index 0ea04a07..e69de29b 100644
--- a/sdks/python/apis.mdx
+++ b/sdks/python/apis.mdx
@@ -1,238 +0,0 @@
----
-title: "API Wrapper"
-sidebarTitle: "APIs"
----
-
-This document outlines the core functionalities provided by the Runpod API, including how to interact with Endpoints, manage Templates, and list available GPUs. These operations let you dynamically manage computational resources within the Runpod environment.
-
-## Get Endpoints
-
-To retrieve a comprehensive list of all available endpoint configurations within Runpod, you can use the `get_endpoints()` function. This function returns a list of endpoint configurations, allowing you to understand what's available for use in your projects.
-
-```python
-import runpod
-import os
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-# Fetching all available endpoints
-endpoints = runpod.get_endpoints()
-
-# Displaying the list of endpoints
-print(endpoints)
-```
-
-## Create Template
-
-Templates in Runpod serve as predefined configurations for setting up environments efficiently. The `create_template()` function facilitates the creation of new templates by specifying a name and a Docker image.
-
-
-
-```python
-import runpod
-import os
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-try:
- # Creating a new template with a specified name and Docker image
- new_template = runpod.create_template(name="test", image_name="runpod/base:0.1.0")
-
- # Output the created template details
- print(new_template)
-
-except runpod.error.QueryError as err:
- # Handling potential errors during template creation
- print(err)
- print(err.query)
-```
-
-
-
-
-```json
-{
- "id": "n6m0htekvq",
- "name": "test",
- "imageName": "runpod/base:0.1.0",
- "dockerArgs": "",
- "containerDiskInGb": 10,
- "volumeInGb": 0,
- "volumeMountPath": "/workspace",
- "ports": "",
- "env": [],
- "isServerless": false
-}
-```
-
-
-
-
-
-## Create Endpoint
-
-Creating a new endpoint with the `create_endpoint()` function. This function requires you to specify a `name` and a `template_id`. Additional configurations such as GPUs, number of Workers, and more can also be specified depending your requirements.
-
-
-
-```python
-import runpod
-import os
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-try:
- # Creating a template to use with the new endpoint
- new_template = runpod.create_template(
- name="test", image_name="runpod/base:0.4.4", is_serverless=True
- )
-
- # Output the created template details
- print(new_template)
-
- # Creating a new endpoint using the previously created template
- new_endpoint = runpod.create_endpoint(
- name="test",
- template_id=new_template["id"],
- gpu_ids="AMPERE_16",
- workers_min=0,
- workers_max=1,
- )
-
- # Output the created endpoint details
- print(new_endpoint)
-
-except runpod.error.QueryError as err:
- # Handling potential errors during endpoint creation
- print(err)
- print(err.query)
-```
-
-
-
-
-```json
-{
- "id": "Unique_Id",
- "name": "YourTemplate",
- "imageName": "runpod/base:0.4.4",
- "dockerArgs": "",
- "containerDiskInGb": 10,
- "volumeInGb": 0,
- "volumeMountPath": "/workspace",
- "ports": null,
- "env": [],
- "isServerless": true
-}
-{
- "id": "Unique_Id",
- "name": "YourTemplate",
- "templateId": "Unique_Id",
- "gpuIds": "AMPERE_16",
- "networkVolumeId": null,
- "locations": null,
- "idleTimeout": 5,
- "scalerType": "QUEUE_DELAY",
- "scalerValue": 4,
- "workersMin": 0,
- "workersMax": 1
-}
-```
-
-
-
-
-
-## Get GPUs
-
-For understanding the computational resources available, the `get_gpus()` function lists all GPUs that can be allocated to endpoints in Runpod. This enables optimal resource selection based on your computational needs.
-
-
-
-```python
-import runpod
-import json
-import os
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-# Fetching all available GPUs
-gpus = runpod.get_gpus()
-
-# Displaying the GPUs in a formatted manner
-print(json.dumps(gpus, indent=2))
-```
-
-
-
-
-```json
-[
- {
- "id": "NVIDIA A100 80GB PCIe",
- "displayName": "A100 80GB",
- "memoryInGb": 80
- },
- {
- "id": "NVIDIA A100-SXM4-80GB",
- "displayName": "A100 SXM 80GB",
- "memoryInGb": 80
- }
- // Additional GPUs omitted for brevity
-]
-```
-
-
-
-
-
-## Get GPU by Id
-
-Use `get_gpu()` and pass in a GPU Id to retrieve details about a specific GPU model by its ID. This is useful when understanding the capabilities and costs associated with various GPU models.
-
-
-
-```python
-import runpod
-import json
-import os
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-gpus = runpod.get_gpu("NVIDIA A100 80GB PCIe")
-
-print(json.dumps(gpus, indent=2))
-```
-
-
-
-
-```json
-{
- "maxGpuCount": 8,
- "id": "NVIDIA A100 80GB PCIe",
- "displayName": "A100 80GB",
- "manufacturer": "Nvidia",
- "memoryInGb": 80,
- "cudaCores": 0,
- "secureCloud": true,
- "communityCloud": true,
- "securePrice": 1.89,
- "communityPrice": 1.59,
- "oneMonthPrice": null,
- "threeMonthPrice": null,
- "oneWeekPrice": null,
- "communitySpotPrice": 0.89,
- "secureSpotPrice": null,
- "lowestPrice": {
- "minimumBidPrice": 0.89,
- "uninterruptablePrice": 1.59
- }
-}
-```
-
-
-
-
-
-Through these functionalities, the Runpod API enables efficient and flexible management of computational resources, catering to a wide range of project requirements.
diff --git a/sdks/python/endpoints.mdx b/sdks/python/endpoints.mdx
index 94f60871..e69de29b 100644
--- a/sdks/python/endpoints.mdx
+++ b/sdks/python/endpoints.mdx
@@ -1,468 +0,0 @@
----
-title: "Endpoints"
----
-
-This documentation provides detailed instructions on how to use the Runpod Python SDK to interact with various endpoints. You can perform synchronous and asynchronous operations, stream data, and check the health status of endpoints.
-
-## Prerequisites
-
-Before using the Runpod Python, ensure that you have:
-
-* Installed the Runpod Python SDK.
-* Configured your API key.
-
-## Set your Endpoint Id
-
-Pass your Endpoint Id on the `Endpoint` class.
-
-```python
-import runpod
-import os
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-endpoint = runpod.Endpoint("YOUR_ENDPOINT_ID")
-```
-
-This allows all calls to pass through your Endpoint Id with a valid API key.
-
-In most situations, you'll set a variable name `endpoint` on the `Endpoint` class. This allows you to use the following methods or instances variables from the `Endpoint` class:
-
-* [health](#health-check)
-* [purge\_queue](#purge-queue)
-* [run\_sync](#run-synchronously)
-* [run](#run-asynchronously)
-
-## Run the Endpoint
-
-Run the Endpoint with the either the asynchronous `run` or synchronous `run_sync` method.
-
-Choosing between asynchronous and synchronous execution hinges on your task's needs and application design.
-
-* **Asynchronous methods**: Choose the asynchronous method for handling tasks efficiently, especially when immediate feedback isn't crucial. They allow your application to stay responsive by running time-consuming operations in the background, ideal for:
-
- * **Non-blocking calls**: Keep your application active while waiting on long processes.
- * **Long-running operations**: Avoid timeouts on tasks over 30 seconds, letting your app's workflow continue smoothly.
- * **Job tracking**: Get a Job Id to monitor task status, useful for complex or delayed-result operations.
-
-* **Synchronous methods**: Choose the synchronous method for these when your application requires immediate results from operations. They're best for:
-
- * **Immediate results**: Necessary for operations where quick outcomes are essential to continue with your app's logic.
- * **Short operations**: Ideal for tasks under 30 seconds to prevent application delays.
- * **Simplicity and control**: Provides a straightforward execution process, with timeout settings for better operational control.
-
-### Run synchronously
-
-To execute an endpoint synchronously and wait for the result, use the `run_sync` method. This method blocks the execution until the endpoint run is complete or until it times out.
-
-```python
-import runpod
-import os
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-endpoint = runpod.Endpoint("YOUR_ENDPOINT_ID")
-
-try:
- run_request = endpoint.run_sync(
- {
- "prompt": "Hello, world!",
- },
- timeout=60, # Timeout in seconds.
- )
-
- print(run_request)
-except TimeoutError:
- print("Job timed out.")
-```
-
-### Run asynchronously
-
-Asynchronous execution allows for non-blocking operations, enabling your code to perform other tasks while waiting for an operation to complete. Runpod supports both standard asynchronous execution and advanced asynchronous programming with Python's [asyncio](https://docs.python.org/3/library/asyncio.html) framework.
-
-Depending on your application's needs, you can choose the approach that best suits your scenario.
-
-For non-blocking operations, use the `run` method. This method allows you to start an endpoint run and then check its status or wait for its completion at a later time.
-
-#### Asynchronous execution
-
-This executes a standard Python environment without requiring an asynchronous event loop.
-
-
-
-```python
-import runpod
-import os
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-input_payload = {"prompt": "Hello, World!"}
-
-endpoint = runpod.Endpoint("YOUR_ENDPOINT_ID")
-run_request = endpoint.run(input_payload)
-
-# Initial check without blocking, useful for quick tasks
-status = run_request.status()
-print(f"Initial job status: {status}")
-
-if status != "COMPLETED":
- # Polling with timeout for long-running tasks
- output = run_request.output(timeout=60)
-else:
- output = run_request.output()
-print(f"Job output: {output}")
-```
-
-
-
-
-```bash
-Initial job status: IN_QUEUE
-Job output: {'input_tokens': 24, 'output_tokens': 16, 'text': ["Hello! How may I assist you today?\n"]}
-```
-
-
-
-
-
-#### Asynchronous execution with asyncio
-
-Use Python's `asyncio` library for handling concurrent Endpoint calls efficiently. This method embraces Python's asyncio framework for asynchronous programming, requiring functions to be defined with async and called with await. This approach is inherently non-blocking and is built to handle concurrency efficiently.
-
-
-
-```python
-import asyncio
-import aiohttp
-import os
-import runpod
-from runpod import AsyncioEndpoint, AsyncioJob
-
-# asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) # For Windows users.
-
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-
-async def main():
- async with aiohttp.ClientSession() as session:
- input_payload = {"prompt": "Hello, World!"}
- endpoint = AsyncioEndpoint("YOUR_ENDPOINT_ID", session)
- job: AsyncioJob = await endpoint.run(input_payload)
-
- # Polling job status
- while True:
- status = await job.status()
- print(f"Current job status: {status}")
- if status == "COMPLETED":
- output = await job.output()
- print("Job output:", output)
- break # Exit the loop once the job is completed.
- elif status in ["FAILED"]:
- print("Job failed or encountered an error.")
-
- break
- else:
- print("Job in queue or processing. Waiting 3 seconds...")
- await asyncio.sleep(3) # Wait for 3 seconds before polling again
-
-
-if __name__ == "__main__":
- asyncio.run(main())
-```
-
-
-
-
-```bash
-Current job status: IN_QUEUE
-Job in queue or processing. Waiting 3 seconds...
-Current job status: COMPLETED
-Job output: {'input_tokens': 24, 'output_tokens': 16, 'text': ['Hello! How may I assist you today?\n']}
-```
-
-
-
-
-
-## Health check
-
-Monitor the health of an endpoint by checking its status, including jobs completed, failed, in progress, in queue, and retried, as well as the status of workers.
-
-
-
-```python
-import runpod
-import json
-import os
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-endpoint = runpod.Endpoint("gwp4kx5yd3nur1")
-
-endpoint_health = endpoint.health()
-
-print(json.dumps(endpoint_health, indent=2))
-```
-
-
-
-
-```bash
-{
- "jobs": {
- "completed": 100,
- "failed": 0,
- "inProgress": 0,
- "inQueue": 0,
- "retried": 0
- },
- "workers": {
- "idle": 1,
- "initializing": 0,
- "ready": 1,
- "running": 0,
- "throttled": 0
- }
-}
-```
-
-
-
-
-
-## Streaming
-
-To enable streaming, your handler must support the `"return_aggregate_stream": True` option on the `start` method of your Handler. Once enabled, use the `stream` method to receive data as it becomes available.
-
-
-
-```python
-import runpod
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-endpoint = runpod.Endpoint("YOUR_ENDPOINT_ID")
-
-run_request = endpoint.run(
- {
- "input": {
- "prompt": "Hello, world!",
- }
- }
-)
-
-for output in run_request.stream():
- print(output)
-```
-
-
-
-
-```python
-from time import sleep
-import runpod
-
-
-def handler(job):
- job_input = job["input"]["prompt"]
-
- for i in job_input:
- sleep(1) # sleep for 1 second for effect
- yield i
-
-
-runpod.serverless.start(
- {
- "handler": handler,
- "return_aggregate_stream": True, # Ensures aggregated results are streamed back
- }
-)
-```
-
-
-
-
-
-
-
-The maximum size for a payload that can be sent using yield to stream results is 1 MB.
-
-
-
-## Status
-
-Returns the status of the Job request. Set the `status()` function on the run request to return the status of the Job.
-
-
-
-```python
-import runpod
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-input_payload = {"input": {"prompt": "Hello, World!"}}
-
-endpoint = runpod.Endpoint("YOUR_ENDPOINT_ID")
-run_request = endpoint.run(input_payload)
-
-# Initial check without blocking, useful for quick tasks
-status = run_request.status()
-print(f"Initial job status: {status}")
-
-if status != "COMPLETED":
- # Polling with timeout for long-running tasks
- output = run_request.output(timeout=60)
-else:
- output = run_request.output()
-print(f"Job output: {output}")
-print(f"An error occurred: {e}")
-```
-
-
-
-
-```bash
-Initial job status: IN_QUEUE
-Job output: Hello, World!
-```
-
-
-
-
-
-## Cancel
-
-You can cancel a Job request by using the `cancel()` function on the run request. You might want to cancel a Job because it's stuck with a status of `IN_QUEUE` or `IN_PROGRESS`, or because you no longer need the result.
-
-The following pattern cancels a job given a human interaction, for example pressing `Ctrl+C` in the terminal.
-
-This sends a `SIGINT` signal to the running Job by catching the `KeyboardInterrupt` exception.
-
-
-
-```python
-import time
-import runpod
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-input_payload = {
- "messages": [{"role": "user", "content": f"Hello, World"}],
- "max_tokens": 2048,
- "use_openai_format": True,
-}
-
-try:
- endpoint = runpod.Endpoint("YOUR_ENDPOINT_ID")
- run_request = rp_endpoint.run(input_payload)
-
- while True:
- status = run_request.status()
- print(f"Current job status: {status}")
-
- if status == "COMPLETED":
- output = run_request.output()
- print("Job output:", output)
-
- generated_text = (
- output.get("choices", [{}])[0].get("message", {}).get("content")
- )
- print(generated_text)
- break
- elif status in ["FAILED", "ERROR"]:
- print("Job failed to complete successfully.")
- break
- else:
- time.sleep(10)
-except KeyboardInterrupt: # Catch KeyboardInterrupt
- print("KeyboardInterrupt detected. Canceling the job...")
- if run_request: # Check if a job is active
- run_request.cancel()
- print("Job canceled.")
-```
-
-
-
-
-```bash
-Current job status: IN_QUEUE
-Current job status: IN_PROGRESS
-KeyboardInterrupt detected. Canceling the job...
-Job canceled.
-```
-
-
-
-
-
-### Timeout
-
-Use the `cancel()` function and the `timeout` argument to cancel the Job after a specified time.
-
-In the previous `cancel()` example, the Job is canceled due to an external condition. In this example, you can cancel a running Job that has taken too long to complete.
-
-
-
-```python
-from time import sleep
-import runpod
-import os
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-input_payload = {"input": {"prompt": "Hello, World!"}}
-
-endpoint = runpod.Endpoint("YOUR_ENDPOINT_ID")
-
-
-# Submit the job request
-run_request = endpoint.run(input_payload)
-
-# Retrieve and print the initial job status
-initial_status = run_request.status()
-print(f"Initial job status: {initial_status}")
-
-# Attempt to cancel the job after a specified timeout period (in seconds)
-# Note: This demonstrates an immediate cancellation for demonstration purposes.
-# Typically, you'd set the timeout based on expected job completion time.
-run_request.cancel(timeout=3)
-
-# Wait for the timeout period to ensure the cancellation takes effect
-sleep(3)
-print("Sleeping for 3 seconds to allow for job cancellation...")
-
-# Check and print the job status after the sleep period
-final_status = run_request.status()
-print(f"Final job status: {final_status}")
-```
-
-
-
-
-```bash
-Initial job status: IN_QUEUE
-Sleeping for 3 seconds to allow for job cancellation...
-Final job status: CANCELLED
-```
-
-
-
-
-
-## Purge queue
-
-You can purge all jobs from a queue by using the `purge_queue()` function. You can provide the `timeout` parameter to specify how long to wait for the server to respond before purging the queue.
-
-`purge_queue()` doesn't affect Jobs in progress.
-
-```python
-import runpod
-import os
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-
-endpoint = runpod.Endpoint("YOUR_ENDPOINT_ID")
-
-endpoint.purge_queue(timeout=3)
-```
diff --git a/sdks/python/overview.mdx b/sdks/python/overview.mdx
index 296732d0..e69de29b 100644
--- a/sdks/python/overview.mdx
+++ b/sdks/python/overview.mdx
@@ -1,133 +0,0 @@
----
-title: "Overview"
-description: "Use the Runpod Python SDK to build Serverless applications."
----
-
-Get started with setting up your Runpod projects using Python. Depending on the specific needs of your project, there are various ways to interact with the Runpod platform. This guide provides an approach to get you up and running.
-
-## Install the Runpod SDK
-
-Create a Python virtual environment to install the Runpod SDK library. Virtual environments allow you to manage dependencies for different projects separately, avoiding conflicts between project requirements.
-
-To get started, install setup a virtual environment then install the Runpod SDK library.
-
-
-
-Create a Python virtual environment with [venv](https://docs.python.org/3/library/venv.html):
-
-```bash
-python3 -m venv env
-source env/bin/activate
-```
-
-
-
-
-Create a Python virtual environment with [venv](https://docs.python.org/3/library/venv.html):
-
-```bash
-python -m venv env
-env\Scripts\activate
-```
-
-
-
-
-Create a Python virtual environment with [venv](https://docs.python.org/3/library/venv.html):
-
-```bash
-python3 -m venv env
-source env/bin/activate
-```
-
-
-
-
-
-To install the SDK, run the following command from the terminal.
-
-```bash
-python -m pip install runpod
-```
-
-You should have the Runpod SDK installed and ready to use.
-
-## Get Runpod SDK version
-
-To ensure you've setup your Runpod SDK in Python, choose from one of the following methods to print the Runpod Python SDK version to your terminal.
-
-
-
-Run the following command using pip to get the Runpod SDK version.
-
-```bash
-pip show runpod
-```
-
-You should see something similar to the following output.
-
-```bash
-runpod==1.7.9
-```
-
-
-
-
-Run the following command from your terminal to get the Runpod SDK version.
-
-```bash
-python3 -c "import runpod; print(runpod.__version__)"
-```
-
-
-
-
-To ensure you've setup your installation correctly, get the Runpod SDK version. Create a new file called `main.py`. Add the following to your Python file and execute the script.
-
-```py
-import runpod
-
-version = runpod.version.get_version()
-
-print(f"Runpod version number: {version}")
-```
-
-You should see something similar to the following output.
-
-```sh
-Runpod version number: 1.X.0
-```
-
-
-
-
-
-You can find the latest version of the Runpod Python SDK on [GitHub](https://github.com/runpod/runpod-python/releases).
-
-Now that you've installed the Runpod SDK, add your API key.
-
-## Add your API key
-
-Set `api_key` and reference its variable in your Python application. This authenticates your requests to the Runpod platform and allows you to access the [Runpod API](/sdks/python/apis).
-
-```python
-import runpod
-import os
-
-runpod.api_key = os.getenv("RUNPOD_API_KEY")
-```
-
-
-
-It's recommended to use environment variables to set your API key. You shouldn't load your API key directly into your code.
-
-For these examples, the API key loads from an environment variable called `RUNPOD_API_KEY`.
-
-
-
-Now that you've have the Runpod Python SDK installed and configured, you can start using the Runpod platform.
-
-For more information, see:
-
-* [APIs](/sdks/python/apis)
-* [Endpoints](/sdks/python/endpoints)
diff --git a/serverless/endpoints/send-requests.mdx b/serverless/endpoints/send-requests.mdx
index 46633da3..ca398f93 100644
--- a/serverless/endpoints/send-requests.mdx
+++ b/serverless/endpoints/send-requests.mdx
@@ -991,6 +991,43 @@ console.log(health);
```
+
+```go
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "log"
+ "os"
+
+ "github.com/runpod/go-sdk/pkg/sdk/config"
+ rpEndpoint "github.com/runpod/go-sdk/pkg/sdk/endpoint"
+)
+
+func main() {
+ apiKey := os.Getenv("RUNPOD_API_KEY")
+ endpointId := os.Getenv("ENDPOINT_ID")
+
+ endpoint, err := rpEndpoint.New(
+ &config.Config{ApiKey: &apiKey},
+ &rpEndpoint.Option{EndpointId: &endpointId},
+ )
+ if err != nil {
+ log.Fatalf("Failed to create endpoint: %v", err)
+ }
+
+ health, err := endpoint.Health()
+ if err != nil {
+ log.Fatalf("Failed to get health: %v", err)
+ }
+
+ data, _ := json.Marshal(health)
+ fmt.Printf("Health: %s\n", data)
+}
+```
+
+
`/health` requests return a JSON response with the current status of the endpoint, including the number of jobs completed, failed, in progress, in queue, and retried, as well as the status of workers.
diff --git a/serverless/sdks.mdx b/serverless/sdks.mdx
new file mode 100644
index 00000000..06e4f19b
--- /dev/null
+++ b/serverless/sdks.mdx
@@ -0,0 +1,125 @@
+---
+title: "Install the Runpod SDK"
+sidebarTitle: "Install SDKs"
+description: "Install and configure the Runpod SDK for Python, JavaScript, or Go to interact with Serverless endpoints programmatically."
+---
+
+The Runpod SDK lets you interact with Serverless endpoints programmatically from your own applications.
+
+## Python
+
+### Install
+
+Create a virtual environment and install the SDK:
+
+
+
+```bash
+python3 -m venv venv
+source venv/bin/activate
+pip install runpod
+```
+
+
+
+```bash
+python -m venv venv
+venv\Scripts\activate
+pip install runpod
+```
+
+
+
+To verify the installation:
+
+```bash
+python -c "import runpod; print(runpod.__version__)"
+```
+
+### Configure your API key
+
+Set your API key as an environment variable and reference it in your code:
+
+```python
+import runpod
+import os
+
+runpod.api_key = os.getenv("RUNPOD_API_KEY")
+```
+
+
+Never hardcode your API key directly in your code. Always use environment variables or a secrets manager.
+
+
+For more information, see the [Python SDK on GitHub](https://github.com/runpod/runpod-python).
+
+## JavaScript
+
+The JavaScript SDK supports Node.js and modern JavaScript environments using ES modules.
+
+### Install
+
+```bash
+npm install --save runpod-sdk
+# or
+yarn add runpod-sdk
+```
+
+### Configure your API key
+
+```javascript
+import runpodSdk from "runpod-sdk";
+
+const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
+
+const runpod = runpodSdk(RUNPOD_API_KEY);
+const endpoint = runpod.endpoint(ENDPOINT_ID);
+```
+
+For more information, see the [JavaScript SDK on GitHub](https://github.com/runpod/js-sdk) and the [npm package](https://www.npmjs.com/package/runpod-sdk).
+
+## Go
+
+### Install
+
+```bash
+go get github.com/runpod/go-sdk
+go mod tidy
+```
+
+### Configure your API key
+
+```go
+package main
+
+import (
+ "os"
+
+ "github.com/runpod/go-sdk/pkg/sdk/config"
+ rpEndpoint "github.com/runpod/go-sdk/pkg/sdk/endpoint"
+)
+
+func main() {
+ apiKey := os.Getenv("RUNPOD_API_KEY")
+ endpointId := os.Getenv("ENDPOINT_ID")
+
+ endpoint, err := rpEndpoint.New(
+ &config.Config{ApiKey: &apiKey},
+ &rpEndpoint.Option{EndpointId: &endpointId},
+ )
+ if err != nil {
+ panic(err)
+ }
+
+ // Use the endpoint...
+}
+```
+
+For more information, see the [Go SDK on GitHub](https://github.com/runpod/go-sdk) and the [Go package documentation](https://pkg.go.dev/github.com/runpod/go-sdk/pkg/sdk).
+
+## Next steps
+
+Once you've installed and configured the SDK, you're ready to send requests to your Serverless endpoints:
+
+- [Send API requests](/serverless/endpoints/send-requests): Learn how to submit jobs, check status, and stream results.
+- [Create handler functions](/serverless/workers/handler-functions): Build custom workers to process your jobs.
+- [API reference](/api-reference/overview): Explore the full Runpod API for managing Pods, endpoints, and other resources.
diff --git a/tutorials/sdks/python/101/aggregate.mdx b/tutorials/sdks/python/101/aggregate.mdx
index 9c693cbc..e69de29b 100644
--- a/tutorials/sdks/python/101/aggregate.mdx
+++ b/tutorials/sdks/python/101/aggregate.mdx
@@ -1,240 +0,0 @@
----
-title: "Aggregating outputs in Runpod serverless functions"
-sidebarTitle: "Aggregating outputs"
----
-
-import { ServerlessTooltip, WorkerTooltip, HandlerFunctionTooltip } from "/snippets/tooltips.jsx";
-
-This tutorial will guide you through using the `return_aggregate_stream` feature in Runpod to simplify result handling in your functions. Using `return_aggregate_stream` allows you to automatically collect and aggregate all yielded results from a generator into a single response. This simplifies result handling, making it easier to manage and return a consolidated set of results from asynchronous tasks, such as concurrent sentiment analysis or object detection, without needing additional code to collect and format the results manually.
-
-We'll create a multi-purpose analyzer that can perform sentiment analysis on text and object detection in images, demonstrating how to aggregate outputs efficiently.
-
-## Setting up your Serverless Function
-
-Let's break down the process of creating our multi-purpose analyzer into steps.
-
-### Import required libraries
-
-First, import the necessary libraries:
-
-```python
-import runpod
-import time
-import random
-```
-
-### Create Helper Functions
-
-Define functions to simulate sentiment analysis and object detection:
-
-```python
-def analyze_sentiment(text):
- """Simulate sentiment analysis of text."""
- sentiments = ["Positive", "Neutral", "Negative"]
- score = random.uniform(-1, 1)
- sentiment = random.choice(sentiments)
- return f"Sentiment: {sentiment}, Score: {score:.2f}"
-
-
-def detect_objects(image_url):
- """Simulate object detection in an image."""
- objects = ["person", "car", "dog", "cat", "tree", "building"]
- detected = random.sample(objects, random.randint(1, 4))
- confidences = [random.uniform(0.7, 0.99) for _ in detected]
- return [f"{obj}: {conf:.2f}" for obj, conf in zip(detected, confidences)]
-```
-
-These functions:
-
-1. Simulate sentiment analysis, returning a random sentiment and score
-2. Simulate object detection, returning a list of detected objects with confidence scores
-
-### Create the main
-
-Now, let's create the main handler function that processes jobs and yields results:
-
-```python
-def handler(job):
- job_input = job["input"]
- task_type = job_input.get("task_type", "sentiment")
- items = job_input.get("items", [])
-
- results = []
- for item in items:
- time.sleep(random.uniform(0.5, 2)) # Simulate processing time
-
- if task_type == "sentiment":
- result = analyze_sentiment(item)
- elif task_type == "object_detection":
- result = detect_objects(item)
- else:
- result = f"Unknown task type: {task_type}"
-
- results.append(result)
- yield result
-
- return results
-```
-
-This handler:
-
-1. Determines the task type (sentiment analysis or object detection)
-2. Processes each item in the input
-3. Yields results incrementally
-4. Returns the complete list of results
-
-### Set up the Serverless starter
-
-Create a function to start the Serverless handler with proper configuration:
-
-```python
-def start_handler():
- def wrapper(job):
- generator = handler(job)
- if job.get("id") == "local_test":
- return list(generator)
- return generator
-
- runpod.serverless.start({"handler": wrapper, "return_aggregate_stream": True})
-
-
-if __name__ == "__main__":
- start_handler()
-```
-
-This setup:
-
-1. Creates a wrapper to handle both local testing and Runpod environments
-2. Uses `return_aggregate_stream=True` to automatically aggregate yielded results
-
-## Complete code example
-
-Here's the full code for our multi-purpose analyzer with output aggregation:
-
-```python
-import runpod
-import time
-import random
-
-
-def analyze_sentiment(text):
- """Simulate sentiment analysis of text."""
- sentiments = ["Positive", "Neutral", "Negative"]
- score = random.uniform(-1, 1)
- sentiment = random.choice(sentiments)
- return f"Sentiment: {sentiment}, Score: {score:.2f}"
-
-
-def detect_objects(image_url):
- """Simulate object detection in an image."""
- objects = ["person", "car", "dog", "cat", "tree", "building"]
- detected = random.sample(objects, random.randint(1, 4))
- confidences = [random.uniform(0.7, 0.99) for _ in detected]
- return [f"{obj}: {conf:.2f}" for obj, conf in zip(detected, confidences)]
-
-
-def handler(job):
- job_input = job["input"]
- task_type = job_input.get("task_type", "sentiment")
- items = job_input.get("items", [])
-
- results = []
- for item in items:
- time.sleep(random.uniform(0.5, 2)) # Simulate processing time
-
- if task_type == "sentiment":
- result = analyze_sentiment(item)
- elif task_type == "object_detection":
- result = detect_objects(item)
- else:
- result = f"Unknown task type: {task_type}"
-
- results.append(result)
- yield result
-
- return results
-
-
-def start_handler():
- def wrapper(job):
- generator = handler(job)
- if job.get("id") == "local_test":
- return list(generator)
- return generator
-
- runpod.serverless.start({"handler": wrapper, "return_aggregate_stream": True})
-
-
-if __name__ == "__main__":
- start_handler()
-```
-
-## Testing your Serverless Function
-
-To test your function locally, use these commands:
-
-For sentiment analysis:
-
-```bash
-python your_script.py --test_input '
-{
- "input": {
- "task_type": "sentiment",
- "items": [
- "I love this product!",
- "The service was terrible.",
- "It was okay, nothing special."
- ]
- }
-}'
-```
-
-For object detection:
-
-```bash
-python your_script.py --test_input '
-{
- "input": {
- "task_type": "object_detection",
- "items": [
- "image1.jpg",
- "image2.jpg",
- "image3.jpg"
- ]
- }
-}'
-```
-
-### Understanding the output
-
-When you run the sentiment analysis test, you'll see output similar to this:
-
-```bash
---- Starting Serverless Worker | Version 1.6.2 ---
-INFO | test_input set, using test_input as job input.
-DEBUG | Retrieved local job: {'input': {'task_type': 'sentiment', 'items': ['I love this product!', 'The service was terrible.', 'It was okay, nothing special.']}, 'id': 'local_test'}
-INFO | local_test | Started.
-DEBUG | local_test | Handler output: ['Sentiment: Positive, Score: 0.85', 'Sentiment: Negative, Score: -0.72', 'Sentiment: Neutral, Score: 0.12']
-DEBUG | local_test | run_job return: {'output': ['Sentiment: Positive, Score: 0.85', 'Sentiment: Negative, Score: -0.72', 'Sentiment: Neutral, Score: 0.12']}
-INFO | Job local_test completed successfully.
-INFO | Job result: {'output': ['Sentiment: Positive, Score: 0.85', 'Sentiment: Negative, Score: -0.72', 'Sentiment: Neutral, Score: 0.12']}
-INFO | Local testing complete, exiting.
-```
-
-This output demonstrates:
-
-1. The Serverless starting and processing the job
-2. The handler generating results for each input item
-3. The aggregation of results into a single list
-
-## Conclusion
-
-You've now created a Serverless function using Runpod's Python SDK that demonstrates efficient output aggregation for both local testing and production environments. This approach simplifies result handling and ensures consistent behavior across different execution contexts.
-
-To further enhance this application, consider:
-
-* Implementing real sentiment analysis and object detection models
-* Adding error handling and logging for each processing step
-* Exploring Runpod's advanced features for handling larger datasets or parallel processing
-
-Runpod's Serverless library, with features like `return_aggregate_stream`, provides a powerful foundation for building scalable, efficient applications that can process and aggregate data seamlessly.
diff --git a/tutorials/sdks/python/101/async.mdx b/tutorials/sdks/python/101/async.mdx
index 979cbc05..e69de29b 100644
--- a/tutorials/sdks/python/101/async.mdx
+++ b/tutorials/sdks/python/101/async.mdx
@@ -1,226 +0,0 @@
----
-title: "Building an async generator handler for weather data simulation"
-sidebarTitle: "Async generator"
----
-
-import { ServerlessTooltip, HandlerFunctionTooltip } from "/snippets/tooltips.jsx";
-
-This tutorial will guide you through creating a function using Runpod's Python SDK that simulates fetching weather data for multiple cities concurrently.
-
-Use asynchronous functions to handle multiple concurrent operations efficiently, especially when dealing with tasks that involve waiting for external resources, such as network requests or I/O operations. Asynchronous programming allows your code to perform other tasks while waiting, rather than blocking the entire program. This is particularly useful in a Serverless environment where you want to maximize resource utilization and minimize response times.
-
-We'll use an async generator to stream results incrementally, demonstrating how to manage multiple concurrent operations efficiently in a Serverless environment.
-
-## Setting up your Serverless Function
-
-Let's break down the process of creating our weather data simulator into steps.
-
-### SImport required libraries
-
-First, import the necessary libraries:
-
-```python
-import runpod
-import asyncio
-import random
-import json
-import sys
-```
-
-### Create the Weather Data Fetcher
-
-Define an asynchronous function that simulates fetching weather data:
-
-```python
-async def fetch_weather_data(city, delay):
- await asyncio.sleep(delay)
- temperature = random.uniform(-10, 40)
- humidity = random.uniform(0, 100)
- return {
- "city": city,
- "temperature": round(temperature, 1),
- "humidity": round(humidity, 1)
- }
-```
-
-This function:
-
-1. Simulates a network delay using `asyncio.sleep()`
-2. Generates random temperature and humidity data
-3. Returns a dictionary with the weather data for a city
-
-### Create the Async Generator Handler
-
-Now, let's create the main handler function:
-
-```python
-async def async_generator_handler(job):
- job_input = job['input']
- cities = job_input.get('cities', ['New York', 'London', 'Tokyo', 'Sydney', 'Moscow'])
- update_interval = job_input.get('update_interval', 2)
- duration = job_input.get('duration', 10)
-
- print(f"Weather Data Stream | Starting job {job['id']}")
- print(f"Monitoring cities: {', '.join(cities)}")
-
- start_time = asyncio.get_event_loop().time()
-
- while asyncio.get_event_loop().time() - start_time < duration:
- tasks = [fetch_weather_data(city, random.uniform(0.5, 2)) for city in cities]
- for completed_task in asyncio.as_completed(tasks):
- weather_data = await completed_task
- yield {
- "timestamp": round(asyncio.get_event_loop().time() - start_time, 2),
- "data": weather_data
- }
-
- await asyncio.sleep(update_interval)
-
- yield {"status": "completed", "message": "Weather monitoring completed"}
-```
-
-This handler:
-
-1. Extracts parameters from the job input
-2. Logs the start of the job
-3. Creates tasks for fetching weather data for each city
-4. Uses `asyncio.as_completed()` to yield results as they become available
-5. Continues fetching data at specified intervals for the given duration
-
-### Set up the Main Execution
-
-Finally, Set up the main execution block:
-
-```python
-async def run_test(job):
- async for item in async_generator_handler(job):
- print(json.dumps(item))
-
-if __name__ == "__main__":
- if "--test_input" in sys.argv:
- # Code for local testing (see full example)
- else:
- runpod.serverless.start({
- "handler": async_generator_handler,
- "return_aggregate_stream": True
- })
-```
-
-This block allows for both local testing and deployment as a Runpod Serverless function.
-
-## Complete code example
-
-Here's the full code for our serverless weather data simulator:
-
-```python fetch_weather_data.py
-import runpod
-import asyncio
-import random
-import json
-import sys
-
-async def fetch_weather_data(city, delay):
- await asyncio.sleep(delay)
- temperature = random.uniform(-10, 40)
- humidity = random.uniform(0, 100)
- return {
- "city": city,
- "temperature": round(temperature, 1),
- "humidity": round(humidity, 1)
- }
-
-async def async_generator_handler(job):
- job_input = job['input']
- cities = job_input.get('cities', ['New York', 'London', 'Tokyo', 'Sydney', 'Moscow'])
- update_interval = job_input.get('update_interval', 2)
- duration = job_input.get('duration', 10)
-
- print(f"Weather Data Stream | Starting job {job['id']}")
- print(f"Monitoring cities: {', '.join(cities)}")
-
- start_time = asyncio.get_event_loop().time()
-
- while asyncio.get_event_loop().time() - start_time < duration:
- tasks = [fetch_weather_data(city, random.uniform(0.5, 2)) for city in cities]
- for completed_task in asyncio.as_completed(tasks):
- weather_data = await completed_task
- yield {
- "timestamp": round(asyncio.get_event_loop().time() - start_time, 2),
- "data": weather_data
- }
-
- await asyncio.sleep(update_interval)
-
- yield {"status": "completed", "message": "Weather monitoring completed"}
-
-async def run_test(job):
- async for item in async_generator_handler(job):
- print(json.dumps(item))
-
-if __name__ == "__main__":
- if "--test_input" in sys.argv:
- test_input_index = sys.argv.index("--test_input")
- if test_input_index + 1 < len(sys.argv):
- test_input_json = sys.argv[test_input_index + 1]
- try:
- job = json.loads(test_input_json)
- asyncio.run(run_test(job))
- except json.JSONDecodeError:
- print("Error: Invalid JSON in test_input")
- else:
- print("Error: --test_input requires a JSON string argument")
- else:
- runpod.serverless.start({
- "handler": async_generator_handler,
- "return_aggregate_stream": True
- })
-```
-
-## Testing Your Serverless Function
-
-To test your function locally, use this command:
-
-```bash
-python your_script.py --test_input '
-{
- "input": {
- "cities": ["New York", "London", "Tokyo", "Paris", "Sydney"],
- "update_interval": 3,
- "duration": 15
- },
- "id": "local_test"
-}'
-```
-
-### Understanding the output
-
-When you run the test, you'll see output similar to this:
-
-```bash
-Weather Data Stream | Starting job local_test
-Monitoring cities: New York, London, Tokyo, Paris, Sydney
-{"timestamp": 0.84, "data": {"city": "London", "temperature": 11.0, "humidity": 7.3}}
-{"timestamp": 0.99, "data": {"city": "Paris", "temperature": -5.9, "humidity": 59.3}}
-{"timestamp": 1.75, "data": {"city": "Tokyo", "temperature": 18.4, "humidity": 34.1}}
-{"timestamp": 1.8, "data": {"city": "Sydney", "temperature": 26.8, "humidity": 91.0}}
-{"timestamp": 1.99, "data": {"city": "New York", "temperature": 35.9, "humidity": 27.5}}
-{"status": "completed", "message": "Weather monitoring completed"}
-```
-
-This output demonstrates:
-
-1. The concurrent processing of weather data for multiple cities
-2. Real-time updates with timestamps
-3. A completion message when the monitoring duration is reached
-
-## Conclusion
-
-You've now created a Serverless function using Runpod's Python SDK that simulates concurrent weather data fetching for multiple cities. This example showcases how to handle multiple asynchronous operations and stream results incrementally in a Serverless environment.
-
-To further enhance this application, consider:
-
-* Implementing real API calls to fetch actual weather data
-* Adding error handling for network failures or API limits
-* Exploring Runpod's documentation for advanced features like scaling for high-concurrency scenarios
-
-Runpod's Serverless library provides a powerful foundation for building scalable, efficient applications that can process and stream data concurrently in real-time without the need to manage infrastructure.
diff --git a/tutorials/sdks/python/101/error.mdx b/tutorials/sdks/python/101/error.mdx
index 20ec54ee..e69de29b 100644
--- a/tutorials/sdks/python/101/error.mdx
+++ b/tutorials/sdks/python/101/error.mdx
@@ -1,296 +0,0 @@
----
-title: "Implementing error handling and logging in Runpod serverless functions"
-sidebarTitle: "Error handling"
----
-
-import { ServerlessTooltip, WorkerTooltip, HandlerFunctionTooltip } from "/snippets/tooltips.jsx";
-
-This tutorial will guide you through implementing effective error handling and logging in your Runpod functions.
-
-Proper error handling ensures that your Serverless functions can handle unexpected situations gracefully. This prevents crashes and ensures that your application can continue running smoothly, even if some parts encounter issues.
-
-We'll create a simulated image classification model to demonstrate these crucial practices, ensuring your Serverless deployments are robust and maintainable.
-
-## Setting up your Serverless Function
-
-Let's break down the process of creating our error-aware image classifier into steps.
-
-### Import required libraries and Set Up Logging
-
-First, import the necessary libraries and Set up the Runpod logger:
-
-```python
-import runpod
-from runpod import RunPodLogger
-import time
-import random
-
-log = RunPodLogger()
-```
-
-### Create Helper Functions
-
-Define functions to simulate various parts of the image classification process:
-
-```python
-def load_model():
- """Simulate loading a machine learning model."""
- log.info("Loading image classification model...")
- time.sleep(2) # Simulate model loading time
- return "ImageClassifier"
-
-
-def preprocess_image(image_url):
- """Simulate image preprocessing."""
- log.debug(f"Preprocessing image: {image_url}")
- time.sleep(0.5) # Simulate preprocessing time
- return f"Preprocessed_{image_url}"
-
-
-def classify_image(model, preprocessed_image):
- """Simulate image classification."""
- classes = ["cat", "dog", "bird", "fish", "horse"]
- confidence = random.uniform(0.7, 0.99)
- predicted_class = random.choice(classes)
- return predicted_class, confidence
-```
-
-These functions:
-
-1. Simulate model loading, logging the process
-2. Preprocess images, with debug logging
-3. Classify images, returning random results for demonstration
-
-### Create the Main
-
-Now, let's create the main handler function with error handling and logging:
-
-```python
-def handler(job):
- job_input = job["input"]
- images = job_input.get("images", [])
-
- # Process mock logs if provided
- for job_log in job_input.get("mock_logs", []):
- log_level = job_log.get("level", "info").lower()
- if log_level == "debug":
- log.debug(job_log["message"])
- elif log_level == "info":
- log.info(job_log["message"])
- elif log_level == "warn":
- log.warn(job_log["message"])
- elif log_level == "error":
- log.error(job_log["message"])
-
- try:
- # Load model
- model = load_model()
- log.info("Model loaded successfully")
-
- results = []
- for i, image_url in enumerate(images):
- # Preprocess image
- preprocessed_image = preprocess_image(image_url)
-
- # Classify image
- predicted_class, confidence = classify_image(model, preprocessed_image)
-
- result = {
- "image": image_url,
- "predicted_class": predicted_class,
- "confidence": round(confidence, 2),
- }
- results.append(result)
-
- # Log progress
- progress = (i + 1) / len(images) * 100
- log.info(f"Progress: {progress:.2f}%")
-
- # Simulate some processing time
- time.sleep(random.uniform(0.5, 1.5))
-
- log.info("Classification completed successfully")
-
- # Simulate error if mock_error is True
- if job_input.get("mock_error", False):
- raise Exception("Mock error")
-
- return {"status": "success", "results": results}
-
- except Exception as e:
- log.error(f"An error occurred: {str(e)}")
- return {"error": str(e)}
-```
-
-This handler:
-
-1. Processes mock logs to demonstrate different logging levels
-2. Uses a try-except block to handle potential errors
-3. Simulates image classification with progress logging
-4. Returns results or an error message based on the execution
-
-### Start the Serverless
-
-Finally, start the Runpod Serverless function:
-
-```python
-runpod.serverless.start({"handler": handler})
-```
-
-## Complete code example
-
-Here's the full code for our error-aware image classification simulator:
-
-```python
-import runpod
-from runpod import RunPodLogger
-import time
-import random
-
-log = RunPodLogger()
-
-
-def load_model():
- """Simulate loading a machine learning model."""
- log.info("Loading image classification model...")
- time.sleep(2) # Simulate model loading time
- return "ImageClassifier"
-
-
-def preprocess_image(image_url):
- """Simulate image preprocessing."""
- log.debug(f"Preprocessing image: {image_url}")
- time.sleep(0.5) # Simulate preprocessing time
- return f"Preprocessed_{image_url}"
-
-
-def classify_image(model, preprocessed_image):
- """Simulate image classification."""
- classes = ["cat", "dog", "bird", "fish", "horse"]
- confidence = random.uniform(0.7, 0.99)
- predicted_class = random.choice(classes)
- return predicted_class, confidence
-
-
-def handler(job):
- job_input = job["input"]
- images = job_input.get("images", [])
-
- # Process mock logs if provided
- for job_log in job_input.get("mock_logs", []):
- log_level = job_log.get("level", "info").lower()
- if log_level == "debug":
- log.debug(job_log["message"])
- elif log_level == "info":
- log.info(job_log["message"])
- elif log_level == "warn":
- log.warn(job_log["message"])
- elif log_level == "error":
- log.error(job_log["message"])
-
- try:
- # Load model
- model = load_model()
- log.info("Model loaded successfully")
-
- results = []
- for i, image_url in enumerate(images):
- # Preprocess image
- preprocessed_image = preprocess_image(image_url)
-
- # Classify image
- predicted_class, confidence = classify_image(model, preprocessed_image)
-
- result = {
- "image": image_url,
- "predicted_class": predicted_class,
- "confidence": round(confidence, 2),
- }
- results.append(result)
-
- # Log progress
- progress = (i + 1) / len(images) * 100
- log.info(f"Progress: {progress:.2f}%")
-
- # Simulate some processing time
- time.sleep(random.uniform(0.5, 1.5))
-
- log.info("Classification completed successfully")
-
- # Simulate error if mock_error is True
- if job_input.get("mock_error", False):
- raise Exception("Mock error")
-
- return {"status": "success", "results": results}
-
- except Exception as e:
- log.error(f"An error occurred: {str(e)}")
- return {"error": str(e)}
-
-
-runpod.serverless.start({"handler": handler})
-```
-
-## Testing Your Serverless Function
-
-To test your function locally, use this command:
-
-```bash
-python your_script.py --test_input '{
- "input": {
- "images": ["image1.jpg", "image2.jpg", "image3.jpg"],
- "mock_logs": [
- {"level": "info", "message": "Starting job"},
- {"level": "debug", "message": "Debug information"},
- {"level": "warn", "message": "Warning: low disk space"},
- {"level": "error", "message": "Error: network timeout"}
- ],
- "mock_error": false
- }
-}'
-```
-
-### Understanding the output
-
-When you run the test, you'll see output similar to this:
-
-```json
-{
- "status": "success",
- "results": [
- {
- "image": "image1.jpg",
- "predicted_class": "cat",
- "confidence": 0.85
- },
- {
- "image": "image2.jpg",
- "predicted_class": "dog",
- "confidence": 0.92
- },
- {
- "image": "image3.jpg",
- "predicted_class": "bird",
- "confidence": 0.78
- }
- ]
-}
-```
-
-This output demonstrates:
-
-1. Successful processing of all images
-2. Random classification results for each image
-3. The overall success status of the job
-
-## Conclusion
-
-You've now created a Serverless function using Runpod's Python SDK that demonstrates effective error handling and logging practices. This approach ensures that your Serverless functions are robust, maintainable, and easier to debug.
-
-To further enhance this application, consider:
-
-* Implementing more specific error types and handling
-* Adding more detailed logging for each step of the process
-* Exploring Runpod's advanced logging features and integrations
-
-Runpod's Serverless library provides a powerful foundation for building reliable, scalable applications with comprehensive error management and logging capabilities.
diff --git a/tutorials/sdks/python/101/generator.mdx b/tutorials/sdks/python/101/generator.mdx
index 79c3d7d2..e69de29b 100644
--- a/tutorials/sdks/python/101/generator.mdx
+++ b/tutorials/sdks/python/101/generator.mdx
@@ -1,190 +0,0 @@
----
-title: "Building a streaming handler for text to speech simulation"
-sidebarTitle: "Streaming handler"
----
-
-import { ServerlessTooltip, HandlerFunctionTooltip } from "/snippets/tooltips.jsx";
-
-This tutorial will guide you through creating a function using Runpod's Python SDK that simulates a text-to-speech (TTS) process. We'll use a streaming handler to stream results incrementally, demonstrating how to handle long-running tasks efficiently in a serverless environment.
-
-A streaming in the Runpod's Python SDK is a special type of function that allows you to iterate over a sequence of values lazily. Instead of returning a single value and exiting, a streaming handler yields multiple values, one at a time, pausing the function's state between each yield. This is particularly useful for handling large data streams or long-running tasks, as it allows the function to produce and return results incrementally, rather than waiting until the entire process is complete.
-
-## Setting up your Serverless Function
-
-Let's break down the process of creating our TTS simulator into steps.
-
-### Import required libraries
-
-First, import the necessary libraries:
-
-```python
-import runpod
-import time
-import re
-import json
-import sys
-```
-
-### Create the TTS Simulator
-
-Define a function that simulates the text-to-speech process:
-
-```python
-def text_to_speech_simulator(text, chunk_size=5, delay=0.5):
- words = re.findall(r'\w+', text)
-
- for i in range(0, len(words), chunk_size):
- chunk = words[i:i+chunk_size]
- audio_chunk = f"Audio chunk {i//chunk_size + 1}: {' '.join(chunk)}"
- time.sleep(delay) # Simulate processing time
- yield audio_chunk
-```
-
-This function:
-
-1. Splits the input text into words
-2. Processes the words in chunks
-3. Simulates a delay for each chunk
-4. Yields each "audio chunk" as it's processed
-
-### Create the Streaming Handler
-
-Now, let's create the main handler function:
-
-```python
-def streaming_handler(job):
- job_input = job['input']
- text = job_input.get('text', "Welcome to Runpod's text-to-speech simulator!")
- chunk_size = job_input.get('chunk_size', 5)
- delay = job_input.get('delay', 0.5)
-
- print(f"TTS Simulator | Starting job {job['id']}")
- print(f"Processing text: {text}")
-
- for audio_chunk in text_to_speech_simulator(text, chunk_size, delay):
- yield {"status": "processing", "chunk": audio_chunk}
-
- yield {"status": "completed", "message": "Text-to-speech conversion completed"}
-```
-
-This handler:
-
-1. Extracts parameters from the job input
-2. Logs the start of the job
-3. Calls the TTS simulator and yields each chunk as it's processed using a streaming handler
-4. Yields a completion message when finished
-
-### Set up the main function
-
-Finally, set up the main execution block:
-
-```python
-if __name__ == "__main__":
- if "--test_input" in sys.argv:
- # Code for local testing (see full example)
- else:
- runpod.serverless.start({"handler": streaming_handler, "return_aggregate_stream": True})
-```
-
-This block allows for both local testing and deployment as a Runpod Serverless function.
-
-## Complete code example
-
-Here's the full code for our serverless TTS simulator using a streaming handler:
-
-```python
-import runpod
-import time
-import re
-import json
-import sys
-
-def text_to_speech_simulator(text, chunk_size=5, delay=0.5):
- words = re.findall(r'\w+', text)
-
- for i in range(0, len(words), chunk_size):
- chunk = words[i:i+chunk_size]
- audio_chunk = f"Audio chunk {i//chunk_size + 1}: {' '.join(chunk)}"
- time.sleep(delay) # Simulate processing time
- yield audio_chunk
-
-def streaming_handler(job):
- job_input = job['input']
- text = job_input.get('text', "Welcome to Runpod's text-to-speech simulator!")
- chunk_size = job_input.get('chunk_size', 5)
- delay = job_input.get('delay', 0.5)
-
- print(f"TTS Simulator | Starting job {job['id']}")
- print(f"Processing text: {text}")
-
- for audio_chunk in text_to_speech_simulator(text, chunk_size, delay):
- yield {"status": "processing", "chunk": audio_chunk}
-
- yield {"status": "completed", "message": "Text-to-speech conversion completed"}
-
-if __name__ == "__main__":
- if "--test_input" in sys.argv:
- test_input_index = sys.argv.index("--test_input")
- if test_input_index + 1 < len(sys.argv):
- test_input_json = sys.argv[test_input_index + 1]
- try:
- job = json.loads(test_input_json)
- gen = streaming_handler(job)
- for item in gen:
- print(json.dumps(item))
- except json.JSONDecodeError:
- print("Error: Invalid JSON in test_input")
- else:
- print("Error: --test_input requires a JSON string argument")
- else:
- runpod.serverless.start({"handler": streaming_handler, "return_aggregate_stream": True})
-```
-
-## Testing your Serverless Function
-
-To test your function locally, use this command:
-
-```bash
-python your_script.py --test_input '
-{
- "input": {
- "text": "This is a test of the Runpod text-to-speech simulator. It processes text in chunks and simulates audio generation.",
- "chunk_size": 4,
- "delay": 1
- },
- "id": "local_test"
-}'
-```
-
-### Understanding the output
-
-When you run the test, you'll see output similar to this:
-
-```json
-{"status": "processing", "chunk": "Audio chunk 1: This is a test"}
-{"status": "processing", "chunk": "Audio chunk 2: of the Runpod"}
-{"status": "processing", "chunk": "Audio chunk 3: text to speech"}
-{"status": "processing", "chunk": "Audio chunk 4: simulator It processes"}
-{"status": "processing", "chunk": "Audio chunk 5: text in chunks"}
-{"status": "processing", "chunk": "Audio chunk 6: and simulates audio"}
-{"status": "processing", "chunk": "Audio chunk 7: generation"}
-{"status": "completed", "message": "Text-to-speech conversion completed"}
-```
-
-This output demonstrates:
-
-1. The incremental processing of text chunks
-2. Real-time status updates for each chunk
-3. A completion message when the entire text is processed
-
-## Conclusion
-
-You've now created a Serverless function using Runpod's Python SDK that simulates a streaming text-to-speech process. This example showcases how to handle long-running tasks and stream results incrementally in a Serverless environment.
-
-To further enhance this application, consider:
-
-* Implementing a real text-to-speech model
-* Adding error handling for various input types
-* Exploring Runpod's documentation for advanced features like GPU acceleration for audio processing
-
-Runpod's Serverless library provides a powerful foundation for building scalable, efficient applications that can process and stream data in real-time without the need to manage infrastructure.
diff --git a/tutorials/sdks/python/101/hello.mdx b/tutorials/sdks/python/101/hello.mdx
index 766f93ca..e69de29b 100644
--- a/tutorials/sdks/python/101/hello.mdx
+++ b/tutorials/sdks/python/101/hello.mdx
@@ -1,116 +0,0 @@
----
-title: "Create a basic Serverless function"
-sidebarTitle: "Create a basic Serverless function"
----
-
-import { ServerlessTooltip, WorkerTooltip, HandlerFunctionTooltip } from "/snippets/tooltips.jsx";
-
-Runpod's library enables you to create and deploy scalable functions without managing infrastructure. This tutorial will walk you through creating a simple serverless function that determines whether a number is even.
-
-## Creating a Basic Serverless Function
-
-Let's start by building a function that checks if a given number is even.
-
-### Import the Runpod library
-
-Create a new python file called `is_even.py`.
-
-Import the Runpod library:
-
-```python is_even.py
-import runpod
-```
-
-### Define your function
-
-Create a function that takes a `job` argument:
-
-```python is_even.py
-def is_even(job):
- job_input = job["input"]
- the_number = job_input["number"]
-
- if not isinstance(the_number, int):
- return {"error": "Please provide an integer."}
-
- return the_number % 2 == 0
-```
-
-This function:
-
-1. Extracts the input from the `job` dictionary
-2. Checks if the input is an integer
-3. Returns an error message if it's not an integer
-4. Determines if the number is even and returns the result
-
-### Start the Serverless
-
-Wrap your function with `runpod.serverless.start()`:
-
-```python is_even.py
-runpod.serverless.start({"handler": is_even})
-```
-
-This line initializes the Serverless function with your specified .
-
-## Complete code example
-
-Here's the full code for our serverless function:
-
-```python is_even.py
-import runpod
-
-
-def is_even(job):
- job_input = job["input"]
- the_number = job_input["number"]
-
- if not isinstance(the_number, int):
- return {"error": "Please provide an integer."}
-
- return the_number % 2 == 0
-
-
-runpod.serverless.start({"handler": is_even})
-```
-
-## Testing your Serverless Function
-
-To test your function locally, use the following command:
-
-```bash
-python is_even.py --test_input '{"input": {"number": 2}}'
-```
-
-When you run the test, you'll see output similar to this:
-
-```bash
---- Starting Serverless Worker | Version 1.6.2 ---
-INFO | test_input set, using test_input as job input.
-DEBUG | Retrieved local job: {'id': 'some-id', 'input': {'number': 2}}
-INFO | some-id | Started.
-DEBUG | some-id | Handler output: True
-DEBUG | some-id | run_job return: {'output': True}
-INFO | Job some-id completed successfully.
-INFO | Job result: {'output': True}
-INFO | Local testing complete, exiting.
-```
-
-This output indicates that:
-
-1. The serverless worker started successfully
-2. It received the test input
-3. The function processed the input and returned `True` (as 2 is even)
-4. The job completed successfully
-
-## Conclusion
-
-You've now created a basic Serverless function using Runpod's Python SDK. This approach allows for efficient, scalable deployment of functions without the need to manage infrastructure.
-
-To further explore Runpod's serverless capabilities, consider:
-
-* Creating more complex functions
-* Implementing error handling and input validation
-* Exploring Runpod's documentation for advanced features and best practices
-
-Runpod's Serverless library provides a powerful tool for a wide range of applications, from simple utilities to complex data processing tasks.
diff --git a/tutorials/sdks/python/101/local-server-testing.mdx b/tutorials/sdks/python/101/local-server-testing.mdx
index fcf0ac1c..e69de29b 100644
--- a/tutorials/sdks/python/101/local-server-testing.mdx
+++ b/tutorials/sdks/python/101/local-server-testing.mdx
@@ -1,186 +0,0 @@
----
-title: "Creating and testing a Runpod serverless function with local server"
-sidebarTitle: "Local server testing"
----
-
-import { ServerlessTooltip, WorkerTooltip, HandlerFunctionTooltip } from "/snippets/tooltips.jsx";
-
-This tutorial will guide you through creating a basic function using Runpod's Python SDK. We'll build a function that reverses a given string, demonstrating the simplicity and flexibility of Runpod's Serverless architecture.
-
-## Setting up your Serverless Function
-
-Let's break down the process of creating our string reversal function into steps.
-
-### Import Runpod Library
-
-First, import the Runpod library:
-
-```python
-import runpod
-```
-
-### Define utility function
-
-Create a utility function to reverse the string:
-
-```python
-def reverse_string(s):
- return s[::-1]
-```
-
-This function uses Python's slicing feature to efficiently reverse the input string.
-
-### Create the
-
-The handler function is the core of our Serverless application:
-
-```python
-def handler(job):
- print(f"string-reverser | Starting job {job['id']}")
- job_input = job["input"]
-
- input_string = job_input.get("text", "")
-
- if not input_string:
- return {"error": "No input text provided"}
-
- reversed_string = reverse_string(input_string)
-
- job_output = {"original_text": input_string, "reversed_text": reversed_string}
-
- return job_output
-```
-
-This handler:
-
-1. Logs the start of each job
-2. Extracts the input string from the job data
-3. Validates the input
-4. Reverses the string using our utility function
-5. Prepares and returns the output
-
-### Start the Serverless
-
-Finally, start the Runpod Serverless worker:
-
-```python
-runpod.serverless.start({"handler": handler})
-```
-
-This line registers our handler function with Runpod's Serverless infrastructure.
-
-## Complete code example
-
-Here's the full code for our serverless string reversal function:
-
-```python
-import runpod
-
-
-def reverse_string(s):
- return s[::-1]
-
-
-def handler(job):
- print(f"string-reverser | Starting job {job['id']}")
- job_input = job["input"]
-
- input_string = job_input.get("text", "")
-
- if not input_string:
- return {"error": "No input text provided"}
-
- reversed_string = reverse_string(input_string)
-
- job_output = {"original_text": input_string, "reversed_text": reversed_string}
-
- return job_output
-
-
-runpod.serverless.start({"handler": handler})
-```
-
-## Testing Your Serverless Function
-
-Runpod provides multiple ways to test your Serverless function locally before deployment. We'll explore two methods: using command-line arguments and running a local test server.
-
-### Method 1: Command-line Testing
-
-To quickly test your function using command-line arguments, use this command:
-
-```bash
-python your_script.py --test_input '{"input": {"text": "Hello, Runpod!"}}'
-```
-
-When you run this test, you'll see output similar to:
-
-```bash
---- Starting Serverless Worker | Version 1.6.2 ---
-INFO | test_input set, using test_input as job input.
-DEBUG | Retrieved local job: {'input': {'text': 'Hello, Runpod!'}, 'id': 'local_test'}
-INFO | local_test | Started.
-string-reverser | Starting job local_test
-DEBUG | local_test | Handler output: {'original_text': 'Hello, Runpod!', 'reversed_text': '!doPnuR ,olleH'}
-DEBUG | local_test | run_job return: {'output': {'original_text': 'Hello, Runpod!', 'reversed_text': '!doPnuR ,olleH'}}
-INFO | Job local_test completed successfully.
-INFO | Job result: {'output': {'original_text': 'Hello, Runpod!', 'reversed_text': '!doPnuR ,olleH'}}
-INFO | Local testing complete, exiting.
-```
-
-This output shows the Serverless starting, processing the job, and returning the result.
-
-### Method 2: Local Test Server
-
-For more comprehensive testing, especially when you want to simulate HTTP requests to your Serverless function, you can launch a local test server. This server provides an endpoint that you can send requests to, mimicking the behavior of a deployed Serverless function.
-
-To start the local test server, use the `--rp_serve_api` flag:
-
-```bash
-python your_script.py --rp_serve_api
-```
-
-This command starts a FastAPI server on your local machine, accessible at `http://localhost:8000`.
-
-#### Sending Requests to the Local Server
-
-Once your local server is running, you can send HTTP POST requests to test your function. Use tools like `curl` or Postman, or write scripts to automate your tests.
-
-Example using `curl`:
-
-```bash
-curl -X POST http://localhost:8000/run \
- -H "Content-Type: application/json" \
- -d '{"input": {"text": "Hello, Runpod!"}}'
-```
-
-This will send a POST request to your local server with the input data, simulating how your function would be called in a production environment.
-
-#### Understanding the Server Output
-
-When you send a request to the local server, you'll see output in your terminal similar to:
-
-```bash
-INFO: 127.0.0.1:52686 - "POST /run HTTP/1.1" 200 OK
-DEBUG | Retrieved local job: {'input': {'text': 'Hello, Runpod!'}, 'id': 'local_test'}
-INFO | local_test | Started.
-string-reverser | Starting job local_test
-DEBUG | local_test | Handler output: {'original_text': 'Hello, Runpod!', 'reversed_text': '!doPnuR ,olleH'}
-DEBUG | local_test | run_job return: {'output': {'original_text': 'Hello, Runpod!', 'reversed_text': '!doPnuR ,olleH'}}
-INFO | Job local_test completed successfully.
-```
-
-This output provides detailed information about how your function processes the request, which can be invaluable for debugging and optimizing your Serverless function.
-
-## Conclusion
-
-You've now created a basic Serverless function using Runpod's Python SDK that reverses input strings and learned how to test it using both command-line arguments and a local test server. This example demonstrates how easy it is to deploy and validate simple text processing tasks as Serverless functions.
-
-To further explore Runpod's serverless capabilities, consider:
-
-* Adding more complex string manipulations
-* Implementing error handling for different input types
-* Writing automated test scripts to cover various input scenarios
-* Using the local server to integrate your function with other parts of your application during development
-* Exploring Runpod's documentation for advanced features like concurrent processing or GPU acceleration
-
-Runpod's Serverless library provides a powerful foundation for building scalable, efficient text processing applications without the need to manage infrastructure.
diff --git a/tutorials/sdks/python/102/huggingface-models.mdx b/tutorials/sdks/python/102/huggingface-models.mdx
index e1141369..e69de29b 100644
--- a/tutorials/sdks/python/102/huggingface-models.mdx
+++ b/tutorials/sdks/python/102/huggingface-models.mdx
@@ -1,167 +0,0 @@
----
-title: "Using Hugging Face models with Runpod"
-sidebarTitle: "Hugging Face models"
----
-
-import { ServerlessTooltip, WorkerTooltip, HandlerFunctionTooltip } from "/snippets/tooltips.jsx";
-
-Artificial Intelligence (AI) has revolutionized how applications analyze and interact with data. One powerful aspect of AI is sentiment analysis, which allows machines to interpret and categorize emotions expressed in text. In this tutorial, you will learn how to integrate pre-trained Hugging Face models into your Runpod applications to perform sentiment analysis. By the end of this guide, you will have a fully functional AI-powered sentiment analysis function running in a Serverless environment.
-
-### Install Required Libraries
-
-To begin, we need to install the necessary Python libraries. Hugging Face's `transformers` library provides state-of-the-art machine learning models, while the `torch` library supports these models.
-
-Execute the following command in your terminal to install the required libraries:
-
-```bash
-pip install torch transformers
-```
-
-This command installs the `torch` and `transformers` libraries. `torch` is used for creating and running models, and `transformers` provides pre-trained models.
-
-### Import libraries
-
-Next, we need to import the libraries into our Python script. Create a new Python file named `sentiment_analysis.py` and include the following import statements:
-
-```python sentiment_analysis.py
-import runpod
-from transformers import pipeline
-```
-
-These imports bring in the `runpod` SDK for Serverless functions and the `pipeline` method from `transformers`, which allows us to use pre-trained models.
-
-### Load the Model
-
-Loading the model in a function ensures that the model is only loaded once when the starts, optimizing the performance of our application. Add the following code to your `sentiment_analysis.py` file:
-
-```python sentiment_analysis.py
-def load_model():
- return pipeline(
- "sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english"
- )
-```
-
-In this function, we use the `pipeline` method from `transformers` to load a pre-trained sentiment analysis model. The `distilbert-base-uncased-finetuned-sst-2-english` model is a distilled version of BERT fine-tuned for sentiment analysis tasks.
-
-### Define the
-
-We will now define the handler function that will process incoming events and use the model for sentiment analysis. Add the following code to your script:
-
-```python sentiment_analysis.py
-def sentiment_analysis_handler(event):
- global model
-
- # Ensure the model is loaded
- if "model" not in globals():
- model = load_model()
-
- # Get the input text from the event
- text = event["input"].get("text")
-
- # Validate input
- if not text:
- return {"error": "No text provided for analysis."}
-
- # Perform sentiment analysis
- result = model(text)[0]
-
- return {"sentiment": result["label"], "score": float(result["score"])}
-```
-
-This function performs the following steps:
-
-1. Ensures the model is loaded.
-2. Retrieves the input text from the incoming event.
-3. Validates the input to ensure text is provided.
-4. Uses the loaded model to perform sentiment analysis.
-5. Returns the sentiment label and score as a dictionary.
-
-### Start the Serverless
-
-To run our sentiment analysis function as a Serverless , we need to start the worker using Runpod's SDK. Add the following line at the end of your `sentiment_analysis.py` file:
-
-```python sentiment_analysis.py
-runpod.serverless.start({"handler": sentiment_analysis_handler})
-```
-
-This command starts the Serverless worker and specifies `sentiment_analysis_handler` as the for incoming requests.
-
-### Complete Code
-
-Here is the complete code for our sentiment analysis serverless function:
-
-```python sentiment_analysis.py
-import runpod
-from transformers import pipeline
-
-
-def load_model():
- return pipeline(
- "sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english"
- )
-
-
-def sentiment_analysis_handler(event):
- global model
-
- if "model" not in globals():
- model = load_model()
-
- text = event["input"].get("text")
-
- if not text:
- return {"error": "No text provided for analysis."}
-
- result = model(text)[0]
-
- return {"sentiment": result["label"], "score": float(result["score"])}
-
-
-runpod.serverless.start({"handler": sentiment_analysis_handler})
-```
-
-### Testing Locally
-
-To test this function locally, create a file named `test_input.json` with the following content:
-
-```json test_input.json
-{
- "input": {
- "text": "I love using Runpod for serverless machine learning!"
- }
-}
-```
-
-Run the following command in your terminal to test the function:
-
-```
-python sentiment_analysis.py --rp_server_api
-```
-
-You should see output similar to the following, indicating that the sentiment analysis function is working correctly:
-
-```bash
---- Starting Serverless Worker | Version 1.6.2 ---
-INFO | Using test_input.json as job input.
-DEBUG | Retrieved local job: {'input': {'text': 'I love using Runpod for serverless machine learning!'}, 'id': 'local_test'}
-INFO | local_test | Started.
-model.safetensors: 100%|█████████████████████████| 268M/268M [00:02<00:00, 94.9MB/s]
-tokenizer_config.json: 100%|██████████████████████| 48.0/48.0 [00:00<00:00, 631kB/s]
-vocab.txt: 100%|█████████████████████████████████| 232k/232k [00:00<00:00, 1.86MB/s]
-Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
-DEBUG | local_test | Handler output: {'sentiment': 'POSITIVE', 'score': 0.9889019727706909}
-DEBUG | local_test | run_job return: {'output': {'sentiment': 'POSITIVE', 'score': 0.9889019727706909}}
-INFO | Job local_test completed successfully.
-INFO | Job result: {'output': {'sentiment': 'POSITIVE', 'score': 0.9889019727706909}}
-INFO | Local testing complete, exiting.
-```
-
-## Conclusion
-
-In this tutorial, you learned how to integrate a pre-trained Hugging Face model into a Runpod Serverless function to perform sentiment analysis on text input.
-
-This powerful combination enables you to create advanced AI applications in a Serverless environment.
-
-You can extend this concept to use more complex models or perform different types of inference tasks as needed.
-
-In our final lesson, we will explore a more complex AI task: text-to-image generation.
diff --git a/tutorials/sdks/python/102/stable-diffusion-text-to-image.mdx b/tutorials/sdks/python/102/stable-diffusion-text-to-image.mdx
index eb9fb209..e69de29b 100644
--- a/tutorials/sdks/python/102/stable-diffusion-text-to-image.mdx
+++ b/tutorials/sdks/python/102/stable-diffusion-text-to-image.mdx
@@ -1,218 +0,0 @@
----
-title: "Text To Image Generation with Stable Diffusion on Runpod"
-sidebarTitle: "Stable Diffusion text to image"
----
-
-import { ServerlessTooltip, WorkerTooltip, HandlerFunctionTooltip } from "/snippets/tooltips.jsx";
-
-Text-to-image generation using advanced AI models offers a unique way to bring textual descriptions to life as images. Stable Diffusion is a powerful model capable of generating high-quality images from text inputs, and Runpod is a computing platform that can manage resource-intensive tasks effectively. This tutorial will guide you through setting up a serverless application that utilizes Stable Diffusion for generating images from text prompts on Runpod.
-
-By the end of this guide, you will have a fully functional text-to-image generation system deployed on a Runpod Serverless environment.
-
-## Prerequisites
-
-Before diving into the setup, ensure you have the following:
-
-* Access to a Runpod account
-* A GPU instance configured on Runpod
-* Basic knowledge of Python programming
-
-## Import required libraries
-
-To start, we need to import several essential libraries. These will provide the functionalities required for serverless operation and image generation.
-
-```python stable_diffusion.py
-import runpod
-import torch
-from diffusers import StableDiffusionPipeline
-from io import BytesIO
-import base64
-```
-
-Here’s a breakdown of the imports:
-
-* `runpod`: The SDK used to interact with Runpod's Serverless environment.
-* `torch`: PyTorch library, necessary for running deep learning models and ensuring they utilize the GPU.
-* `diffusers`: Provides methods to work with diffusion models like Stable Diffusion.
-* `BytesIO` and `base64`: Used to handle image data conversions.
-
-Next, confirm that CUDA is available, as the model requires a GPU to function efficiently.
-
-```python stable_diffusion.py
-assert (
- torch.cuda.is_available()
-), "CUDA is not available. Make sure you have a GPU instance."
-```
-
-This assertion checks whether a compatible NVIDIA GPU is available for PyTorch to use.
-
-## Load the Stable Diffusion Model
-
-We'll load the Stable Diffusion model in a separate function. This ensures that the model is only loaded once when the process starts, which is more efficient.
-
-```python stable_diffusion.py
-def load_model():
- model_id = "runwayml/stable-diffusion-v1-5"
- pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
- pipe = pipe.to("cuda")
- return pipe
-```
-
-Here's what this function does:
-
-* `model_id` specifies the model identifier for Stable Diffusion version 1.5.
-* `StableDiffusionPipeline.from_pretrained` loads the model weights into memory with a specified tensor type.
-* `pipe.to("cuda")` moves the model to the GPU for faster computation.
-
-## Define Helper Functions
-
-We need a helper function to convert the generated image into a base64 string. This encoding allows the image to be easily transmitted over the web in textual form.
-
-```python stable_diffusion.py
-def image_to_base64(image):
- buffered = BytesIO()
- image.save(buffered, format="PNG")
- return base64.b64encode(buffered.getvalue()).decode("utf-8")
-```
-
-Explanation:
-
-* `BytesIO`: Creates an in-memory binary stream to which the image is saved.
-* `base64.b64encode`: Encodes the binary data to a base64 format, which is then decoded to a UTF-8 string.
-
-## Define the
-
-The handler function will be responsible for managing image generation requests. It includes loading the model (if not already loaded), validating inputs, generating images, and converting them to base64 strings.
-
-```python stable_diffusion.py
-def stable_diffusion_handler(event):
- global model
-
- # Ensure the model is loaded
- if "model" not in globals():
- model = load_model()
-
- # Get the input prompt from the event
- prompt = event["input"].get("prompt")
-
- # Validate input
- if not prompt:
- return {"error": "No prompt provided for image generation."}
-
- try:
- # Generate the image
- image = model(prompt).images[0]
-
- # Convert the image to base64
- image_base64 = image_to_base64(image)
-
- return {"image": image_base64, "prompt": prompt}
-
- except Exception as e:
- return {"error": str(e)}
-```
-
-Key steps in the function:
-
-* Checks if the model is loaded globally, and loads it if not.
-* Extracts the `prompt` from the input event.
-* Validates that a prompt has been provided.
-* Uses the `model` to generate an image.
-* Converts the image to base64 and prepares the response.
-
-## Start the Serverless
-
-Now, we'll start the Serverless worker using the Runpod SDK.
-
-```python stable_diffusion.py
-runpod.serverless.start({"handler": stable_diffusion_handler})
-```
-
-This command starts the Serverless worker and specifies the `stable_diffusion_handler` function to handle incoming requests.
-
-## Complete Code
-
-For your convenience, here is the entire code consolidated:
-
-```python stable_diffusion.py
-import runpod
-import torch
-from diffusers import StableDiffusionPipeline
-from io import BytesIO
-import base64
-
-assert (
- torch.cuda.is_available()
-), "CUDA is not available. Make sure you have a GPU instance."
-
-
-def load_model():
- model_id = "runwayml/stable-diffusion-v1-5"
- pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
- # to run on cpu change `cuda` to `cpu`
- pipe = pipe.to("cuda")
- return pipe
-
-
-def image_to_base64(image):
- buffered = BytesIO()
- image.save(buffered, format="PNG")
- return base64.b64encode(buffered.getvalue()).decode("utf-8")
-
-
-def stable_diffusion_handler(event):
- global model
-
- if "model" not in globals():
- model = load_model()
-
- prompt = event["input"].get("prompt")
-
- if not prompt:
- return {"error": "No prompt provided for image generation."}
-
- try:
- image = model(prompt).images[0]
- image_base64 = image_to_base64(image)
-
- return {"image": image_base64, "prompt": prompt}
-
- except Exception as e:
- return {"error": str(e)}
-
-
-runpod.serverless.start({"handler": stable_diffusion_handler})
-```
-
-## Testing Locally
-
-Before deploying on Runpod, you might want to test the script locally. Create a `test_input.json` file with the following content:
-
-```json test_input.json
-{
- "input": {
- "prompt": "A serene landscape with mountains and a lake at sunset"
- }
-}
-```
-
-Run the script with the following command:
-
-```
-python stable_diffusion.py --rp_server_api
-```
-
-Note: Local testing may not work optimally without a suitable GPU. If issues arise, proceed to deploy and test on Runpod.
-
-## Important Notes:
-
-1. This example requires significant computational resources, particularly GPU memory. Ensure your Runpod configuration has sufficient GPU capabilities.
-2. The model is loaded only once when the starts, optimizing performance.
-3. We've used Stable Diffusion v1.5; you can replace it with other versions or models as required.
-4. The handler includes error handling for missing input and exceptions during processing.
-5. Ensure necessary dependencies (like `torch`, `diffusers`) are included in your environment or requirements file when deploying.
-6. The generated image is returned as a base64-encoded string. For practical applications, consider saving it to a file or cloud storage.
-
-### Conclusion
-
-In this tutorial, you learned how to use the Runpod Serverless platform with Stable Diffusion to create a text-to-image generation system. This project showcases the potential for deploying resource-intensive AI models in a Serverless architecture using the Runpod Python SDK. You now have the skills to create and deploy sophisticated AI applications on Runpod. What will you create next?
diff --git a/tutorials/sdks/python/get-started/hello-world.mdx b/tutorials/sdks/python/get-started/hello-world.mdx
index 823c6ec4..e69de29b 100644
--- a/tutorials/sdks/python/get-started/hello-world.mdx
+++ b/tutorials/sdks/python/get-started/hello-world.mdx
@@ -1,54 +0,0 @@
----
-title: "Hello World with Runpod"
----
-
-import { ServerlessTooltip, WorkerTooltip, HandlerFunctionTooltip } from "/snippets/tooltips.jsx";
-
-Let's dive into creating your first Runpod application. We're going to build a "Hello, World!" program that greets users with a custom message. Don't worry about sending requests just yet - we'll cover that in the next tutorial, [running locally](/tutorials/sdks/python/get-started/running-locally).
-
-This exercise will introduce you to the key parts of a Runpod application, giving you a solid foundation in Serverless functions. By the end, you'll have your very own Runpod Serverless function up and running locally.
-
-### Creating Your First Serverless Function
-
-Let's write a Python script that defines a simple serverless function. This function will say `Hello, World!`.
-
-Create a new file called `hello_world.py` in your text editor and add the following code:
-
-```python hello_world.py
-import runpod
-
-
-def handler(job):
- job_input = job["input"]
-
- return f"Hello {job_input['name']}!"
-
-
-runpod.serverless.start({"handler": handler})
-```
-
-Let's break this down:
-
-We start by importing the `runpod` library. This gives us all the tools we need for creating and managing serverless applications.
-
-Next, we define our `handler` function. This function processes incoming requests. It takes a `job` parameter, which contains all the info about the incoming job.
-
-Inside the handler, we grab the input data from the job. We're expecting a 'name' field in the input.
-
-Then we create and return our greeting message, using the name we got from the input.
-
-Finally, we call `runpod.serverless.start()`, telling it to use our `handler` function. This kicks off the Serverless and gets it ready to handle incoming jobs.
-
-And there you have it! You've just created your first Runpod Serverless function. It takes in a request with a name and returns a personalized greeting.
-
-### Key Takeaways
-
-* Runpod functions are built around a that processes incoming jobs.
-* You can easily access input data from the job parameter.
-* The `runpod.serverless.start()` function gets your Serverless up and running.
-
-## Next steps
-
-You've now got a basic `Hello, World!` Runpod Serverless function up and running. You've learned how to handle input and output in a Serverless environment and how to start your application.
-
-These are the building blocks for creating more complex Serverless applications with Runpod. As you get more comfortable with these concepts, you'll be able to create even more powerful and flexible Serverless functions.
diff --git a/tutorials/sdks/python/get-started/introduction.mdx b/tutorials/sdks/python/get-started/introduction.mdx
index 8700790c..e69de29b 100644
--- a/tutorials/sdks/python/get-started/introduction.mdx
+++ b/tutorials/sdks/python/get-started/introduction.mdx
@@ -1,101 +0,0 @@
----
-title: "Introduction to the Runpod Python SDK"
-sidebarTitle: "Introduction"
----
-
-import { ServerlessTooltip, WorkerTooltip, HandlerFunctionTooltip } from "/snippets/tooltips.jsx";
-
-Welcome to the world of AI development with the [Runpod Python SDK](https://github.com/runpod/runpod-python).
-
-The Runpod Python SDK helps you develop Serverless AI applications so that you can build and deploy scalable AI solutions efficiently.
-
-This series of tutorials will deepen your understanding of Serverless principles and the practical knowledge to use the Runpod Python SDK in your AI applications.
-
-## Prerequisites
-
-To follow along with this guide, you should have:
-
-* Basic programming knowledge in Python.
-* An understanding of AI and machine learning concepts.
-* [An account on the Runpod platform](https://www.console.runpod.io/signup).
-
-## What is the Runpod Python SDK?
-
-The [Runpod Python SDK](https://github.com/runpod/runpod-python) is a toolkit designed to facilitate the creation and deployment of Serverless applications on the Runpod platform.
-
-It is optimized for AI and machine learning workloads, simplifying the development of scalable, cloud-based AI applications. The SDK allows you to define s, conduct local testing, and utilize GPU support.
-
-Acting as a bridge between your Python code and Runpod's cloud infrastructure, the SDK enables you to execute complex AI tasks without managing underlying hardware.
-
-To start using Runpod Python SDK, see the [prerequisites](/tutorials/sdks/python/get-started/prerequisites) section or if, you're already setup proceed to the [Hello World](/tutorials/sdks/python/get-started/hello-world) tutorial, where we will guide you through creating, deploying, and running your first Serverless AI application.
-
-You can also see a library of complete Runpod samples in the [ library](https://github.com/runpod-workers) on GitHub. These samples are complete Python libraries for common use cases.
-
-## Learn more
-
-Continue your journey by following our sequenced lessons designed to deepen your understanding and skills:
-
-Here's a brief overview of each tutorial:
-
-1. [Prerequisites and setup](/tutorials/sdks/python/get-started/prerequisites):
-
- * Installing Python and setting up a virtual environment
- * Installing the Runpod SDK
- * Configuring your Runpod account
-
-2. [Hello World: Your first Runpod function](/tutorials/sdks/python/get-started/hello-world):
-
- * Creating a basic handler function
- * Understanding job input and output
- * Starting the Serverless worker
-
-3. [Running and testing locally](/tutorials/sdks/python/get-started/running-locally):
-
- * Testing with JSON input files
- * Interpreting local test output
-
-4. [Runpod functions](/tutorials/sdks/python/101/hello):
-
- * Creating a basic handler function
- * Understanding job input and output
- * Starting the Serverless worker
- * Testing with command-line arguments
-
-5. [Using a Local Server](/tutorials/sdks/python/101/local-server-testing):
-
- * Setting up a local test server
- * Sending HTTP requests to your local function
- * Understanding server output and debugging
- * Comparing command-line and server-based testing
-
-6. [Building a Generator Handler for Streaming Results](/tutorials/sdks/python/101/generator):
-
- * Understanding generator functions in Runpod's SDK
- * Creating a text-to-speech simulator with streaming output
- * Implementing a generator handler for incremental processing
- * Testing and debugging generator-based Serverless functions
-
-7. [Advanced Handler Techniques](/tutorials/sdks/python/101/async):
-
- * Synchronous vs asynchronous handlers
- * Using generator functions for streaming output
- * Handling multiple inputs and complex data structures
-
-8. [Error Handling and Logging](/tutorials/sdks/python/101/error):
-
- * Implementing try-except blocks in handlers
- * Using Runpod's logging system
- * Best practices for error management in Serverless functions
-
-9. [Hugging Face Integration](/tutorials/sdks/python/102/huggingface-models):
-
- * Installing and importing external libraries
- * Loading and using a Hugging Face model
- * Optimizing model loading for Serverless environments
-
-10. [Stable Diffusion](/tutorials/sdks/python/102/stable-diffusion-text-to-image):
-
- * Setting up a text-to-image generation function
- * Handling larger inputs and outputs
-
-Now, move on to the [prerequisites](/tutorials/sdks/python/get-started/prerequisites) and then set up [your first “Hello World”](/tutorials/sdks/python/get-started/hello-world) application with Runpod Python SDK.
diff --git a/tutorials/sdks/python/get-started/prerequisites.mdx b/tutorials/sdks/python/get-started/prerequisites.mdx
index 15f91c9e..e69de29b 100644
--- a/tutorials/sdks/python/get-started/prerequisites.mdx
+++ b/tutorials/sdks/python/get-started/prerequisites.mdx
@@ -1,106 +0,0 @@
----
-title: "Prerequisites"
----
-
-import { ServerlessTooltip } from "/snippets/tooltips.jsx";
-
-Setting up a proper development environment is fundamental to effectively building AI applications using Runpod. This guide will take you through each necessary step to prepare your system for Runpod development, ensuring you have the correct tools and configurations.
-
-In this guide, you will learn how to the Runpod library.
-
-When you're finished, you'll have a fully prepared environment to begin developing your Serverless AI applications with Runpod.
-
-## Prerequisites
-
-Before beginning, ensure your system meets the following requirements:
-
-* **Python 3.8 or later**: This is the programming language in which you'll be writing your Runpod applications.
-* **Access to a terminal or command prompt**: This will be used to run various commands throughout this tutorial.
-
-## Install Python
-
-First, you need to have Python installed on your system. Python is a programming language that's widely used in various types of software development and what is used to develop with the Runpod Python SDK.
-
-To install Python, follow these steps:
-
-1. Visit the [official Python website](https://www.python.org/downloads/).
-2. Download the latest stable version of Python (version is 3.8 or later).
-3. Follow the installation instructions for your operating system.
-
-Once Python is installed, you can move onto setting up a virtual environment.
-
-## Set up a virtual environment
-
-Using a virtual environment is a best practice in Python development.
-
-It keeps project dependencies isolated, avoiding conflicts between packages used in different projects.
-
-Here’s how you can set up a virtual environment:
-
-1. Open your terminal or command prompt.
-
-2. Navigate to your project directory using the `cd` command. For example:
-
- ```bash
- cd path/to/your/project
- ```
-
-3. Create a virtual environment named `venv` by running the following command:
-
- ```bash
- python -m venv venv
- ```
-
-This command uses Python's built-in `venv` module to create a virtual environment.
-
-4. Activate the virtual environment:
-
- * On Windows, use:
-
- ```bash
- venv\Scripts\activate
- ```
-
- * On macOS and Linux, use:
-
- ```bash
- source venv/bin/activate
- ```
-
-Activating the virtual environment ensures that any Python packages you install will be confined to this environment.
-
-You have now set up and activated a virtual environment for your project. The next step is to install the Runpod library within this virtual environment.
-
-## Install the Runpod Library
-
-With the virtual environment activated, you need to install the Runpod Python SDK. This library provides the tools necessary to develop Serverless applications on the Runpod platform.
-
-To install the Runpod library, execute:
-
-```bash
-pip install runpod
-```
-
-This command uses `pip`, Python's package installer, to download and install the latest version of the Runpod SDK.
-
-## Verify the Installation
-
-It's essential to confirm that the Runpod library has been installed correctly. You can do this by running the following Python command:
-
-```bash
-python -c "import runpod; print(runpod.__version__)"
-```
-
-If everything is set up correctly, this command will output the version number of the installed Runpod SDK.
-
-For example:
-
-```
-1.6.2
-```
-
-You have now successfully set up your development environment. Your system is equipped with Python, a virtual environment, and the Runpod library.
-
-You will use the Runpod Python library for writing your Serverless application.
-
-Next, we'll proceed with creating a [Hello World application with Runpod](/tutorials/sdks/python/get-started/hello-world).
diff --git a/tutorials/sdks/python/get-started/running-locally.mdx b/tutorials/sdks/python/get-started/running-locally.mdx
index 3a81e682..e69de29b 100644
--- a/tutorials/sdks/python/get-started/running-locally.mdx
+++ b/tutorials/sdks/python/get-started/running-locally.mdx
@@ -1,99 +0,0 @@
----
-title: "Running code locally"
-sidebarTitle: "Running locally"
----
-
-import { ServerlessTooltip, WorkerTooltip } from "/snippets/tooltips.jsx";
-
-Before deploying your functions to the cloud, it's crucial to test them locally. In the previous lesson, [Hello World with Runpod](/tutorials/sdks/python/get-started/hello-world), you created a Python file called `hello_world.py`.
-
-In this guide, you'll learn how to run your Runpod Serverless applications on your local machine using the Runpod Python SDK.
-
-## Understanding Runpod's Local Testing Environment
-
-When you run your code locally using the Runpod Python SDK, here's what happens behind the scenes:
-
-* FastAPI Server: The SDK spins up a FastAPI server on your local machine. This server simulates the Runpod Serverless environment.
-* Request Handling: The FastAPI server receives and processes requests just like the cloud version would, allowing you to test your function's input handling and output generation.
-* Environment Simulation: The local setup mimics key aspects of the Runpod Serverless environment, helping ensure your code will behave similarly when deployed.
-
-## Running Your Code Locally
-
-Let's walk through how to run your Serverless functions locally using the Runpod Python SDK.
-
-**Options for Passing Information to Your API**
-
-The Runpod Python SDK offers two main methods for sending data to your local FastAPI server:
-
-1. Using a JSON file
-2. Using inline JSON via command line
-
-Both methods allow you to simulate how your function would receive data in the actual cloud environment.
-
-### Using a JSON File
-
-1. Create a JSON file:
-
- Create a file called `test_input.json` with your test data:
-
- ```json test_input.json
- {
- "input": {
- "name": "World"
- }
- }
- ```
-
-2. Run the Serverless function:
-
- Execute your `hello_world.py` script with the `--rp_server_api` flag:
-
- ```bash
- python hello_world.py --rp_server_api
- ```
-
- The SDK will automatically look for and use the `test_input.json` file in the current directory.
-
-### Using Inline JSON
-
-You can also pass your test data directly via the command line:
-
-```bash
-python hello_world.py --test_input '{"input": {"name": "World"}}'
-```
-
-This method is useful for quick tests or when you want to vary the input without editing a file.
-
-### Understanding the output
-
-When you run your function locally, you'll see output similar to this:
-
-```bash
---- Starting Serverless Worker | Version 1.6.2 ---
-INFO | Using test_input.json as job input.
-DEBUG | Retrieved local job: {'input': {'name': 'World'}, 'id': 'local_test'}
-INFO | local_test | Started.
-DEBUG | local_test | Handler output: Hello World!
-DEBUG | local_test | run_job return: {'output': 'Hello World!'}
-INFO | Job local_test completed successfully.
-INFO | Job result: {'output': 'Hello World!'}
-INFO | Local testing complete, exiting.
-```
-
-This output provides valuable information:
-
-* Confirmation that the Serverless started successfully
-* Details about the input data being used
-* Step-by-step execution of your function
-* The final output and job status
-
-By analyzing this output, you can verify that your function is behaving as expected and debug any issues that arise.
-
-### Key Takeaways
-
-* Local testing with the Runpod Python SDK allows you to simulate the cloud environment on your machine.
-* The SDK creates a FastAPI server to mock the Serverless function execution.
-* You can provide input data via a JSON file or inline JSON in the command line.
-* Local testing accelerates development, reduces costs, and helps catch issues early.
-
-Next, we'll explore the structure of Runpod handlers in more depth, enabling you to create more sophisticated Serverless functions.
From 9b9eff6e6f96896fa79e629dcb6ab5bba15e8080 Mon Sep 17 00:00:00 2001
From: Mo King
Date: Mon, 23 Feb 2026 10:20:26 -0500
Subject: [PATCH 2/8] Add hugging face guide and aggregate outputs for handler
functions
---
docs.json | 2 +
serverless/development/aggregate-outputs.mdx | 280 ++++++++++++++++++
serverless/development/huggingface-models.mdx | 201 +++++++++++++
serverless/sdks.mdx | 8 +-
serverless/workers/handler-functions.mdx | 3 +
5 files changed, 492 insertions(+), 2 deletions(-)
create mode 100644 serverless/development/aggregate-outputs.mdx
create mode 100644 serverless/development/huggingface-models.mdx
diff --git a/docs.json b/docs.json
index 440fe98f..3ac8943b 100644
--- a/docs.json
+++ b/docs.json
@@ -60,6 +60,8 @@
"serverless/development/cleanup",
"serverless/development/write-logs",
"serverless/development/environment-variables",
+ "serverless/development/aggregate-outputs",
+ "serverless/development/huggingface-models",
"serverless/workers/concurrent-handler"
]
},
diff --git a/serverless/development/aggregate-outputs.mdx b/serverless/development/aggregate-outputs.mdx
new file mode 100644
index 00000000..2073160a
--- /dev/null
+++ b/serverless/development/aggregate-outputs.mdx
@@ -0,0 +1,280 @@
+---
+title: "Aggregate streaming outputs"
+sidebarTitle: "Aggregate outputs"
+description: "Automatically collect and aggregate yielded results from streaming handler functions."
+---
+
+import { HandlerFunctionTooltip, WorkerTooltip, ServerlessTooltip } from "/snippets/tooltips.jsx";
+
+When building a streaming <HandlerFunctionTooltip /> that yields results incrementally, you can use the `return_aggregate_stream` feature to automatically collect all yielded outputs into a single aggregated response. This simplifies result handling by eliminating the need to manually collect and format streaming results, making your handlers easier to implement and consume.
+
+This guide shows you how to use output aggregation effectively in your applications.
+
+## Understanding output aggregation
+
+By default, streaming handlers that yield results are only accessible via the `/stream` operation, which returns results as they become available. When you enable `return_aggregate_stream`, Runpod automatically:
+
+1. Collects all yielded results as your handler produces them.
+2. Aggregates them into a single list.
+3. Makes the complete aggregated results available via `/run` and `/runsync` operations.
+
+This allows clients to choose between streaming individual results as they arrive or waiting for the complete aggregated response.
+
+## Basic aggregation example
+
+Here's a simple handler that processes multiple items and yields results incrementally:
+
+```python handler.py
+import runpod
+
+def handler(job):
+ job_input = job["input"]
+ items = job_input.get("items", [])
+
+ results = []
+ for item in items:
+ # Process each item
+ result = f"Processed: {item}"
+
+ # Yield result immediately for streaming
+ yield result
+
+ # Also collect for final return
+ results.append(result)
+
+ # Return complete list
+ return results
+
+runpod.serverless.start({
+ "handler": handler,
+ "return_aggregate_stream": True
+})
+```
+
+When a client calls this handler with multiple items, they can:
+- Use `/stream` to receive each result as it's yielded.
+- Use `/run` or `/runsync` to receive all results aggregated into a list.
+
+## Processing multiple items
+
+A common pattern is processing a batch of items and yielding results as each completes. This is useful for tasks like:
+
+- Analyzing sentiment for multiple text samples.
+- Generating images from multiple prompts.
+- Running inference on multiple inputs.
+
+Here's a practical example:
+
+```python handler.py
+import runpod
+import time
+
+def analyze_items(items, task_type):
+ """Process items based on task type."""
+ results = []
+
+ for item in items:
+ # Simulate processing time
+ time.sleep(0.5)
+
+ # Process based on type
+ if task_type == "sentiment":
+ result = {"text": item, "sentiment": "positive", "score": 0.92}
+ elif task_type == "classify":
+ result = {"text": item, "category": "technology", "confidence": 0.88}
+ else:
+ result = {"error": f"Unknown task type: {task_type}"}
+
+ # Yield each result as it completes
+ yield result
+ results.append(result)
+
+ return results
+
+def handler(job):
+ job_input = job["input"]
+ task_type = job_input.get("task_type", "sentiment")
+ items = job_input.get("items", [])
+
+ # Validate input
+ if not items:
+ return {"error": "No items provided"}
+
+ # Process items and yield results
+ return analyze_items(items, task_type)
+
+runpod.serverless.start({
+ "handler": handler,
+ "return_aggregate_stream": True
+})
+```
+
+This handler processes each item sequentially, yielding results immediately while building a complete list to return.
+
+## Local testing with aggregation
+
+When testing locally, generators behave differently than in production. You need to handle the difference between the local test environment and production:
+
+```python handler.py
+import runpod
+
+def handler(job):
+ job_input = job["input"]
+ items = job_input.get("items", [])
+
+ for item in items:
+ result = f"Processed: {item}"
+ yield result
+
+def start_handler():
+ """Wrapper to handle local testing vs. production."""
+ def wrapper(job):
+ generator = handler(job)
+
+ # In local testing, convert generator to list
+ if job.get("id") == "local_test":
+ return list(generator)
+
+ # In production, return the generator
+ return generator
+
+ runpod.serverless.start({
+ "handler": wrapper,
+ "return_aggregate_stream": True
+ })
+
+if __name__ == "__main__":
+ start_handler()
+```
+
+The wrapper function checks if the job ID is `local_test` (indicating local testing) and converts the generator to a list. In production, it returns the generator directly, allowing Runpod to handle the aggregation.
+
+## Testing locally
+
+Create a test input file to verify your aggregation works correctly:
+
+```json test_input.json
+{
+ "input": {
+ "task_type": "sentiment",
+ "items": [
+ "I love this product!",
+ "The service was okay.",
+ "Not great, could be better."
+ ]
+ }
+}
+```
+
+Run your handler:
+
+```bash
+python handler.py --test_input '{"input": {"task_type": "sentiment", "items": ["Item 1", "Item 2", "Item 3"]}}'
+```
+
+You should see output showing each result being processed and the final aggregated list:
+
+```bash
+--- Starting Serverless Worker | Version 1.6.2 ---
+INFO | Using test_input.json as job input.
+DEBUG | Retrieved local job: {'input': {'task_type': 'sentiment', 'items': ['Item 1', 'Item 2', 'Item 3']}, 'id': 'local_test'}
+INFO | local_test | Started.
+DEBUG | local_test | Handler output: ['Processed: Item 1', 'Processed: Item 2', 'Processed: Item 3']
+INFO | Job local_test completed successfully.
+```
+
+## Understanding the output format
+
+When `return_aggregate_stream` is enabled, the final output structure includes all yielded results in a list:
+
+**Without aggregation** (streaming only):
+- Results arrive one at a time via `/stream`.
+- No combined output available via `/run` or `/runsync`.
+
+**With aggregation enabled:**
+- Individual results still available via `/stream` as they're yielded.
+- Complete aggregated list available via `/run` and `/runsync`:
+
+```json
+{
+ "output": [
+ {"text": "Item 1", "sentiment": "positive", "score": 0.92},
+ {"text": "Item 2", "sentiment": "neutral", "score": 0.54},
+ {"text": "Item 3", "sentiment": "negative", "score": 0.78}
+ ]
+}
+```
+
+## When to use output aggregation
+
+Use `return_aggregate_stream` for:
+
+- **Batch processing**: Processing multiple items and clients need the complete set of results.
+- **Progress tracking**: Clients want to see incremental progress but also need the final aggregated results.
+- **Flexible consumption**: Supporting both streaming and batch consumption patterns.
+- **Simplified integration**: Clients don't want to implement streaming logic but still benefit from incremental processing.
+
+Don't use it for:
+
+- **Large result sets**: Aggregating thousands of results can create memory pressure and large response payloads.
+- **True streaming only**: Results should only be consumed as a stream (like real-time chat).
+- **Single result**: Handler only returns one result (no need for aggregation).
+
+## Best practices
+
+1. **Memory management**: Be mindful of memory usage when aggregating large numbers of results.
+
+2. **Payload limits**: Remember the payload size limits:
+ - `/run` operation: 10 MB
+ - `/runsync` operation: 20 MB
+
+ If aggregated results exceed these limits, consider using streaming only or storing results in cloud storage.
+
+3. **Error handling**: Handle errors for individual items without breaking the entire batch:
+
+ ```python
+ def handler(job):
+ items = job["input"].get("items", [])
+
+ for item in items:
+ try:
+ result = process_item(item)
+ yield {"success": True, "result": result}
+ except Exception as e:
+ yield {"success": False, "error": str(e), "item": item}
+ ```
+
+4. **Consistent output structure**: Yield results in a consistent format to simplify client-side processing.
+
+## Combining with async handlers
+
+You can also use aggregation with async handlers for concurrent processing:
+
+```python handler.py
+import runpod
+import asyncio
+
+async def async_handler(job):
+ items = job["input"].get("items", [])
+
+ for item in items:
+ # Simulate async processing
+ await asyncio.sleep(0.5)
+
+ result = f"Async processed: {item}"
+ yield result
+
+runpod.serverless.start({
+ "handler": async_handler,
+ "return_aggregate_stream": True
+})
+```
+
+This combines the benefits of async processing with automatic output aggregation.
+
+## Next steps
+
+- Learn more about [streaming handlers](/serverless/workers/handler-functions#streaming-handlers).
+- Explore [async handlers](/serverless/workers/handler-functions#asynchronous-handlers) for concurrent processing.
+- Understand [error handling](/serverless/development/error-handling) for robust batch processing.
+- Review [payload limits](/serverless/workers/handler-functions#payload-limits) to avoid oversized responses.
diff --git a/serverless/development/huggingface-models.mdx b/serverless/development/huggingface-models.mdx
new file mode 100644
index 00000000..26fbcc19
--- /dev/null
+++ b/serverless/development/huggingface-models.mdx
@@ -0,0 +1,201 @@
+---
+title: "Use Hugging Face models"
+sidebarTitle: "Hugging Face models"
+description: "Learn how to integrate pre-trained Hugging Face models into your Serverless handler functions."
+---
+
+import { HandlerFunctionTooltip, WorkerTooltip, ServerlessTooltip } from "/snippets/tooltips.jsx";
+
+Hugging Face provides thousands of pre-trained models for natural language processing, computer vision, audio processing, and more. You can integrate these models into your <HandlerFunctionTooltip /> to deploy AI capabilities without training models from scratch.
+
+This guide shows you how to load and use Hugging Face models in your Serverless handlers, using sentiment analysis as an example that you can adapt for other model types.
+
+
+**Use cached models for production:** The approach shown in this guide downloads models when workers start, which increases cold start times and costs. For production, use [cached models](/serverless/endpoints/model-caching) instead. Cached models reduce cold starts to just a few seconds and eliminate charges for model download time. See the [cached model tutorial](/tutorials/serverless/model-caching-text) for a complete example.
+
+
+## Install dependencies
+
+Your handler needs the `transformers` library to load Hugging Face models, and `torch` to run inference. Install both in your development environment:
+
+```bash
+pip install torch transformers
+```
+
+When deploying to Runpod, you'll need to include these dependencies in your [Dockerfile](/serverless/workers/create-dockerfile) or requirements file.
+
+## Create your handler
+
+Create a file named `handler.py` and follow these steps to build a handler that performs sentiment analysis using a Hugging Face model.
+
+
+
+ Start by importing the necessary libraries:
+
+ ```python handler.py
+ import runpod
+ from transformers import pipeline
+ ```
+
+ The `pipeline` function from the `transformers` library provides a simple interface for using pre-trained models. It handles tokenization, model inference, and post-processing automatically.
+
+
+ The `pipeline` approach shown in this guide is convenient for local testing and development. For production endpoints, you should use [cached models](/serverless/endpoints/model-caching) instead, which dramatically reduce cold start times and eliminate charges for model download time.
+
+
+
+
+ Load your model outside the handler function to avoid reloading it on every request. This significantly improves performance by initializing the model only once when the <WorkerTooltip /> starts:
+
+ ```python handler.py
+ # Load model once when worker starts
+ model = pipeline(
+ "sentiment-analysis",
+ model="distilbert-base-uncased-finetuned-sst-2-english"
+ )
+ ```
+
+ The `pipeline` function takes two arguments: the task type (like `"sentiment-analysis"`, `"text-generation"`, or `"image-classification"`) and the specific model identifier from the Hugging Face model hub.
+
+
+
+ Create a handler function that extracts input text from the request, validates it, runs inference, and returns results:
+
+ ```python handler.py
+ def handler(job):
+ # Extract input from the job
+ job_input = job["input"]
+ text = job_input.get("text")
+
+ # Validate input
+ if not text:
+ return {"error": "No text provided for analysis."}
+
+ # Run inference
+ result = model(text)[0]
+
+ # Return formatted results
+ return {
+ "sentiment": result["label"],
+ "score": float(result["score"])
+ }
+ ```
+
+ The handler follows Runpod's standard pattern: extract input, validate it, process it, and return results. The model returns a list of predictions, so we take the first result with `[0]` and extract the label and confidence score.
+
+
+
+ Add this line at the end of your file to register the handler and start the worker:
+
+ ```python handler.py
+ runpod.serverless.start({"handler": handler})
+ ```
+
+
+
+### Complete implementation
+
+Here's the complete code:
+
+```python handler.py
+import runpod
+from transformers import pipeline
+
+# Load model once when worker starts
+model = pipeline(
+ "sentiment-analysis",
+ model="distilbert-base-uncased-finetuned-sst-2-english"
+)
+
+def handler(job):
+ # Extract input from the job
+ job_input = job["input"]
+ text = job_input.get("text")
+
+ # Validate input
+ if not text:
+ return {"error": "No text provided for analysis."}
+
+ # Run inference
+ result = model(text)[0]
+
+ # Return formatted results
+ return {
+ "sentiment": result["label"],
+ "score": float(result["score"])
+ }
+
+runpod.serverless.start({"handler": handler})
+```
+
+## Test locally
+
+Create a test input file to verify your handler works correctly:
+
+```json test_input.json
+{
+ "input": {
+ "text": "This is absolutely wonderful and amazing!"
+ }
+}
+```
+
+Run your handler locally using the Runpod SDK:
+
+```bash
+python handler.py --rp_server_api
+```
+
+You should see output indicating successful sentiment analysis:
+
+```bash
+--- Starting Serverless Worker | Version 1.6.2 ---
+INFO | Using test_input.json as job input.
+DEBUG | Retrieved local job: {'input': {'text': 'This is absolutely wonderful and amazing!'}, 'id': 'local_test'}
+INFO | local_test | Started.
+DEBUG | local_test | Handler output: {'sentiment': 'POSITIVE', 'score': 0.999880313873291}
+INFO | Job local_test completed successfully.
+```
+
+The first time you run this, Hugging Face will download the model files. Subsequent runs will use the cached model.
+
+## Adapt for other models
+
+This pattern works for any Hugging Face model. To use a different model:
+
+1. **Choose your model**: Browse the [Hugging Face model hub](https://huggingface.co/models) to find a model for your task.
+
+2. **Update the pipeline**: Change the task type and model identifier:
+
+ ```python
+ # Text generation example
+ model = pipeline("text-generation", model="gpt2")
+
+ # Image classification example
+ model = pipeline("image-classification", model="google/vit-base-patch16-224")
+
+ # Translation example
+ model = pipeline("translation_en_to_fr", model="t5-base")
+ ```
+
+3. **Adjust input/output handling**: Different models expect different input formats and return different output structures. Check the model's documentation on Hugging Face to understand its API.
+
+## Production deployment
+
+When deploying Hugging Face models to production endpoints, follow these best practices:
+
+- **Use cached models**: The approach shown in this guide downloads models when workers start, which increases cold start times and costs. For production, use [cached models](/serverless/endpoints/model-caching) instead. Cached models reduce cold starts to just a few seconds and eliminate charges for model download time. See the [cached model tutorial](/tutorials/serverless/model-caching-text) for a complete example.
+
+- **Model size**: Larger models require more VRAM and take longer to load. Choose the smallest model that meets your accuracy requirements.
+
+- **GPU utilization**: Most Hugging Face models run faster on GPUs. Ensure your endpoint uses GPU workers for optimal performance.
+
+- **Batch processing**: If your model supports batching, process multiple inputs together to improve throughput.
+
+## Next steps
+
+- **For production**: Learn about [cached models](/serverless/endpoints/model-caching) and follow the [cached model tutorial](/tutorials/serverless/model-caching-text) to improve cold start times and reduce costs.
+- [Create a Dockerfile](/serverless/workers/create-dockerfile) to package your handler with its dependencies.
+- [Deploy your worker](/serverless/workers/deploy) to a Runpod endpoint.
+- Explore [optimization techniques](/serverless/development/optimization) to improve performance.
+- Learn about [error handling](/serverless/development/error-handling) for production deployments.
diff --git a/serverless/sdks.mdx b/serverless/sdks.mdx
index 06e4f19b..9d1f3369 100644
--- a/serverless/sdks.mdx
+++ b/serverless/sdks.mdx
@@ -1,7 +1,7 @@
---
-title: "Install the Runpod SDK"
+title: "Install the Runpod Serverless SDK"
sidebarTitle: "Install SDKs"
-description: "Install and configure the Runpod SDK for Python, JavaScript, or Go to interact with Serverless endpoints programmatically."
+description: "Install and configure the Runpod Serverless SDK for Python, JavaScript, or Go to interact with Serverless endpoints programmatically."
---
The Runpod SDK lets you interact with Serverless endpoints programmatically from your own applications.
@@ -116,6 +116,10 @@ func main() {
For more information, see the [Go SDK on GitHub](https://github.com/runpod/go-sdk) and the [Go package documentation](https://pkg.go.dev/github.com/runpod/go-sdk/pkg/sdk).
+## Use the SDKs
+
+For detailed examples of how to use the SDKs to interact with Serverless endpoints, see [Send API requests](/serverless/endpoints/send-requests).
+
## Next steps
Once you've installed and configured the SDK, you're ready to send requests to your Serverless endpoints:
diff --git a/serverless/workers/handler-functions.mdx b/serverless/workers/handler-functions.mdx
index 7a68ac7a..5cc9c581 100644
--- a/serverless/workers/handler-functions.mdx
+++ b/serverless/workers/handler-functions.mdx
@@ -118,6 +118,8 @@ runpod.serverless.start({
By default, outputs from streaming handlers are only available using the `/stream` operation. Set `return_aggregate_stream` to `True` to make outputs available from the `/run` and `/runsync` operations as well.
+To learn more about aggregating streaming outputs, including best practices for batch processing and handling local testing, see [Aggregate streaming outputs](/serverless/development/aggregate-outputs).
+
### Asynchronous handlers
Asynchronous handlers process operations concurrently for improved efficiency. Use these for tasks involving I/O operations, API calls, or processing large datasets.
@@ -284,6 +286,7 @@ If your results exceed these limits, consider stashing them in cloud storage and
Once you've created your handler function, you can:
+* [Learn how to aggregate streaming outputs.](/serverless/development/aggregate-outputs)
* [Explore flags for local testing.](/serverless/development/local-testing)
* [Create a Dockerfile for your worker.](/serverless/workers/create-dockerfile)
* [Deploy your worker image to a Serverless endpoint.](/serverless/workers/deploy)
From f77a328deaba352a4c6419083c9626de580f8807 Mon Sep 17 00:00:00 2001
From: Mo King
Date: Mon, 23 Feb 2026 10:54:33 -0500
Subject: [PATCH 3/8] Rewrite containers tutorial series, add crosslinking
---
pods/overview.mdx | 12 +-
pods/templates/create-custom-template.mdx | 4 +
pods/templates/overview.mdx | 2 +-
serverless/overview.mdx | 2 +-
serverless/quickstart.mdx | 4 +
serverless/workers/create-dockerfile.mdx | 4 +
serverless/workers/overview.mdx | 4 +-
tutorials/introduction/containers.mdx | 170 ++++---
.../containers/create-dockerfiles.mdx | 248 ++++++++--
.../containers/docker-commands.mdx | 467 ++++++++++++++++--
.../introduction/containers/persist-data.mdx | 285 +++++++++--
11 files changed, 993 insertions(+), 209 deletions(-)
diff --git a/pods/overview.mdx b/pods/overview.mdx
index 33b12605..f077225a 100644
--- a/pods/overview.mdx
+++ b/pods/overview.mdx
@@ -19,7 +19,7 @@ When you're ready to get started, [follow this tutorial](/get-started) to create
Each Pod consists of these core components:
-- **Container environment**: An Ubuntu Linux-based container that can run almost any compatible software.
+- **Container environment**: An Ubuntu Linux-based [container](/tutorials/introduction/containers) that can run almost any compatible software.
- **Unique identifier**: Each Pod receives a dynamic ID (e.g., `2s56cp0pof1rmt`) for management and access.
- [Storage](#storage-options):
- : Houses the operating system and temporary storage.
@@ -30,11 +30,11 @@ Each Pod consists of these core components:
## Pod templates
-Pod are pre-configured Docker image setups that let you quickly spin up Pods without manual environment configuration. They're essentially deployment configurations that include specific models, frameworks, or workflows bundled together.
+Pod templates are pre-configured [Docker image](/tutorials/introduction/containers#what-are-images) setups that let you quickly spin up Pods without manual environment configuration. They're essentially deployment configurations that include specific models, frameworks, or workflows bundled together.
Templates eliminate the need to manually set up environments, saving time and reducing configuration errors. For example, instead of installing PyTorch, configuring JupyterLab, and setting up all dependencies yourself, you can select an official Runpod PyTorch template and have everything ready to go instantly.
-To learn how to create your own custom templates, see [Build a custom Pod template](/pods/templates/create-custom-template).
+To learn how to create your own custom templates, see [Build a custom Pod template](/pods/templates/create-custom-template). If you're new to Docker, start with the [introduction to containers](/tutorials/introduction/containers) tutorial series.
## Storage
@@ -54,13 +54,11 @@ You can deploy Pods in several ways:
- [From a template](/pods/templates/overview): Pre-configured environments for quick setup of common workflows.
- **Custom containers**: Pull from any compatible container registry such as Docker Hub, GitHub Container Registry, or Amazon ECR.
-- **Custom images**: Build and deploy your own container images.
+- **Custom images**: [Build and deploy your own container images](/tutorials/introduction/containers/create-dockerfiles).
- [From Serverless repos](/hub/overview#deploy-as-a-pod): Deploy any -compatible repository from the directly as a Pod, providing a cost-effective option for consistent workloads.
-
-When building a container image for Runpod on a Mac (Apple Silicon), use the flag `--platform linux/amd64` to ensure your image is compatible with the platform.
-
+When building a container image for Runpod on a Mac (Apple Silicon), use the flag `--platform linux/amd64` to ensure your image is compatible with the platform. Learn more about [building Docker images](/tutorials/introduction/containers/create-dockerfiles#building-for-runpod).
## Connecting to your Pod
diff --git a/pods/templates/create-custom-template.mdx b/pods/templates/create-custom-template.mdx
index 21c06289..acb7ba31 100644
--- a/pods/templates/create-custom-template.mdx
+++ b/pods/templates/create-custom-template.mdx
@@ -36,6 +36,10 @@ Before you begin, you'll need:
- A Docker Hub account (or access to another container registry).
- Basic familiarity with Docker and Python.
+
+New to Docker? Learn the fundamentals with the [introduction to containers](/tutorials/introduction/containers) tutorial series, which covers [creating Dockerfiles](/tutorials/introduction/containers/create-dockerfiles), [Docker commands](/tutorials/introduction/containers/docker-commands), and [data persistence](/tutorials/introduction/containers/persist-data).
+
+
## Step 1: Set up your project structure
First, create a directory for your custom template and the necessary files.
diff --git a/pods/templates/overview.mdx b/pods/templates/overview.mdx
index 465947d5..11a59dcd 100644
--- a/pods/templates/overview.mdx
+++ b/pods/templates/overview.mdx
@@ -5,7 +5,7 @@ description: "Streamline your Pod deployments with templates, bundling prebuilt
import { PodTooltip, PodEnvironmentVariablesTooltip } from "/snippets/tooltips.jsx";
- templates are pre-configured Docker image setups that let you quickly spin up Pods without manual environment configuration. They're essentially deployment configurations that include specific models, frameworks, or workflows bundled together.
+ templates are pre-configured [Docker image](/tutorials/introduction/containers#what-are-images) setups that let you quickly spin up Pods without manual environment configuration. They're essentially deployment configurations that include specific models, frameworks, or workflows bundled together.
Templates eliminate the need to manually set up environments, saving time and reducing configuration errors. For example, instead of installing PyTorch, configuring JupyterLab, and setting up all dependencies yourself, you can select a pre-configured template and have everything ready to go instantly.
diff --git a/serverless/overview.mdx b/serverless/overview.mdx
index 339d43e5..4d7d38b5 100644
--- a/serverless/overview.mdx
+++ b/serverless/overview.mdx
@@ -47,7 +47,7 @@ The access point for your Serverless application. Endpoints provide a URL where
### [Workers](/serverless/workers/overview)
-The container instances that execute your code when requests arrive at your endpoint. Runpod automatically manages worker lifecycle, starting them when needed and stopping them when idle to optimize resource usage.
+The container instances that execute your code when requests arrive at your endpoint. Each worker runs your custom [Docker container](/tutorials/introduction/containers) with your application code and dependencies. Runpod automatically manages worker lifecycle, starting them when needed and stopping them when idle to optimize resource usage.
### [Handler functions](/serverless/workers/handler-functions)
diff --git a/serverless/quickstart.mdx b/serverless/quickstart.mdx
index c102f504..c72b52cc 100644
--- a/serverless/quickstart.mdx
+++ b/serverless/quickstart.mdx
@@ -143,6 +143,10 @@ INFO | Local testing complete, exiting.
Create a file named `Dockerfile` with the following content:
+
+New to Dockerfiles? Learn the fundamentals with our [introduction to containers](/tutorials/introduction/containers) tutorial series, which covers [creating Dockerfiles](/tutorials/introduction/containers/create-dockerfiles), [Docker commands](/tutorials/introduction/containers/docker-commands), and [persisting data](/tutorials/introduction/containers/persist-data).
+
+
```dockerfile Dockerfile
FROM python:3.10-slim
diff --git a/serverless/workers/create-dockerfile.mdx b/serverless/workers/create-dockerfile.mdx
index 698cb816..d29cf7e6 100644
--- a/serverless/workers/create-dockerfile.mdx
+++ b/serverless/workers/create-dockerfile.mdx
@@ -7,6 +7,10 @@ import { HandlerFunctionTooltip, CUDATooltip } from "/snippets/tooltips.jsx";
A Dockerfile defines the build process for a Docker image containing your handler function and all its dependencies. This page explains how to organize your project files and create a Dockerfile for your Serverless worker.
+
+New to Docker? Learn the fundamentals with the [introduction to containers](/tutorials/introduction/containers) tutorial series, which covers [creating Dockerfiles](/tutorials/introduction/containers/create-dockerfiles), [essential Docker commands](/tutorials/introduction/containers/docker-commands), and [data persistence](/tutorials/introduction/containers/persist-data).
+
+
## Project organization
Organize your project files in a clear directory structure:
diff --git a/serverless/workers/overview.mdx b/serverless/workers/overview.mdx
index e4d0a477..48e4c46a 100644
--- a/serverless/workers/overview.mdx
+++ b/serverless/workers/overview.mdx
@@ -5,7 +5,9 @@ description: "Package your handler function for deployment."
import { WorkerContainerDiskTooltip, MachineTooltip, NetworkVolumeTooltip } from "/snippets/tooltips.jsx";
-Workers are the containerized environments that run your code on Runpod Serverless. After creating and testing your [handler function](/serverless/workers/handler-functions), you need to package it into a Docker image and deploy it to an endpoint.
+Workers are the containerized environments that run your code on Runpod Serverless. After creating and testing your [handler function](/serverless/workers/handler-functions), you need to package it into a Docker image and deploy it to an endpoint.
+
+If you're new to containers and Docker, start with the [introduction to containers](/tutorials/introduction/containers) tutorial series to learn the fundamentals.
This page provides an overview of the worker deployment process.
diff --git a/tutorials/introduction/containers.mdx b/tutorials/introduction/containers.mdx
index 0510ee9c..d09dad64 100644
--- a/tutorials/introduction/containers.mdx
+++ b/tutorials/introduction/containers.mdx
@@ -1,112 +1,136 @@
---
-title: "Overview"
-sidebar_label: Overview
-description: Learn about containers and how to use them with Runpod
+title: "Introduction to containers"
+sidebarTitle: "Overview"
+description: "Learn about Docker containers, images, and how they enable portable application deployment on Runpod."
---
-## What are containers?
+Containers are the foundation of modern cloud computing, enabling you to package applications with all their dependencies and run them consistently across different environments. This tutorial series teaches you the container fundamentals you need to work effectively with Runpod's [Serverless](/serverless/overview) and [Pods](/pods/overview) platforms.
+
+## What you'll learn
+
+In this tutorial series, you will learn:
+
+- What containers and images are and why they matter for cloud deployment.
+- How to create custom Docker images using Dockerfiles.
+- Essential Docker commands for building, running, and managing containers.
+- How to persist data outside of containers using volumes.
+- How container concepts apply to Runpod's Serverless and Pods platforms.
+
+## Requirements
-> A container is an isolated environment for your code. This means that a container has no knowledge of your operating system, or your files. It runs on the environment provided to you by Docker Desktop. Containers have everything that your code needs in order to run, down to a base operating system.
+To follow this tutorial series, you need:
+
+- Docker Desktop installed on your system.
+- Basic command-line familiarity.
+- A text editor for creating Dockerfiles and scripts.
+
+If you haven't installed Docker yet, follow Docker's [official installation guide](https://docs.docker.com/get-started/get-docker/) for your operating system.
+
+## What are containers?
-[From Docker's website](https://docs.docker.com/guides/walkthroughs/what-is-a-container/#:~:text=A%20container%20is%20an%20isolated,to%20a%20base%20operating%20system)
+A container is an isolated environment for your code. Containers package applications with everything they need to run, including the code, runtime, system tools, libraries, and settings. Unlike virtual machines, containers share the host operating system's kernel, making them lightweight and fast to start.
-Developers package their applications, frameworks, and libraries into a Docker container. Then, those containers can run outside their development environment.
+When you run a container, it has no knowledge of your operating system or your files. It runs in a self-contained environment with only the resources and files you explicitly provide. This isolation ensures that applications behave consistently regardless of where they run—whether on your laptop, a teammate's computer, or in the cloud.
### Why use containers?
-> Build, ship, and run anywhere.
+Containers solve the classic "it works on my machine" problem by ensuring applications run identically across all environments. This consistency makes containers essential for modern software development and deployment.
-Containers are self-contained and run anywhere Docker runs. This means you can run a container on-premises or in the cloud, as well as in hybrid environments. Containers include both the application and any dependencies, such as libraries and frameworks, configuration data, and certificates needed to run your application.
+Key benefits include:
-In cloud computing, you get the best cold start times with containers.
+- **Portability**: Run containers anywhere Docker runs—on-premises, in the cloud, or in hybrid environments.
+- **Consistency**: Applications behave the same way in development, testing, and production.
+- **Fast cold starts**: Containers start in seconds, making them ideal for serverless computing.
+- **Dependency isolation**: Each container includes its own dependencies, preventing conflicts between applications.
+- **Resource efficiency**: Containers share the host OS kernel, using fewer resources than virtual machines.
+
+### Containers and Runpod
+
+Runpod uses containers extensively across its platform:
+
+- **Serverless workers**: When you [deploy a Serverless endpoint](/serverless/quickstart), you provide a container image that defines how your [worker processes requests](/serverless/workers/overview). Your handler code runs inside the container, and Runpod automatically scales workers up and down based on demand.
+- **Pods**: With [Pods](/pods/overview), you can bring your own container (BYOC) to [run long-running GPU workloads](/pods/manage-pods) like training, inference servers, or development environments. [Choose from pre-built templates](/pods/choose-a-pod) or deploy custom containers.
+- **Templates**: Runpod's [templates](/pods/templates/overview) are pre-configured container images optimized for specific tasks. You can [create custom templates](/pods/templates/create-custom-template) to standardize your container configurations across deployments.
+
+Understanding containers is essential for leveraging Runpod's full capabilities.
## What are images?
-Docker images are fixed templates for creating containers. They ensure that applications operate consistently and reliably across different environments, which is vital for modern software development.
+Docker images are read-only templates used to create containers. Think of an image as a snapshot that includes your application code, runtime environment, libraries, and all dependencies needed to run your application.
-To create Docker images, you use a process known as "Docker build." This process uses a Dockerfile, a text document containing a sequence of commands, as instructions guiding Docker on how to build the image.
+Images are built using a Dockerfile, which contains a series of instructions for assembling the image. Once built, images can be stored in registries like Docker Hub or private registries, making them easy to share and deploy.
-### Why use images?
+### How images and containers relate
+
+The relationship between images and containers is similar to the relationship between a class and an instance in programming:
-Using Docker images helps in various stages of software development, including testing, development, and deployment. Images ensure a seamless workflow across diverse computing environments.
+- **Image**: A template or blueprint (like a class definition).
+- **Container**: A running instance created from an image (like an object).
-### Why not use images?
+You can create multiple containers from a single image, and each container runs independently with its own state and data.
+
+### Why use images?
-You must rebuild and push the container image, then edit your endpoint to use the new image each time you iterate on your code. Since development requires changing your code every time you need to troubleshoot a problem or add a feature, this workflow can be inconvenient.
+Docker images provide several advantages for development and deployment:
-### What is Docker Hub?
+- **Reproducibility**: Images ensure your application runs the same way every time.
+- **Version control**: You can tag images with version numbers and roll back to previous versions if needed.
+- **Easy distribution**: Store images in registries and pull them onto any system with Docker installed.
+- **Efficient storage**: Images use layers that can be shared between different images, reducing storage requirements.
-After their creation, Docker images are stored in a registry, such as Docker Hub. From these registries, you can download images and use them to generate containers, which make it easy to widely distribute and deploy applications.
+### When to use custom images
-Now that you've got an understanding of Docker, containers, images, and whether containerization is right for you, let's move on to installing Docker.
+While you can use pre-built images from Docker Hub, you'll often need custom images that:
-## Installing Docker
+- Include your specific application code.
+- Install custom dependencies or libraries.
+- Configure [environment variables](/serverless/development/environment-variables) or settings.
+- Optimize for your specific use case.
-For this walkthrough, install Docker Desktop. Docker Desktop bundles a variety of tools including:
+For Runpod Serverless deployments, you'll create custom images that include your [handler functions](/serverless/workers/handler-functions) and model dependencies. For Pods, you might use existing [templates](/pods/templates/manage-templates) or create custom ones with your specific tools and frameworks installed.
-* Docker GUI
-* Docker CLI
-* Docker extensions
-* Docker Compose
+## What is Docker Hub?
-The majority of this walkthrough uses the Docker CLI, but feel free to use the GUI if you prefer.
+[Docker Hub](https://hub.docker.com/) is the default public registry for Docker images. It hosts millions of pre-built images for popular software, frameworks, and operating systems. You can pull images from Docker Hub, use them as base images for your own containers, or push your custom images to share with others.
-For the best installation experience, see Docker's [official documentation](https://docs.docker.com/get-started/get-docker/).
+When you run a command like `docker run nginx`, Docker automatically pulls the `nginx` image from Docker Hub if it's not already available locally.
-### Running your first command
+For production deployments on Runpod, you'll typically push your custom images to Docker Hub or a private registry, then configure your endpoint or Pod to use that image.
-Now that you've installed Docker, open a terminal window and run the following command:
+## Tutorial series
-```
-docker version
-```
+This tutorial series guides you through container fundamentals in a hands-on way:
-You should see something similar to the following output.
+
+
+ Learn how to write Dockerfiles, build custom images, and run your first container. Start with Docker installation and work up to creating executable containers.
+
-```swift
-docker version
-Client: Docker Engine - Community
- Version: 24.0.7
- API version: 1.43
- Go version: go1.21.3
- Git commit: afdd53b4e3
- Built: Thu Oct 26 07:06:42 2023
- OS/Arch: darwin/arm64
- Context: desktop-linux
+
+ Master essential Docker CLI commands for building, running, managing, and debugging containers. Includes Runpod-specific guidance for deployment.
+
-Server: Docker Desktop 4.26.1 (131620)
- Engine:
- Version: 24.0.7
- API version: 1.43 (minimum version 1.12)
- Go version: go1.20.10
- Git commit: 311b9ff
- Built: Thu Oct 26 09:08:15 2023
- OS/Arch: linux/arm64
- Experimental: false
- containerd:
- Version: 1.6.25
- GitCommit: abcd
- runc:
- Version: 1.1.10
- GitCommit: v1.1.10-0-g18a0cb0
- docker-init:
- Version: 0.19.0
-```
+
+ Understand how to use Docker volumes to persist data outside of containers, essential for machine learning training and data processing workflows.
+
+
-If at any point you need help with a command, you can use the `--help` flag to see documentation on the command you're running.
+## Next steps
-```python
-docker --help
-```
+Ready to get hands-on with Docker? Start with [creating your first Dockerfile](/tutorials/introduction/containers/create-dockerfiles).
-Let's run `busybox` from the command line to print out today's date.
+For more in-depth container concepts, see Docker's [container concepts documentation](https://docs.docker.com/guides/docker-concepts/).
-```python
-docker run busybox sh -c 'echo "The time is: $(date)"'
-# The time is: Thu Jan 11 06:35:39 UTC 2024
-```
+When you're ready to deploy containers on Runpod:
-* `busybox` is a lightweight Docker image with the bare minimum Linux utilities installed, including `echo`
-* The `echo` command prints the container's uptime.
+**For Serverless:**
+- Learn about [Serverless workers](/serverless/workers/overview) for scalable, GPU-powered inference.
+- Review [creating Dockerfiles for Serverless](/serverless/workers/create-dockerfile) with Runpod-specific best practices.
+- Understand [endpoint configurations](/serverless/endpoints/endpoint-configurations) for optimizing performance and cost.
+- Explore [deploying your first endpoint](/serverless/quickstart) to get started quickly.
-You've successfully installed Docker and run your first commands.
+**For Pods:**
+- Explore [Pods](/pods/overview) for long-running GPU workloads and development environments.
+- Learn how to [choose the right Pod](/pods/choose-a-pod) for your workload.
+- Understand [connecting to Pods](/pods/connect-to-a-pod) via SSH, web terminal, or IDE.
+- Review [Pod storage options](/pods/storage/types) for persisting data.
diff --git a/tutorials/introduction/containers/create-dockerfiles.mdx b/tutorials/introduction/containers/create-dockerfiles.mdx
index 91243477..fc439c7e 100644
--- a/tutorials/introduction/containers/create-dockerfiles.mdx
+++ b/tutorials/introduction/containers/create-dockerfiles.mdx
@@ -1,82 +1,250 @@
---
-title: "Dockerfile"
+title: "Create Dockerfiles"
+sidebarTitle: "Create Dockerfiles"
+description: "Learn how to write Dockerfiles, build custom images, and run your first containers."
---
-In the previous step, you ran a command that prints the container's uptime. Now you'll create a Dockerfile to customize the contents of your own Docker image.
+A Dockerfile is a text file containing instructions for building a Docker image. By creating a Dockerfile, you can package your application with its dependencies and configuration, making it easy to deploy anywhere Docker runs. This guide walks you through creating your first Dockerfile, building an image, and running a container.
-### Create a Dockerfile
+## What you'll learn
-Create a new file called `Dockerfile` and add the following items.
+In this guide, you will learn how to:
+
+- Verify your Docker installation.
+- Run your first container from an existing image.
+- Write a Dockerfile with common instructions.
+- Create and configure an entrypoint script.
+- Build a custom Docker image.
+- Run containers from your custom image.
+
+## Requirements
+
+Before starting, you need:
+
+- Docker Desktop installed and running (see the [overview](/tutorials/introduction/containers) for installation instructions).
+- Basic command-line familiarity.
+- A text editor for creating files.
+
+## Step 1: Verify Docker installation
+
+First, verify that Docker is installed correctly by checking the version:
+
+```bash
+docker version
+```
+
+You should see output showing both the client and server versions. If you see an error about the Docker daemon not running, make sure Docker Desktop is started.
+
+To test that Docker can pull and run images, run a simple command using the `busybox` image:
```bash
+docker run busybox echo "Hello from Docker!"
+```
+
+This command downloads the lightweight `busybox` image (if not already present), starts a container from it, runs the `echo` command inside the container, and then exits. You should see "Hello from Docker!" printed to your terminal.
+
+Breaking down what happened:
+
+- `docker run`: Creates and starts a new container.
+- `busybox`: The image to use (automatically pulled from Docker Hub if needed).
+- `echo "Hello from Docker!"`: The command to run inside the container.
+
+## Step 2: Create a project directory
+
+Create a new directory for this tutorial and navigate into it:
+
+```bash
+mkdir my-first-container
+cd my-first-container
+```
+
+This keeps your Dockerfile and related files organized in one place.
+
+## Step 3: Write a Dockerfile
+
+Create a file named `Dockerfile` (no file extension) with the following content:
+
+```dockerfile
FROM busybox
COPY entrypoint.sh /
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
```
-This Dockerfile starts from the `busybox` image like we used before. It then adds a custom `entrypoint.sh` script, makes it executable, and configures it as the entrypoint.
+Let's understand each instruction:
-## The entrypoint script
+**FROM busybox**: This specifies the base image for your container. `busybox` is a minimal Linux image with basic utilities. Every Dockerfile must start with a `FROM` instruction. For real applications, you might use images like `python:3.11`, `node:20`, or `nvidia/cuda:12.0.0-runtime-ubuntu22.04`.
-Now let's create `entrypoint.sh` with the following contents:
+**COPY entrypoint.sh /**: This copies the `entrypoint.sh` file from your local directory into the root directory of the container's filesystem. The `COPY` instruction is how you add your application code and files to the image.
-```bash
+**RUN chmod +x /entrypoint.sh**: This executes a command during the image build process to make the script executable. `RUN` instructions execute commands and save the results as a new layer in the image.
+
+**ENTRYPOINT ["/entrypoint.sh"]**: This specifies the command that runs when a container starts from this image. Using the JSON array syntax (with brackets and quotes) is recommended as it prevents shell processing quirks.
+
+## Step 4: Create the entrypoint script
+
+Create a file named `entrypoint.sh` in the same directory:
+
+```sh
#!/bin/sh
-echo "The time is: $(date)"
+echo "Container started at: $(date)"
+echo "Running on: $(uname -a)"
```
-
+This simple script prints the current date/time and system information when the container starts.
-While we named this script `entrypoint.sh` you will see a variety of naming conventions; such as:
+### Understanding entrypoint scripts
-* `start.sh`
-* `CMD.sh`
-* `entry_path.sh`
+An entrypoint script is the command that runs when your container starts. Think of it as the "main" function of your container. Common uses include:
-These files are normally placed in a folder called `script` but it is dependent on the maintainers of that repository.
+- **Starting applications**: Launch a web server, API, or background process.
+- **Setup tasks**: Initialize databases, check configurations, or [set environment variables](/pods/templates/environment-variables).
+- **Processing workflows**: Run data processing pipelines or batch jobs.
-
+For Runpod Serverless workers, your entrypoint typically starts a Python script that imports the `runpod` library and defines your [handler function](/serverless/workers/handler-functions). For example, you might run `python handler.py` which calls `runpod.serverless.start()`. For Pods, the entrypoint might start [JupyterLab](/tutorials/pods/run-your-first), a training script, or a development environment like VS Code.
-This is a simple script that will print the current time when the container starts.
+
+While we named this script `entrypoint.sh`, you'll see various naming conventions in Docker projects:
-### Why an entrypoint script:
+- `start.sh`
+- `docker-entrypoint.sh`
+- `run.sh`
+- `cmd.sh`
-* It lets you customize what command gets run when a container starts from your image.
-* For example, our script runs date to print the time.
-* Without it, containers would exit immediately after starting.
-* Entrypoints make images executable and easier to reuse.
+These scripts are often placed in a `/scripts` or `/app` directory, depending on the project structure.
+
-## Build the image
+## Step 5: Build the image
-With those files created, we can now build a Docker image using our Dockerfile:
+Now build a Docker image from your Dockerfile:
+```bash
+docker build -t my-time-image .
```
-docker image build -t my-time-image .
-```
-This will build the image named `my-time-image` from the Dockerfile in the current directory.
+Breaking down this command:
+
+- `docker build`: Initiates the image build process.
+- `-t my-time-image`: Tags the image with a name for easy reference. Without a tag, you'd have to use the image ID.
+- `.`: Specifies the build context (current directory). Docker looks for a Dockerfile here and can access files in this directory.
-### Why build a custom image:
+You'll see output showing each Dockerfile instruction being executed. Docker builds images in layers, and each instruction creates a new layer. These layers are cached, so rebuilding after small changes is fast.
-* Lets you package up custom dependencies and configurations.
-* For example you can install extra software needed for your app.
-* Makes deploying applications more reliable and portable.
-* Instead of installing things manually on every server, just use your image.
-* Custom images can be shared and reused easily across environments.
-* Building images puts your application into a standardized unit that "runs anywhere".
-* You can version images over time as you update configurations.
+### Why build custom images?
-## Run the image
+Custom images let you:
-Finally, let's run a container from our new image:
+- **Package dependencies**: Install specific libraries, frameworks, or tools your application needs.
+- **Configure environments**: Set [environment variables](/serverless/development/environment-variables), create directories, or configure settings.
+- **Include application code**: Bundle your code so it's ready to run anywhere.
+- **Version applications**: Tag images with version numbers to track changes over time.
+- **Ensure consistency**: Eliminate "works on my machine" problems by standardizing the environment.
+For Runpod deployments, custom images are essential:
+
+- **Serverless workers** need images with your [handler code](/serverless/workers/handler-functions), inference libraries, and optionally [cached models](/serverless/endpoints/model-caching).
+- **Pods** might need images with specific ML frameworks, CUDA versions, development tools, or [custom configurations saved as templates](/pods/templates/create-custom-template).
+
+## Step 6: Run the container
+
+Run a container from your newly built image:
+
+```bash
+docker run my-time-image
```
+
+You should see output showing the container start time and system information, confirming that your entrypoint script ran successfully.
+
+The container executes the script and then exits. This is normal behavior for containers that complete their task. In production, containers often run continuously (like web servers) or process tasks and exit (like batch jobs or Serverless functions).
+
+## Step 7: Experiment with your container
+
+Try a few variations to understand how containers work:
+
+**Run the container multiple times** to see different timestamps:
+
+```bash
+docker run my-time-image
docker run my-time-image
```
-We should see the same output as before printing the current time!
+Each invocation creates a new container instance with a fresh environment.
+
+**View running containers**: first, start a long-running command:
+
+```bash
+docker run busybox sleep 30
+```
+
+Then in another terminal:
+
+```bash
+docker ps
+```
+
+This shows currently running containers with their IDs, names, and status.
+
+**See all containers**, including stopped ones:
+
+```bash
+docker ps -a
+```
+
+You'll see all the containers you've created, even those that have exited.
+
+## Understanding Dockerfile best practices
+
+As you create more complex Dockerfiles, keep these practices in mind:
+
+**Use specific base image tags**: Instead of `FROM python:3`, use `FROM python:3.11-slim` to ensure consistent builds.
+
+**Minimize layers**: Combine related `RUN` commands with `&&` to reduce image size:
+
+```dockerfile
+RUN apt-get update && \
+ apt-get install -y package1 package2 && \
+ apt-get clean
+```
+
+**Order instructions by change frequency**: Put instructions that change rarely (like installing system packages) before instructions that change often (like copying application code). This maximizes layer caching.
+
+**Clean up in the same layer**: Remove temporary files in the same `RUN` command that creates them:
+
+```dockerfile
+RUN wget https://example.com/file.tar.gz && \
+ tar -xzf file.tar.gz && \
+ rm file.tar.gz
+```
+
+For more best practices, see Docker's [Dockerfile reference documentation](https://docs.docker.com/reference/dockerfile/).
+
+## Building for Runpod
+
+When building images for Runpod, keep these platform-specific considerations in mind:
+
+**Use the correct architecture**: Runpod's infrastructure uses `linux/amd64` architecture. If you're building on an Apple Silicon Mac (ARM64), specify the platform:
+
+```bash
+docker build --platform=linux/amd64 -t my-image .
+```
+
+**Optimize for cold starts**: Smaller images start faster, reducing [cold start times](/serverless/overview#cold-starts) for Serverless workers. Use minimal base images like `-slim` or `-alpine` variants when possible.
+
+**Include model caching**: For ML models, consider using Runpod's [model caching feature](/serverless/endpoints/model-caching) instead of baking large models into your image. This dramatically reduces cold starts and deployment costs.
+
+**Configure GPU access**: For GPU workloads, ensure your base image includes the correct [CUDA version for your framework](/pods/choose-a-pod#gpu-compatibility).
+
+For detailed guidance on creating Dockerfiles for Serverless workers, see [creating Dockerfiles for Serverless](/serverless/workers/create-dockerfile).
+
+## Next steps
+
+Now that you can create Dockerfiles and build images, continue learning:
-Entrypoints and Dockerfiles let you define reusable, executable containers that run the software and commands you need. This makes deploying and sharing applications much easier without per-server configuration.
+**Continue the tutorial series:**
+- [Master Docker commands](/tutorials/introduction/containers/docker-commands) for building, running, and managing containers.
+- [Learn about data persistence](/tutorials/introduction/containers/persist-data) with Docker volumes.
-By putting commands like this into a Dockerfile, you can easily build reusable and shareable images.
+**Deploy on Runpod:**
+- For Serverless: [Deploy your first endpoint](/serverless/quickstart) and learn about [worker deployment](/serverless/workers/deploy).
+- For Pods: [Run your first Pod](/tutorials/pods/run-your-first) and explore [connecting to Pods](/pods/connect-to-a-pod).
+- Review [creating Dockerfiles for Serverless](/serverless/workers/create-dockerfile) with production best practices.
diff --git a/tutorials/introduction/containers/docker-commands.mdx b/tutorials/introduction/containers/docker-commands.mdx
index e4ccdc5f..9febbd10 100644
--- a/tutorials/introduction/containers/docker-commands.mdx
+++ b/tutorials/introduction/containers/docker-commands.mdx
@@ -1,87 +1,468 @@
---
-title: "Docker commands"
+title: "Docker command reference"
+sidebarTitle: "Docker commands"
+description: "Essential Docker CLI commands for building, running, managing, and debugging containers."
---
-Runpod enables bring-your-own-container (BYOC) development. If you choose this workflow, you will be using Docker commands to build, run, and manage your containers.
+This reference guide covers the most commonly used Docker commands for working with images and containers. Use this as a quick reference when building and deploying applications, especially for Runpod's bring-your-own-container (BYOC) workflows with [Serverless](/serverless/workers/overview) and [Pods](/pods/overview).
-The following is a reference sheet to some of the most commonly used Docker commands.
+## What you'll learn
-## Login
+In this reference, you'll find:
-Log in to a registry (like Docker Hub) from the CLI. This saves credentials locally.
+- Commands for building and managing Docker images.
+- Commands for running and controlling containers.
+- Commands for working with volumes and networks.
+- Common command workflows for development and deployment.
+- Runpod-specific guidance for container deployment.
+## Building images
+
+These commands help you create and manage Docker images.
+
+### docker build
+
+Builds a Docker image from a Dockerfile. This is how you create custom images with your application code and dependencies.
+
+```bash
+# Build an image from the current directory
+docker build -t myapp:latest .
+
+# Build with a specific Dockerfile
+docker build -f Dockerfile.prod -t myapp:prod .
+
+# Build for a specific platform (important for Runpod)
+docker build --platform=linux/amd64 -t myapp:latest .
+```
+
+**Common options:**
+
+- `-t, --tag`: Name and optionally tag the image in `name:tag` format.
+- `-f, --file`: Specify a Dockerfile (default is `./Dockerfile`).
+- `--platform`: Set target platform for the build (use `linux/amd64` for Runpod).
+- `--no-cache`: Build without using cache from previous builds.
+- `--build-arg`: Set build-time variables defined in the Dockerfile.
+
+
+**For Runpod deployments**: Always use `--platform=linux/amd64` when building on Apple Silicon Macs or ARM systems. Runpod's infrastructure requires AMD64 (x86_64) architecture images.
+
+
+### docker images
+
+Lists Docker images available on your local system.
+
+```bash
+# List all images
+docker images
+
+# List images with specific name
+docker images myapp
+
+# Show all images including intermediate layers
+docker images -a
+```
+
+Each image shows its repository, tag, image ID, creation date, and size. Image IDs are useful when you need to reference an untagged image or want to be precise about which image to use.
+
+### docker tag
+
+Creates a new tag for an existing image, useful for versioning or preparing images for registry pushes.
+
+```bash
+# Tag an image for Docker Hub
+docker tag myapp:latest username/myapp:v1.0
+
+# Tag an image for a private registry
+docker tag myapp:latest registry.example.com/myapp:latest
+```
+
+Tags don't create copies of images; they're just additional names pointing to the same image data.
+
+### docker rmi
+
+Removes Docker images from your local system. Useful for cleaning up unused images and freeing disk space.
+
+```bash
+# Remove an image by name
+docker rmi myapp:latest
+
+# Remove an image by ID
+docker rmi abc123def456
+
+# Force remove an image even if containers use it
+docker rmi -f myapp:latest
+
+# Remove all unused images
+docker image prune
```
+
+You can't remove an image while any containers (running or stopped) reference it, unless you force removal with `-f`.
+
+## Managing images in registries
+
+These commands help you share images via Docker registries.
+
+### docker login
+
+Authenticates with a Docker registry to push or pull private images.
+
+```bash
+# Log in to Docker Hub
docker login
+
+# Log in with username
docker login -u myusername
+
+# Log in to a private registry
+docker login registry.example.com
```
-## Images
+Credentials are stored locally, so you only need to log in once per registry. For Runpod, you'll typically push images to Docker Hub or a private registry, then configure your endpoint or Pod to pull from that registry.
-`docker push` - Uploads a container image to a registry like Docker Hub. `docker pull` - Downloads container images from a registry like Docker Hub. `docker images` - Lists container images that have been downloaded locally. `docker rmi` - Deletes/removes a Docker container image from the machine.
+### docker push
+
+Uploads a Docker image to a registry, making it available for deployment.
```bash
-docker push myuser/myimage:v1 # Push custom image
-docker pull someimage # Pull shared image
-docker images # List downloaded images
-docker rmi # Remove/delete image
+# Push to Docker Hub
+docker push username/myapp:latest
+
+# Push a specific version
+docker push username/myapp:v1.0
```
-## Containers
+Before pushing, make sure you've tagged your image with your registry username or private registry URL. For Runpod:
-`docker run` - Launches a new container from a Docker image. `docker ps` - Prints out a list of containers currently running. `docker logs` - Shows stdout/stderr logs for a specific container. `docker stop/rm` - Stops or totally removes a running container.
+- **Serverless**: After pushing your image, specify the image name when [deploying your worker](/serverless/workers/deploy). Runpod pulls the image when creating workers.
+- **Pods**: Reference your registry image when [choosing a Pod template](/pods/choose-a-pod) or [creating a custom template](/pods/templates/create-custom-template).
-```python
-docker run # Start new container from image
-docker ps # List running containers
-docker logs # Print logs from container
-docker stop # Stop running container
-docker rm # Remove/delete container
+### docker pull
+
+Downloads a Docker image from a registry to your local system.
+
+```bash
+# Pull latest version
+docker pull nginx
+
+# Pull specific version
+docker pull nginx:1.25
+
+# Pull from a private registry
+docker pull registry.example.com/myapp:latest
```
-## Dockerfile
+If you don't specify a tag, Docker pulls the `latest` tag by default. Be aware that `latest` doesn't necessarily mean the most recent version—it's just a tag name that image maintainers choose to use or not.
+
+## Running containers
+
+These commands create and manage running containers.
+
+### docker run
+
+Creates and starts a new container from an image. This is the most commonly used Docker command.
+
+```bash
+# Run a simple command
+docker run busybox echo "Hello World"
+
+# Run in detached mode (background)
+docker run -d nginx
+
+# Run with a name
+docker run --name my-container nginx
+
+# Run with port mapping
+docker run -p 8080:80 nginx
-`docker build` - Builds a Docker image by reading build instructions from a Dockerfile.
+# Run with volume mount
+docker run -v $(pwd)/data:/data myapp
-```python
-docker build # Build image from Dockerfile
-docker build --platform=linux/amd64 # Build for specific architecture
+# Run with environment variables
+docker run -e API_KEY=secret myapp
+
+# Run interactively with a shell
+docker run -it ubuntu /bin/bash
```
-
+**Common options:**
+
+- `-d, --detach`: Run container in background.
+- `-p, --publish`: Map host port to container port (`host:container`).
+- `-v, --volume`: Mount a volume (`host_path:container_path`).
+- `-e, --env`: Set environment variables.
+- `--name`: Assign a name to the container.
+- `-it`: Interactive mode with terminal (for shells).
+- `--rm`: Automatically remove container when it exits.
+- `--gpus all`: Enable GPU access (relevant for Runpod Pods).
+
+### docker ps
-For the purposes of using Docker with Runpod, you should ensure your build command uses the `--platform=linux/amd64` flag to build for the correct architecture.
+Lists running containers. Use this to check container status and get container IDs.
+
+```bash
+# List running containers
+docker ps
+
+# List all containers (including stopped)
+docker ps -a
+
+# Show only container IDs
+docker ps -q
+```
+
+The output shows container ID, image, command, creation time, status, ports, and name. Container IDs and names are useful for other commands like `docker stop`, `docker logs`, or `docker exec`.
+
+### docker stop
+
+Gracefully stops a running container by sending a SIGTERM signal, then SIGKILL if it doesn't stop within a timeout.
+
+```bash
+# Stop a container by name
+docker stop my-container
+
+# Stop a container by ID
+docker stop abc123
+
+# Stop multiple containers
+docker stop container1 container2 container3
+
+# Stop all running containers
+docker stop $(docker ps -q)
+```
+
+Stopped containers remain on your system until you remove them with `docker rm`.
+
+### docker start
+
+Starts a stopped container. Unlike `docker run`, this restarts an existing container rather than creating a new one.
+
+```bash
+# Start a stopped container
+docker start my-container
+
+# Start and attach to container output
+docker start -a my-container
+```
+
+### docker restart
+
+Stops and then starts a container. Useful for applying configuration changes or resolving issues.
+
+```bash
+docker restart my-container
+```
-
+### docker rm
+
+Removes stopped containers from your system.
+
+```bash
+# Remove a stopped container
+docker rm my-container
+
+# Force remove a running container
+docker rm -f my-container
+
+# Remove all stopped containers
+docker container prune
+```
+
+## Debugging containers
+
+These commands help you inspect and troubleshoot running containers.
+
+### docker logs
+
+Shows the stdout and stderr output from a container. Essential for debugging and monitoring.
+
+```bash
+# View logs
+docker logs my-container
+
+# Follow logs in real-time
+docker logs -f my-container
+
+# View last 100 lines
+docker logs --tail 100 my-container
+
+# View logs with timestamps
+docker logs -t my-container
+```
+
+For Runpod Serverless, you can view worker logs through the web console or API. For Pods, `docker logs` helps debug containers you're running during development.
+
+### docker exec
+
+Executes a command in a running container. Extremely useful for debugging and inspecting container state.
+
+```bash
+# Open a shell in a running container
+docker exec -it my-container /bin/bash
+
+# Run a command and see output
+docker exec my-container ls -la /app
+
+# Run as a specific user
+docker exec -u root my-container apt-get update
+```
+
+This is invaluable when you need to inspect files, check processes, or debug issues in a running container.
+
+### docker inspect
+
+Returns detailed low-level information about containers or images in JSON format.
+
+```bash
+# Inspect a container
+docker inspect my-container
+
+# Get specific information with formatting
+docker inspect --format='{{.State.Status}}' my-container
+
+# Inspect an image
+docker inspect nginx:latest
+```
+
+Useful for getting IP addresses, environment variables, mount points, and other configuration details.
## Volumes
-
+These commands manage persistent storage for containers.
+
+### docker volume create
+
+Creates a named volume that can persist data beyond container lifecycles.
+
+```bash
+# Create a volume
+docker volume create my-data
+
+# Create with specific driver
+docker volume create --driver local my-data
+```
+
+Named volumes are managed by Docker and stored in a Docker-managed location on the host. For more on volumes, see the [persist data guide](/tutorials/introduction/containers/persist-data).
+
+### docker volume ls
+
+Lists all volumes on your system.
+
+```bash
+docker volume ls
+```
+
+### docker volume rm
-When working with a Docker and Runpod, see how to [attach a network volume](/storage/network-volumes).
+Removes a volume. The volume must not be in use by any containers.
+
+```bash
+docker volume rm my-data
+```
-
+
+When working with Runpod, see how to [attach network volumes](/storage/network-volumes) to persist data across Serverless workers or Pod instances. Network volumes provide persistent storage that survives container restarts and can be accessed by multiple workers or Pods.
+
-`docker volume create` - Creates a persisted and managed volume that can outlive containers. `docker run -v` - Mounts a volume into a specific container to allow persisting data past container lifecycle.
+## Networks
+
+These commands manage Docker networks for container communication.
+
+### docker network create
+
+Creates a custom network that allows containers to communicate with each other.
+
+```bash
+# Create a bridge network
+docker network create my-network
-```ruby
-docker volume create # Create volume
-docker run -v :/data # Mount volume into container
+# Create with specific driver
+docker network create --driver bridge my-network
```
-## Network
+### docker network connect
-`docker network create` - Creates a custom virtual network for containers to communicate over. `docker run --network=` - Connects a running container to a Docker user-defined network.
+Connects a container to a network.
-```python
-docker network create # Create user-defined network
-docker run --network= # Connect container
+```bash
+docker network connect my-network my-container
```
-## Execute
+Containers on the same network can communicate using container names as hostnames.
+
+## Common workflows
+
+Here are typical command sequences for common tasks.
-`docker exec` - Execute a command in an already running container. Useful for debugging/inspecting containers:
+### Build, tag, and push a custom image
```bash
-docker exec
-docker exec mycontainer ls -l /etc # List files in container
+# Build for Runpod
+docker build --platform=linux/amd64 -t myapp:latest .
+
+# Tag for Docker Hub
+docker tag myapp:latest username/myapp:v1.0
+
+# Log in to Docker Hub
+docker login
+
+# Push to registry
+docker push username/myapp:v1.0
```
+
+### Develop with live code reloading
+
+```bash
+# Mount source code as a volume
+docker run -v $(pwd):/app -p 8000:8000 myapp
+
+# Changes to local files are reflected in the container
+```
+
+### Clean up unused resources
+
+```bash
+# Remove stopped containers
+docker container prune -f
+
+# Remove unused images
+docker image prune -a -f
+
+# Remove unused volumes
+docker volume prune -f
+
+# Remove everything unused (be careful!)
+docker system prune -a --volumes -f
+```
+
+### Debug a failing container
+
+```bash
+# Check if container is running
+docker ps -a
+
+# View logs
+docker logs my-container
+
+# Inspect container state
+docker inspect my-container
+
+# Execute shell for manual inspection
+docker exec -it my-container /bin/bash
+```
+
+## Learning more
+
+This reference covers the most essential Docker commands. For comprehensive documentation on all Docker CLI commands, see:
+
+- [Docker CLI reference](https://docs.docker.com/reference/cli/docker/)
+- [Dockerfile reference](https://docs.docker.com/reference/dockerfile/)
+- [Docker Compose reference](https://docs.docker.com/compose/compose-file/)
+
+## Next steps
+
+Now that you're familiar with Docker commands, explore:
+
+- [Persisting data with volumes](/tutorials/introduction/containers/persist-data) for machine learning workflows.
+- [Serverless worker deployment](/serverless/workers/deploy) to run your containers on Runpod.
+- [Creating Dockerfiles for Serverless](/serverless/workers/create-dockerfile) with Runpod-specific best practices.
+- [Pods overview](/pods/overview) to run long-running GPU containers.
diff --git a/tutorials/introduction/containers/persist-data.mdx b/tutorials/introduction/containers/persist-data.mdx
index f48e87b9..04f36164 100644
--- a/tutorials/introduction/containers/persist-data.mdx
+++ b/tutorials/introduction/containers/persist-data.mdx
@@ -1,96 +1,295 @@
---
-title: "Persist data outside of containers"
+title: "Persist data with volumes"
+sidebarTitle: "Persist data"
+description: "Learn how to use Docker volumes to persist data outside of containers for machine learning and data processing workflows."
---
-In the [previous step](/tutorials/introduction/containers/create-dockerfiles), you created a Dockerfile and executed a command. Now, you'll learn how to persist data outside of containers.
+By default, containers are ephemeral—when a container is removed, any data written inside it is lost. For many use cases, especially machine learning training and data processing, you need data to persist beyond a container's lifecycle. Docker volumes solve this problem by providing persistent storage that exists outside the container filesystem.
-
+This guide shows you how to use volumes to persist data, a fundamental concept for working with Runpod's [Serverless](/serverless/workers/overview) and [Pods](/pods/overview) platforms.
-This walkthrough teaches you how to persist data outside of container. Runpod has the same concept used for attaching a Network Volume to your Pod.
+## What you'll learn
-Consult the documentation on [attaching a network volume to your Pod](/storage/network-volumes).
+In this guide, you will learn how to:
-
+- Understand why containers lose data when they stop.
+- Create Docker volumes for persistent storage.
+- Mount volumes to containers at runtime.
+- Read and write data to volumes.
+- Access volume data across multiple containers.
+- Apply these concepts to Runpod's storage solutions.
-## Why persist data outside a container?
+## Requirements
-The key goal is to have data persist across multiple container runs and removals.
+Before starting, you should have:
-By default, containers are ephemeral - everything inside them disappears when they exit.
+- Completed the [Dockerfile creation guide](/tutorials/introduction/containers/create-dockerfiles).
+- Docker Desktop installed and running.
+- Basic familiarity with Docker commands.
-So running something like:
+## Why persist data outside containers?
-```csharp
-docker run busybox date > file.txt
-```
+Containers are designed to be immutable and ephemeral. When a container is removed, everything inside its writable layer—including files, data, and state—is deleted, and a stopped container's data is only one `docker rm` away from being lost. This design makes containers portable and reproducible, but it creates a challenge when you need to preserve data.
+
+Consider these scenarios where persistence matters:
+
+**Machine learning training**: You train a model over hours or days. If the container stops, you lose all training progress, checkpoints, and the final model unless you save them outside the container.
-Would only write the date to `file.txt` temporarily inside that container. As soon as the container shuts down, that file and data is destroyed. This isn't great when you're training data and want your information to persist past your LLM training.
+**Data processing pipelines**: You process large datasets and generate results. Without persistent storage, you'd need to reprocess everything if the container restarts.
-Because of this, we need to persist data outside the container. Let's take a look at a workflow you can use to persist data outside a container.
+**Application state**: Databases, logs, user uploads, and configuration changes need to survive container restarts.
-***
+**Development workflows**: You want to edit code on your host machine and have changes immediately available inside the container without rebuilding the image.
-## Create a named volume
+Docker volumes provide the solution by storing data outside the container on the host system. When a container stops, the volume data remains intact and can be mounted to new containers.
-First, we'll create a named volume to represent the external storage:
+## Step 1: Create a named volume
+Start by creating a Docker volume that will store your persistent data:
+
+```bash
+docker volume create my-data
```
-docker volume create date-volume
+
+This creates a named volume called `my-data` managed by Docker. The volume exists independently of any container and persists until you explicitly delete it.
+
+You can verify the volume was created:
+
+```bash
+docker volume ls
```
-### Update Dockerfile
+You should see `my-data` in the list of volumes.
+
+### Understanding volume storage
+
+Docker stores volumes in a Docker-managed location on your host system (typically `/var/lib/docker/volumes/` on Linux). You don't need to worry about the exact location—Docker handles the storage details. The key point is that this storage exists outside any container's filesystem.
+
+## Step 2: Create your project files
-Next, we'll modify our Dockerfile to write the date output to a file rather than printing directly to stdout:
+For this example, you'll modify the Dockerfile from the previous guide to write data to a volume instead of just printing output.
+
+Create a new directory and navigate to it:
```bash
+mkdir volume-example
+cd volume-example
+```
+
+Create a `Dockerfile`:
+
+```dockerfile
FROM busybox
WORKDIR /data
-RUN touch current_date.txt
COPY entrypoint.sh /
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
```
-This sets the working directory to `/data`, touches a file called `current_date.txt`, and copies our script.
+This Dockerfile:
-### Update entrypoint script
+- Uses `busybox` as the base image.
+- Sets `/data` as the working directory (where our script will write files).
+- Copies and makes the entrypoint script executable.
+- Configures the script to run when containers start.
-The `entrypoint.sh` script is updated:
+Create an `entrypoint.sh` script:
-```bash
+```sh
#!/bin/sh
-date > /data/current_date.txt
+timestamp=$(date '+%Y-%m-%d %H:%M:%S')
+echo "Container started at: $timestamp" >> /data/timestamps.txt
+echo "Data written to /data/timestamps.txt"
+cat /data/timestamps.txt
+```
+
+This script:
+
+- Generates a timestamp.
+- Appends it to `/data/timestamps.txt` (using `>>` to append, not overwrite).
+- Prints confirmation and shows all timestamps.
+
+## Step 3: Build the image
+
+Build a Docker image from your Dockerfile:
+
+```bash
+docker build -t timestamp-logger .
```
-This will write the date to the `/data/current_date.txt` file instead of printing it.
+This creates an image called `timestamp-logger` that you can use to demonstrate persistent storage.
-## Mount the volume
+## Step 4: Run a container with a mounted volume
-Now when the container runs, this will write the date to the `/data/current_date.txt` file instead of printing it.
+Now run a container and mount your volume to the `/data` directory:
-Finally, we can mount the named volume to this data directory:
+```bash
+docker run -v my-data:/data timestamp-logger
+```
+
+Breaking down this command:
+
+- `docker run`: Creates and starts a new container.
+- `-v my-data:/data`: Mounts the `my-data` volume to `/data` inside the container.
+- `timestamp-logger`: The image to use.
+
+The `-v` flag creates a mount point. Files written to `/data` inside the container are actually written to the `my-data` volume on the host. This means the data persists even after the container exits.
+
+You should see output showing the timestamp was written and displaying the contents of the file.
+
+## Step 5: Verify data persistence
+
+Run the container again several times to see data persist across container instances:
+
+```bash
+docker run -v my-data:/data timestamp-logger
+docker run -v my-data:/data timestamp-logger
+docker run -v my-data:/data timestamp-logger
+```
+
+Each run creates a new container, but they all share the same volume. You should see the list of timestamps grow with each execution, proving that data persists beyond individual container lifecycles.
+
+This demonstrates the key benefit of volumes: data written by one container is available to other containers that mount the same volume.
+
+## Step 6: Access volume data from another container
+
+You can access the persisted data from any container that mounts the volume, even using a completely different image:
+
+```bash
+docker run --rm -v my-data:/data busybox cat /data/timestamps.txt
+```
+
+This command:
+
+- Runs a new `busybox` container (different from our custom image).
+- Mounts the same `my-data` volume to `/data`.
+- Runs `cat` to display the file contents.
+- Removes the container after it exits (`--rm` flag).
+
+You'll see all the timestamps from previous runs, demonstrating that volumes enable data sharing between containers.
+
+## Step 7: Inspect the volume
+
+You can get detailed information about a volume:
```bash
-docker run -v date-volume:/data my-time-image
+docker volume inspect my-data
```
-This runs a container from my-time-image and mounts the `date-volume` Docker volume to the /data directory in the container. Anything written to `/data` inside the container will now be written to the `date-volume` on the host instead of the container's ephemeral file system. This allows the data to persist. Once the container exits, the date output file is safely stored on the host volume.
+This shows the volume's mount point on the host system, when it was created, and other metadata. While you can technically access files directly at the mount point, it's better to interact with volumes through containers to avoid permission and compatibility issues.
+
+## Understanding volume mount syntax
-After the container exits, we can exec into another container sharing the volume to see the persisted data file:
+When using volumes, you specify mounts with the `-v` or `--mount` flag. The basic syntax is:
```bash
-docker run --rm -v date-volume:/data busybox cat /data/current_date.txt
+-v volume-name:/container/path
```
-This runs a new busybox container and also mounts the `date-volume`.
+Or for bind mounts (mounting host directories directly):
+
+```bash
+-v /host/absolute/path:/container/path
+```
+
+**Named volumes** (like `my-data`) are managed by Docker and recommended for most use cases. **Bind mounts** map specific host directories and are useful for development when you want live code reloading.
+
+### Volume mount options
+
+You can specify additional mount options:
+
+```bash
+# Mount read-only
+docker run -v my-data:/data:ro timestamp-logger
+
+# Create volume if it doesn't exist
+docker run -v new-volume:/data timestamp-logger
+```
+
+The `:ro` suffix makes the mount read-only inside the container, preventing accidental data modification.
+
+## Applying volumes to real-world scenarios
+
+### Machine learning training
+
+For ML training workflows, mount a volume to store:
+
+- **Training checkpoints**: Save model state at intervals so you can resume if interrupted.
+- **Final models**: Persist trained models for deployment.
+- **Training logs**: Keep TensorBoard logs or custom metrics.
+- **Datasets**: Store large datasets that don't change often.
+
+Example:
+
+```bash
+docker run -v ml-models:/models -v training-data:/data myapp/train
+```
+
+### Data processing pipelines
+
+For data processing, use volumes to:
+
+- **Store input data**: Mount datasets that multiple containers process.
+- **Save results**: Write processed data to a volume for downstream tasks.
+- **Cache intermediates**: Store intermediate processing results to avoid recomputation.
+
+### Development workflows
+
+During development, mount your source code as a volume for live reloading:
+
+```bash
+docker run -v $(pwd)/src:/app/src -p 8000:8000 myapp/dev
+```
+
+Changes to files in your local `src` directory immediately reflect inside the container without rebuilding the image.
+
+## Volumes and Runpod
+
+Runpod provides volume-like functionality through [network volumes](/storage/network-volumes), which work similarly to Docker volumes but with cloud-native features:
+
+**For Serverless**: Network volumes allow your workers to access shared data like models or datasets. Multiple workers can read from the same volume, avoiding the need to include large files in your container image. See [Serverless storage](/serverless/storage/overview) for details.
+
+**For Pods**: You can [attach network volumes](/storage/network-volumes) to Pods to persist data across Pod restarts or share data between Pods. This is essential for training workflows where you need to preserve checkpoints and models. See [Pod storage types](/pods/storage/types) for more information.
+
+Network volumes provide persistent storage that survives beyond individual containers, similar to the Docker volumes you've used in this guide, but optimized for cloud deployment.
+
+## Cleaning up volumes
+
+Volumes persist until you explicitly remove them. To clean up:
+
+```bash
+# Remove a specific volume
+docker volume rm my-data
+
+# Remove all unused volumes
+docker volume prune
+```
+
+Be careful with `docker volume prune`—it removes all volumes not currently in use by containers, potentially deleting important data.
+
+## Troubleshooting
+
+**Volume is empty after mounting**: If you bind mount a host directory over a directory that exists in the image, the host directory's contents appear in place of the image's original files—nothing is copied automatically. Named volumes behave differently: when an empty named volume is first mounted over a non-empty image directory, Docker copies the image's existing files into the volume.
+
+**Permission errors**: If you get permission errors when writing to a volume, it might be due to user ID mismatches. The container process runs as a specific user, and the volume permissions must allow that user to write. You may need to change permissions or run the container as a different user.
+
+**Volume doesn't persist after reboot**: Docker volumes persist across Docker restarts and system reboots. If you're losing data, verify you're using named volumes (not anonymous volumes) and not removing them accidentally.
+
+**Can't remove volume**: If you can't remove a volume, a container might be using it even if stopped. List all containers with `docker ps -a`, remove containers using the volume, then try removing the volume again.
+
+## Learning more
+
+For deeper coverage of Docker storage concepts, see Docker's official documentation:
-* Using the same -`v date-volume:/data mount` point maps the external volume dir to `/data` again.
-* This allows the new container to access the persistent date file that the first container wrote.
-* The `cat /data/current_date.txt` command prints out the file with the date output from the first container.
-* The `--rm`flag removes the container after running so we don't accumulate stopped containers.
+- [Volumes documentation](https://docs.docker.com/storage/volumes/)
+- [Bind mounts](https://docs.docker.com/storage/bind-mounts/)
+- [Storage drivers](https://docs.docker.com/storage/storagedriver/)
-
+## Next steps
-Remember, this is a general tutorial on Docker. These concepts will help give you a better understanding of working with Runpod.
+You now understand how to persist data with Docker volumes, a critical skill for production deployments. Continue your learning:
-
+- Review the [Docker commands reference](/tutorials/introduction/containers/docker-commands) for volume management commands.
+- Explore [Runpod network volumes](/storage/network-volumes) for cloud-native persistent storage.
+- Learn about [Serverless storage options](/serverless/storage/overview) for your workers.
+- Understand [Pod storage types](/pods/storage/types) for long-running workloads.
From c7e4e5986c4801b05f90e7199940ebd0cb1b2e55 Mon Sep 17 00:00:00 2001
From: Mo King
Date: Mon, 9 Mar 2026 12:27:57 -0400
Subject: [PATCH 4/8] Expand hugging face how-to doc
---
docs.json | 4 +-
get-started/mcp-servers.mdx | 1 +
integrations/overview.mdx | 1 -
serverless/development/huggingface-models.mdx | 215 ++++++++++++++++--
serverless/endpoints/model-caching.mdx | 69 +++---
serverless/overview.mdx | 1 +
serverless/quickstart.mdx | 1 +
serverless/sdks.mdx | 4 +-
8 files changed, 235 insertions(+), 61 deletions(-)
diff --git a/docs.json b/docs.json
index a949b6ca..f2dabe16 100644
--- a/docs.json
+++ b/docs.json
@@ -60,9 +60,9 @@
"serverless/development/error-handling",
"serverless/development/cleanup",
"serverless/development/write-logs",
+ "serverless/development/huggingface-models",
"serverless/development/environment-variables",
"serverless/development/aggregate-outputs",
- "serverless/development/huggingface-models",
"serverless/workers/concurrent-handler"
]
},
@@ -74,7 +74,6 @@
"serverless/workers/deploy",
"serverless/workers/github-integration",
"serverless/storage/overview",
- "serverless/endpoints/model-caching",
"serverless/development/dual-mode-worker"
]
},
@@ -84,6 +83,7 @@
"serverless/endpoints/overview",
"serverless/endpoints/send-requests",
"serverless/endpoints/endpoint-configurations",
+ "serverless/endpoints/model-caching",
"serverless/development/optimization"
]
},
diff --git a/get-started/mcp-servers.mdx b/get-started/mcp-servers.mdx
index 09a233ce..9f62e7e8 100644
--- a/get-started/mcp-servers.mdx
+++ b/get-started/mcp-servers.mdx
@@ -2,6 +2,7 @@
title: "Use Runpod's MCP servers"
sidebarTitle: "Runpod MCP servers"
description: "Connect AI tools to Runpod using the Model Context Protocol for infrastructure management and documentation access."
+tag: "NEW"
---
Runpod provides two [Model Context Protocol (MCP)](https://modelcontextprotocol.io) servers that connect AI tools and coding agents directly to Runpod:
diff --git a/integrations/overview.mdx b/integrations/overview.mdx
index 704cb3ee..0191a451 100644
--- a/integrations/overview.mdx
+++ b/integrations/overview.mdx
@@ -2,7 +2,6 @@
title: "Integrate your applications with Runpod"
sidebarTitle: "Overview"
description: "Integrate Runpod compute resources with your applications, external tools, and agentic frameworks."
-tag: "NEW"
---
import { InferenceTooltip } from "/snippets/tooltips.jsx";
diff --git a/serverless/development/huggingface-models.mdx b/serverless/development/huggingface-models.mdx
index 26fbcc19..ebda8857 100644
--- a/serverless/development/huggingface-models.mdx
+++ b/serverless/development/huggingface-models.mdx
@@ -1,7 +1,7 @@
---
title: "Use Hugging Face models"
-sidebarTitle: "Hugging Face models"
-description: "Learn how to integrate pre-trained Hugging Face models into your Serverless handler functions."
+sidebarTitle: "Use Hugging Face models"
+description: "Integrate pre-trained Hugging Face models into your Serverless handler functions."
---
import { HandlerFunctionTooltip, WorkerTooltip, ServerlessTooltip } from "/snippets/tooltips.jsx";
@@ -10,9 +10,10 @@ Hugging Face provides thousands of pre-trained models for natural language proce
This guide shows you how to load and use Hugging Face models in your Serverless handlers, using sentiment analysis as an example that you can adapt for other model types.
-
-**Use cached models for production:** The approach shown in this guide downloads models when workers start, which increases cold start times and costs. For production, use [cached models](/serverless/endpoints/model-caching) instead. Cached models reduce cold starts to just a few seconds and eliminate charges for model download time. See the [cached model tutorial](/tutorials/serverless/model-caching-text) for a complete example.
-
+This guide covers two approaches:
+
+- [Downloading models at runtime](#load-models-at-runtime) (simpler, good for development).
+- [Using cached models](#use-cached-models) (recommended for production).
## Install dependencies
@@ -24,7 +25,7 @@ pip install torch transformers
When deploying to Runpod, you'll need to include these dependencies in your [Dockerfile](/serverless/workers/create-dockerfile) or requirements file.
-## Create your handler
+## Load models at runtime
Create a file named `handler.py` and follow these steps to build a handler that performs sentiment analysis using a Hugging Face model.
@@ -39,9 +40,6 @@ Create a file named `handler.py` and follow these steps to build a handler that
The `pipeline` function from the `transformers` library provides a simple interface for using pre-trained models. It handles tokenization, model inference, and post-processing automatically.
-
- The `pipeline` approach shown in this guide is convenient for local testing and development. For production endpoints, you should use [cached models](/serverless/endpoints/model-caching) instead, which dramatically reduce cold start times and eliminate charges for model download time.
-
@@ -180,11 +178,202 @@ This pattern works for any Hugging Face model. To use a different model:
3. **Adjust input/output handling**: Different models expect different input formats and return different output structures. Check the model's documentation on Hugging Face to understand its API.
-## Production deployment
+## Use cached models
+
+The example above downloads models when workers start, which works fine for development and testing.
+
+For production endpoints, we highly recommend using [cached models](/serverless/endpoints/model-caching) instead. Cached models provide faster cold starts (seconds instead of minutes) and eliminate charges for model download time.
+
+### Enable model caching
+
+To enable cached models on your endpoint:
+
+
+
+ Navigate to the [Serverless section](https://www.console.runpod.io/serverless) of the console. Either create a new endpoint or select **Manage → Edit Endpoint** on an existing one.
+
+
+ Scroll to the **Model** field and enter your Hugging Face model identifier.
+
+ For example: `distilbert/distilbert-base-uncased-finetuned-sst-2-english`
+
+
+ Save your endpoint configuration. Runpod will automatically cache the model and make it available to your workers.
+
+
+
+### Locate cached models
+
+Cached models are stored at `/runpod-volume/huggingface-cache/hub/` following Hugging Face cache conventions. Add this helper function to your handler to resolve the correct snapshot path:
+
+```python
+import os
+
+HF_CACHE_ROOT = "/runpod-volume/huggingface-cache/hub"
+
+
+def resolve_snapshot_path(model_id: str) -> str:
+ """
+ Resolve the local snapshot path for a cached model.
+
+ Args:
+ model_id: The model name from Hugging Face
+ (e.g., 'distilbert/distilbert-base-uncased-finetuned-sst-2-english')
+
+ Returns:
+ The full path to the cached model snapshot
+ """
+ if "/" not in model_id:
+ raise ValueError(f"model_id '{model_id}' must be in 'org/name' format")
+
+ org, name = model_id.split("/", 1)
+ model_root = os.path.join(HF_CACHE_ROOT, f"models--{org}--{name}")
+ refs_main = os.path.join(model_root, "refs", "main")
+ snapshots_dir = os.path.join(model_root, "snapshots")
+
+ # Read the snapshot hash from refs/main
+ if os.path.isfile(refs_main):
+ with open(refs_main, "r") as f:
+ snapshot_hash = f.read().strip()
+ candidate = os.path.join(snapshots_dir, snapshot_hash)
+ if os.path.isdir(candidate):
+ return candidate
+
+ # Fall back to first available snapshot
+ if os.path.isdir(snapshots_dir):
+ versions = [
+ d for d in os.listdir(snapshots_dir)
+ if os.path.isdir(os.path.join(snapshots_dir, d))
+ ]
+ if versions:
+ versions.sort()
+ return os.path.join(snapshots_dir, versions[0])
+
+ raise RuntimeError(f"Cached model not found: {model_id}")
+```
+
+### Adapt your handler for cached models
+
+Once model caching is enabled, you need to update your handler to load the model from the local cache instead of downloading it. Here's how the code changes:
+
+
+
+ ```python
+ from transformers import pipeline
+
+ # Downloads model when worker starts
+ model = pipeline(
+ "sentiment-analysis",
+ model="distilbert/distilbert-base-uncased-finetuned-sst-2-english"
+ )
+ ```
+
+
+ ```python
+ import os
+ from transformers import pipeline
+
+ # Force offline mode to prevent accidental downloads
+ os.environ["HF_HUB_OFFLINE"] = "1"
+ os.environ["TRANSFORMERS_OFFLINE"] = "1"
+
+ # Resolve the cached model path
+ LOCAL_PATH = resolve_snapshot_path(
+ "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
+ )
+
+ # Load from local cache
+ model = pipeline(
+ "sentiment-analysis",
+ model=LOCAL_PATH,
+ local_files_only=True
+ )
+ ```
+
+
+
+The key differences are:
+
+- **Offline mode**: Setting `HF_HUB_OFFLINE` and `TRANSFORMERS_OFFLINE` prevents accidental downloads if the model isn't cached.
+- **Local path**: Instead of a model identifier, you pass the resolved local path to the cached model files.
+- **local_files_only**: This flag tells the transformers library to only use local files.
+
+### Complete cached implementation
+
+Here's the complete handler using cached models:
+
+```python handler.py
+import os
+import runpod
+from transformers import pipeline
+
+MODEL_ID = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
+HF_CACHE_ROOT = "/runpod-volume/huggingface-cache/hub"
+
+# Force offline mode to use only cached models
+os.environ["HF_HUB_OFFLINE"] = "1"
+os.environ["TRANSFORMERS_OFFLINE"] = "1"
+
+
+def resolve_snapshot_path(model_id: str) -> str:
+ """Resolve the local snapshot path for a cached model."""
+ if "/" not in model_id:
+ raise ValueError(f"model_id '{model_id}' must be in 'org/name' format")
+
+ org, name = model_id.split("/", 1)
+ model_root = os.path.join(HF_CACHE_ROOT, f"models--{org}--{name}")
+ refs_main = os.path.join(model_root, "refs", "main")
+ snapshots_dir = os.path.join(model_root, "snapshots")
+
+ if os.path.isfile(refs_main):
+ with open(refs_main, "r") as f:
+ snapshot_hash = f.read().strip()
+ candidate = os.path.join(snapshots_dir, snapshot_hash)
+ if os.path.isdir(candidate):
+ return candidate
+
+ if os.path.isdir(snapshots_dir):
+ versions = [
+ d for d in os.listdir(snapshots_dir)
+ if os.path.isdir(os.path.join(snapshots_dir, d))
+ ]
+ if versions:
+ versions.sort()
+ return os.path.join(snapshots_dir, versions[0])
+
+ raise RuntimeError(f"Cached model not found: {model_id}")
+
+
+# Load model once when worker starts
+LOCAL_PATH = resolve_snapshot_path(MODEL_ID)
+model = pipeline("sentiment-analysis", model=LOCAL_PATH, local_files_only=True)
+
+
+def handler(job):
+ job_input = job["input"]
+ text = job_input.get("text")
+
+ if not text:
+ return {"error": "No text provided for analysis."}
+
+ result = model(text)[0]
+
+ return {
+ "sentiment": result["label"],
+ "score": float(result["score"])
+ }
+
+
+runpod.serverless.start({"handler": handler})
+```
+
+
+For a complete walkthrough including Dockerfile creation and deployment, see the [cached model tutorial](/tutorials/serverless/model-caching-text).
+
-When deploying Hugging Face models to production endpoints, follow these best practices:
+## Other best practices
-- **Use cached models**: The approach shown in this guide downloads models when workers start, which increases cold start times and costs. For production, use [cached models](/serverless/endpoints/model-caching) instead. Cached models reduce cold starts to just a few seconds and eliminate charges for model download time. See the [cached model tutorial](/tutorials/serverless/model-caching-text) for a complete example.
+When deploying Hugging Face models to production endpoints, keep these additional considerations in mind:
- **Model size**: Larger models require more VRAM and take longer to load. Choose the smallest model that meets your accuracy requirements.
@@ -194,7 +383,7 @@ When deploying Hugging Face models to production endpoints, follow these best pr
## Next steps
-- **For production**: Learn about [cached models](/serverless/endpoints/model-caching) and follow the [cached model tutorial](/tutorials/serverless/model-caching-text) to improve cold start times and reduce costs.
+- Learn more about [how cached models work](/serverless/endpoints/model-caching).
- [Create a Dockerfile](/serverless/workers/create-dockerfile) to package your handler with its dependencies.
- [Deploy your worker](/serverless/workers/deploy) to a Runpod endpoint.
- Explore [optimization techniques](/serverless/development/optimization) to improve performance.
diff --git a/serverless/endpoints/model-caching.mdx b/serverless/endpoints/model-caching.mdx
index 4e4a5b5e..0e36945b 100644
--- a/serverless/endpoints/model-caching.mdx
+++ b/serverless/endpoints/model-caching.mdx
@@ -2,13 +2,12 @@
title: "Cached models"
sidebarTitle: "Cached models"
description: "Accelerate worker cold starts and reduce costs by using cached models."
-tag: "NEW"
---
import { MachineTooltip, MachinesTooltip, ColdStartTooltip, WorkersTooltip, HandlerFunctionTooltip, InferenceTooltip } from "/snippets/tooltips.jsx";
-For a step-by-step example showing how to integrate cached models with custom workers, see [Deploy a cached model](/tutorials/serverless/model-caching-text).
+To learn how to use cached models with the Hugging Face Transformers library, see [Use Hugging Face models](/serverless/development/huggingface-models#use-cached-models). For a complete end-to-end deployment walkthrough, see the [cached model tutorial](/tutorials/serverless/model-caching-text).
Enabling cached models on your endpoints can reduce cold start times and dramatically reduce the cost of loading large models.
@@ -143,52 +142,38 @@ For example, here is how the model `gensyn/qwen2.5-0.5b-instruct` would be store
-### Programmatically locate cached models
+### Locate cached models in your handler
-To dynamically locate cached models without hardcoding paths, you can add this helper function to your to scan the cache directory for the model you want to use:
+To use a cached model in your handler, you need to resolve the local path to the model files. The path follows a predictable pattern based on the model identifier:
-```python handler.py
-import os
-
-CACHE_DIR = "/runpod-volume/huggingface-cache/hub"
-
-def find_model_path(model_name):
- """
- Find the path to a cached model.
-
- Args:
- model_name: The model name from Hugging Face
- (e.g., 'Qwen/Qwen2.5-0.5B-Instruct')
-
- Returns:
- The full path to the cached model, or None if not found
- """
- # Convert model name format: "Org/Model" -> "models--Org--Model"
- cache_name = model_name.replace("/", "--")
- snapshots_dir = os.path.join(CACHE_DIR, f"models--{cache_name}", "snapshots")
-
- # Check if the model exists in cache
- if os.path.exists(snapshots_dir):
- snapshots = os.listdir(snapshots_dir)
- if snapshots:
- # Return the path to the first (usually only) snapshot
- return os.path.join(snapshots_dir, snapshots[0])
-
- return None
-
-# Example usage
-model_path = find_model_path("Qwen/Qwen2.5-0.5B-Instruct")
-if model_path:
- print(f"Model found at: {model_path}")
-else:
- print("Model not found in cache")
+```
+/runpod-volume/huggingface-cache/hub/models--{org}--{name}/snapshots/{hash}/
```
-### Custom worker examples
+For example, `Qwen/Qwen2.5-0.5B-Instruct` would be stored at:
-The following sample applications demonstrate how you can integrate cached models into your custom workers:
+```
+/runpod-volume/huggingface-cache/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/abc123.../
+```
-- [Cached models + LLMs](https://github.com/runpod-workers/model-store-cache-example): A custom worker that uses cached models to serve LLMs.
+For complete implementation details including a helper function to resolve these paths dynamically, see [Use Hugging Face models](/serverless/development/huggingface-models#use-cached-models).
+
+### Examples and resources
+
+
+
+ Learn how to adapt your Transformers code to use cached models
+
+
+ End-to-end walkthrough deploying Phi-3 with model caching
+
+
+ Sample worker using cached models for LLM inference
+
+
+ Pre-built workers with automatic cached model support
+
+
## Current limitations
diff --git a/serverless/overview.mdx b/serverless/overview.mdx
index 4d7d38b5..3ba334ab 100644
--- a/serverless/overview.mdx
+++ b/serverless/overview.mdx
@@ -263,6 +263,7 @@ Ready to get started with Runpod Serverless?
* [Learn more about endpoints.](/serverless/endpoints/overview)
* [Learn more about workers.](/serverless/workers/overview)
* [Learn how to build handler functions.](/serverless/workers/handler-functions)
+* [Integrate Hugging Face models into your handlers.](/serverless/development/huggingface-models)
* [Deploy large language models in minutes with vLLM.](/serverless/vllm/overview)
* [Review storage options for your endpoints.](/serverless/storage/overview)
* [Learn how to send requests to your endpoints.](/serverless/endpoints/send-requests)
diff --git a/serverless/quickstart.mdx b/serverless/quickstart.mdx
index c72b52cc..555e727d 100644
--- a/serverless/quickstart.mdx
+++ b/serverless/quickstart.mdx
@@ -247,6 +247,7 @@ Congratulations! You've successfully deployed and tested your first Serverless e
Now that you've learned the basics, you're ready to:
* [Create more advanced handler functions.](/serverless/workers/handler-functions)
+* [Integrate Hugging Face models into your handlers.](/serverless/development/huggingface-models)
* [Update your Dockerfile with AI/ML models and other dependencies.](/serverless/workers/create-dockerfile)
* [Learn how to structure and send requests to your endpoint.](/serverless/endpoints/send-requests)
* [Manage your Serverless endpoints in the Runpod console.](/serverless/endpoints/overview)
\ No newline at end of file
diff --git a/serverless/sdks.mdx b/serverless/sdks.mdx
index 9d1f3369..b41e03cd 100644
--- a/serverless/sdks.mdx
+++ b/serverless/sdks.mdx
@@ -1,11 +1,9 @@
---
title: "Install the Runpod Serverless SDK"
sidebarTitle: "Install SDKs"
-description: "Install and configure the Runpod Serverless SDK for Python, JavaScript, or Go to interact with Serverless endpoints programmatically."
+description: "Install and configure the Serverless SDK for Python, JavaScript, or Go to interact with Serverless endpoints programmatically."
---
-The Runpod SDK lets you interact with Serverless endpoints programmatically from your own applications.
-
## Python
### Install
From f13fcf42654ab92a1bdf955de26e2a0d4fb06cfc Mon Sep 17 00:00:00 2001
From: Mo King
Date: Mon, 9 Mar 2026 12:55:53 -0400
Subject: [PATCH 5/8] Move model reference
---
docs.json | 2 +-
public-endpoints/reference.mdx | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs.json b/docs.json
index f2dabe16..1e9c0147 100644
--- a/docs.json
+++ b/docs.json
@@ -171,13 +171,13 @@
"pages": [
"public-endpoints/overview",
"public-endpoints/quickstart",
+ "public-endpoints/reference",
"public-endpoints/requests",
"public-endpoints/ai-sdk",
"public-endpoints/ai-coding-tools",
{
"group": "Models",
"pages": [
- "public-endpoints/reference",
{
"group": "Image models",
"pages": [
diff --git a/public-endpoints/reference.mdx b/public-endpoints/reference.mdx
index a23cdf61..591db0fb 100644
--- a/public-endpoints/reference.mdx
+++ b/public-endpoints/reference.mdx
@@ -1,6 +1,6 @@
---
title: "Available models"
-sidebarTitle: "Overview"
+sidebarTitle: "Models"
description: "Browse all available models for Runpod Public Endpoints."
---
From 124ab4132599d0f4107e34af14831899ef9112ce Mon Sep 17 00:00:00 2001
From: Justin
Date: Mon, 9 Mar 2026 13:11:57 -0400
Subject: [PATCH 6/8] fix: JS import order and HF model ID format consistency
---
serverless/development/huggingface-models.mdx | 4 ++--
serverless/sdks.mdx | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/serverless/development/huggingface-models.mdx b/serverless/development/huggingface-models.mdx
index ebda8857..4d0611bf 100644
--- a/serverless/development/huggingface-models.mdx
+++ b/serverless/development/huggingface-models.mdx
@@ -49,7 +49,7 @@ Create a file named `handler.py` and follow these steps to build a handler that
# Load model once when worker starts
model = pipeline(
"sentiment-analysis",
- model="distilbert-base-uncased-finetuned-sst-2-english"
+ model="distilbert/distilbert-base-uncased-finetuned-sst-2-english"
)
```
@@ -102,7 +102,7 @@ from transformers import pipeline
# Load model once when worker starts
model = pipeline(
"sentiment-analysis",
- model="distilbert-base-uncased-finetuned-sst-2-english"
+ model="distilbert/distilbert-base-uncased-finetuned-sst-2-english"
)
def handler(job):
diff --git a/serverless/sdks.mdx b/serverless/sdks.mdx
index b41e03cd..15f12c85 100644
--- a/serverless/sdks.mdx
+++ b/serverless/sdks.mdx
@@ -66,8 +66,8 @@ yarn add runpod-sdk
### Configure your API key
```javascript
-const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
import runpodSdk from "runpod-sdk";
+const { RUNPOD_API_KEY, ENDPOINT_ID } = process.env;
const runpod = runpodSdk(RUNPOD_API_KEY);
const endpoint = runpod.endpoint(ENDPOINT_ID);
From 84c5af7e041a0884770728eae7bdfe7d960b80f0 Mon Sep 17 00:00:00 2001
From: Mo King
Date: Tue, 10 Mar 2026 15:16:20 -0400
Subject: [PATCH 7/8] Fix docs.json
---
docs.json | 2 ++
1 file changed, 2 insertions(+)
diff --git a/docs.json b/docs.json
index 172d8df0..76fd1b0a 100644
--- a/docs.json
+++ b/docs.json
@@ -876,6 +876,8 @@
{
"source": "/get-started/install-sdks",
"destination": "/serverless/sdks"
+ },
+ {
"source": "/flash/endpoint-functions",
"destination": "/flash/create-endpoints"
}
From c8a76c087eede929aef94e2ffa087e831406a69c Mon Sep 17 00:00:00 2001
From: Mo King
Date: Wed, 11 Mar 2026 13:10:42 -0400
Subject: [PATCH 8/8] Disk volume -> volume disk
---
get-started/api-keys.mdx | 2 +-
get-started/connect-to-runpod.mdx | 2 +-
pods/pricing.mdx | 4 ++--
pods/storage/create-network-volumes.mdx | 2 +-
4 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/get-started/api-keys.mdx b/get-started/api-keys.mdx
index 8cf82ae5..230493e4 100644
--- a/get-started/api-keys.mdx
+++ b/get-started/api-keys.mdx
@@ -1,6 +1,6 @@
---
title: "Manage API keys"
-description: "Learn how to create, edit, and disable Runpod API keys."
+description: "Create, edit, and disable Runpod API keys."
---
import { ServerlessTooltip } from "/snippets/tooltips.jsx";
diff --git a/get-started/connect-to-runpod.mdx b/get-started/connect-to-runpod.mdx
index aa292281..118af80a 100644
--- a/get-started/connect-to-runpod.mdx
+++ b/get-started/connect-to-runpod.mdx
@@ -1,6 +1,6 @@
---
title: "Choose a workflow"
-description: "Review the available methods for accessing and managing Runpod resources."
+description: "Review available methods for accessing and managing Runpod resources."
---
import { PodsTooltip, EndpointTooltip, ServerlessTooltip } from "/snippets/tooltips.jsx";
diff --git a/pods/pricing.mdx b/pods/pricing.mdx
index bbfc9b65..c17b076a 100644
--- a/pods/pricing.mdx
+++ b/pods/pricing.mdx
@@ -146,8 +146,8 @@ You can select your preferred pricing model directly from the Runpod console whe
Runpod offers [three types of storage](/pods/storage/types) for Pods:
-- **Container volumes:** Temporary storage that is erased if the Pod is stopped, billed at \$0.10 per GB per month for storage on running Pods. Billed per-second.
-- **Disk volumes:** Persistent storage that is billed at \$0.10 per GB per month on running Pods and \$0.20 per GB per month for volume storage on stopped Pods. Billed per-second.
+- **Container disk:** Temporary storage that is erased if the Pod is stopped, billed at \$0.10 per GB per month for storage on running Pods. Billed per-second.
+- **Volume disk:** Persistent storage that is billed at \$0.10 per GB per month on running Pods and \$0.20 per GB per month for volume storage on stopped Pods. Billed per-second.
- **Network volumes:** External storage that is billed at \$0.07 per GB per month for storage requirements below 1TB. For requirements exceeding 1TB, the rate is \$0.05 per GB per month. Billed hourly.
You are not charged for storage if the host is down or unavailable from the public internet.
diff --git a/pods/storage/create-network-volumes.mdx b/pods/storage/create-network-volumes.mdx
index 37d79b94..75190c4e 100644
--- a/pods/storage/create-network-volumes.mdx
+++ b/pods/storage/create-network-volumes.mdx
@@ -127,7 +127,7 @@ Network volumes are backed by high-performance storage servers co-located with R
Using network volumes provides significant flexibility that can lead to cost savings, especially if you need to frequently switch between Pods or share large datasets.
-Network volume storage space costs less than for disk volumes (\$0.07/GB/month rather than \$0.10/GB/month), and storing data on a network volume can save you money compared to provisioning separate disk space for multiple Pods (even with just two Pods sharing one volume).
+Network volume storage space costs less than for volume disks (\$0.07/GB/month rather than \$0.10/GB/month), and storing data on a network volume can save you money compared to provisioning separate disk space for multiple Pods (even with just two Pods sharing one volume).
For a deeper dive into potential benefits, read this [blog article on network volumes](https://blog.runpod.io/four-reasons-to-set-up-a/).