From 58e8b203faf87bc72ec30c6e72e6db45f7110a74 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 10 Aug 2024 15:53:48 -0700 Subject: [PATCH 01/51] increase postgresql read timeout and add dd metric --- internal/config/config.go | 2 +- internal/manager/key.go | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/config/config.go b/internal/config/config.go index e4061b2..2f2fc8c 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -27,7 +27,7 @@ type Config struct { RedisPassword string `koanf:"redis_password" env:"REDIS_PASSWORD"` RedisReadTimeout time.Duration `koanf:"redis_read_time_out" env:"REDIS_READ_TIME_OUT" envDefault:"1s"` RedisWriteTimeout time.Duration `koanf:"redis_write_time_out" env:"REDIS_WRITE_TIME_OUT" envDefault:"500ms"` - PostgresqlReadTimeout time.Duration `koanf:"postgresql_read_time_out" env:"POSTGRESQL_READ_TIME_OUT" envDefault:"2m"` + PostgresqlReadTimeout time.Duration `koanf:"postgresql_read_time_out" env:"POSTGRESQL_READ_TIME_OUT" envDefault:"10m"` PostgresqlWriteTimeout time.Duration `koanf:"postgresql_write_time_out" env:"POSTGRESQL_WRITE_TIME_OUT" envDefault:"5s"` InMemoryDbUpdateInterval time.Duration `koanf:"in_memory_db_update_interval" env:"IN_MEMORY_DB_UPDATE_INTERVAL" envDefault:"5s"` TelemetryProvider string `koanf:"telemetry_provider" env:"TELEMETRY_PROVIDER" envDefault:"statsd"` diff --git a/internal/manager/key.go b/internal/manager/key.go index 287fa8c..2387886 100644 --- a/internal/manager/key.go +++ b/internal/manager/key.go @@ -222,6 +222,10 @@ func (m *Manager) GetKeyViaCache(raw string) (*key.ResponseKey, error) { k = stored } + if k != nil { + telemetry.Incr("bricksllm.manager.get_key_via_cache.cache_hit", nil, 1) + } + return k, nil } From 9a284709756c86cf36cdaa9148b3f9e265d9bce5 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 10 Aug 2024 16:01:09 -0700 Subject: [PATCH 02/51] update CHANGELOG --- CHANGELOG.md | 7 +++++++ internal/provider/openai/cost.go | 2 ++ 2 files changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 498fc20..fcaa21b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## 1.35.0 - 2024-08-10 +### Added +- Added cost tracking for `gpt-4o-2024-08-06` + +### Changed +- Changed default read time out for PostgreSQL + ## 1.34.0 - 2024-07-29 ### Added - Added cost tracking for `gpt-4o-mini` diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index 391b916..adcdd73 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -38,6 +38,7 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "gpt-4o-mini": 0.00015, "gpt-4o-mini-2024-07-18": 0.00015, "gpt-4o-2024-05-13": 0.005, + "gpt-4o-2024-08-06": 0.005, "gpt-4-1106-preview": 0.01, "gpt-4-turbo-preview": 0.01, "gpt-4-turbo": 0.01, @@ -100,6 +101,7 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "gpt-4o-mini": 0.0006, "gpt-4o-mini-2024-07-18": 0.0006, "gpt-4o-2024-05-13": 0.015, + "gpt-4o-2024-08-06": 0.015, "gpt-4-turbo-preview": 0.03, "gpt-4-turbo": 0.03, "gpt-4-turbo-2024-04-09": 0.03, From c342d01b848d12675eba58d5fe389651eff30fec Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 10 Aug 2024 16:42:07 -0700 Subject: [PATCH 03/51] update cache ttl --- CHANGELOG.md | 4 ++++ internal/manager/key.go | 2 +- internal/manager/provider_setting.go | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fcaa21b..a31833a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ 
+## 1.35.1 - 2024-08-10 +### Changed +- Changed cache TTL from `1h` to `24h` for keys and provider settings + ## 1.35.0 - 2024-08-10 ### Added - Added cost tracking for `gpt-4o-2024-08-06` diff --git a/internal/manager/key.go b/internal/manager/key.go index 2387886..3401db4 100644 --- a/internal/manager/key.go +++ b/internal/manager/key.go @@ -214,7 +214,7 @@ func (m *Manager) GetKeyViaCache(raw string) (*key.ResponseKey, error) { return stored, nil } - err = m.kc.Set(raw, bs, time.Hour) + err = m.kc.Set(raw, bs, 24*time.Hour) if err != nil { telemetry.Incr("bricksllm.manager.get_key_via_cache.set_error", nil, 1) } diff --git a/internal/manager/provider_setting.go b/internal/manager/provider_setting.go index c4bd474..29b0e6c 100644 --- a/internal/manager/provider_setting.go +++ b/internal/manager/provider_setting.go @@ -166,7 +166,7 @@ func (m *ProviderSettingsManager) GetSettingViaCache(id string) (*provider.Setti return stored, nil } - err = m.Cache.Set(id, bs, time.Hour) + err = m.Cache.Set(id, bs, 24*time.Hour) if err != nil { telemetry.Incr("bricksllm.provider_settings_manager.get_setting_via_cache.set_error", nil, 1) } From c7db37310c5106774817e45ba796906b151afa5d Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 10 Aug 2024 17:39:02 -0700 Subject: [PATCH 04/51] change db data type --- internal/event/reporting.go | 20 ++++++++++++++++++++ internal/manager/reporting.go | 6 +++--- internal/server/web/admin/admin.go | 2 +- internal/storage/postgresql/event.go | 16 ++++++++-------- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/internal/event/reporting.go b/internal/event/reporting.go index 91dbf6c..de8a601 100644 --- a/internal/event/reporting.go +++ b/internal/event/reporting.go @@ -14,12 +14,32 @@ type DataPoint struct { UserId string `json:"userId"` } +type DataPointV2 struct { + TimeStamp int64 `json:"timeStamp"` + NumberOfRequests int64 `json:"numberOfRequests"` + CostInUsd float64 `json:"costInUsd"` + LatencyInMs int64 `json:"latencyInMs"` + PromptTokenCount int64 `json:"promptTokenCount"` + CompletionTokenCount int64 `json:"completionTokenCount"` + SuccessCount int64 `json:"successCount"` + Model string `json:"model"` + KeyId string `json:"keyId"` + CustomId string `json:"customId"` + UserId string `json:"userId"` +} + type ReportingResponse struct { DataPoints []*DataPoint `json:"dataPoints"` LatencyInMsMedian float64 `json:"latencyInMsMedian"` LatencyInMs99th float64 `json:"latencyInMs99th"` } +type ReportingResponseV2 struct { + DataPoints []*DataPointV2 `json:"dataPoints"` + LatencyInMsMedian float64 `json:"latencyInMsMedian"` + LatencyInMs99th float64 `json:"latencyInMs99th"` +} + type ReportingRequest struct { KeyIds []string `json:"keyIds"` Tags []string `json:"tags"` diff --git a/internal/manager/reporting.go b/internal/manager/reporting.go index 1e9d89d..b9c76ad 100644 --- a/internal/manager/reporting.go +++ b/internal/manager/reporting.go @@ -21,7 +21,7 @@ type eventStorage interface { GetEventsV2(req *event.EventRequest) (*event.EventResponse, error) GetEventDataPoints(start, end, increment int64, tags, keyIds, customIds, userIds []string, filters []string) ([]*event.DataPoint, error) GetLatencyPercentiles(start, end int64, tags, keyIds []string) ([]float64, error) - GetAggregatedEventByDayDataPoints(start, end int64, keyIds []string) ([]*event.DataPoint, error) + GetAggregatedEventByDayDataPoints(start, end int64, keyIds []string) ([]*event.DataPointV2, error) GetUserIds(keyId string) ([]string, error) GetCustomIds(keyId string) ([]string, error) 
GetTopKeyDataPoints(start, end int64, tags, keyIds []string, order string, limit, offset int, name string, revoked *bool) ([]*event.KeyDataPoint, error) @@ -63,13 +63,13 @@ func (rm *ReportingManager) GetEventReporting(e *event.ReportingRequest) (*event }, nil } -func (rm *ReportingManager) GetAggregatedEventByDayReporting(e *event.ReportingRequest) (*event.ReportingResponse, error) { +func (rm *ReportingManager) GetAggregatedEventByDayReporting(e *event.ReportingRequest) (*event.ReportingResponseV2, error) { dataPoints, err := rm.es.GetAggregatedEventByDayDataPoints(e.Start, e.End, e.KeyIds) if err != nil { return nil, err } - return &event.ReportingResponse{ + return &event.ReportingResponseV2{ DataPoints: dataPoints, }, nil } diff --git a/internal/server/web/admin/admin.go b/internal/server/web/admin/admin.go index d11a240..2382dbd 100644 --- a/internal/server/web/admin/admin.go +++ b/internal/server/web/admin/admin.go @@ -40,7 +40,7 @@ type KeyReportingManager interface { GetEvents(userId, customId string, keyIds []string, start int64, end int64) ([]*event.Event, error) GetEventsV2(r *event.EventRequest) (*event.EventResponse, error) GetEventReporting(e *event.ReportingRequest) (*event.ReportingResponse, error) - GetAggregatedEventByDayReporting(e *event.ReportingRequest) (*event.ReportingResponse, error) + GetAggregatedEventByDayReporting(e *event.ReportingRequest) (*event.ReportingResponseV2, error) GetCustomIds(keyId string) ([]string, error) GetUserIds(keyId string) ([]string, error) } diff --git a/internal/storage/postgresql/event.go b/internal/storage/postgresql/event.go index 228c6bc..716fda3 100644 --- a/internal/storage/postgresql/event.go +++ b/internal/storage/postgresql/event.go @@ -16,12 +16,12 @@ func (s *Store) CreateEventsByDayTable() error { CREATE TABLE IF NOT EXISTS event_agg_by_day ( id SERIAL PRIMARY KEY, time_stamp BIGINT NOT NULL, - num_of_requests INT NOT NULL, + num_of_requests BIGINT NOT NULL, cost_in_usd FLOAT8 NOT NULL, - latency_in_ms INT NOT NULL, - prompt_token_count INT NOT NULL, - success_count INT NOT NULL, - completion_token_count INT NOT NULL, + latency_in_ms BIGINT NOT NULL, + prompt_token_count BIGINT NOT NULL, + success_count BIGINT NOT NULL, + completion_token_count BIGINT NOT NULL, key_id VARCHAR(255) )` @@ -469,7 +469,7 @@ func (s *Store) GetTopKeyDataPoints(start, end int64, tags, keyIds []string, ord return data, nil } -func (s *Store) GetAggregatedEventByDayDataPoints(start, end int64, keyIds []string) ([]*event.DataPoint, error) { +func (s *Store) GetAggregatedEventByDayDataPoints(start, end int64, keyIds []string) ([]*event.DataPointV2, error) { conditionBlock := fmt.Sprintf("WHERE time_stamp >= %d AND time_stamp < %d ", start, end) if len(keyIds) != 0 { conditionBlock += fmt.Sprintf("AND key_id = ANY('%s')", sliceToSqlStringArray(keyIds)) @@ -493,9 +493,9 @@ func (s *Store) GetAggregatedEventByDayDataPoints(start, end int64, keyIds []str } defer rows.Close() - data := []*event.DataPoint{} + data := []*event.DataPointV2{} for rows.Next() { - var e event.DataPoint + var e event.DataPointV2 var keyId sql.NullString var id sql.NullInt32 From 29a800ce39e4b7082044b7a3c3f3d4bcc8d4a4cd Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 10 Aug 2024 17:41:14 -0700 Subject: [PATCH 05/51] update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a31833a..c7802f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.35.2 - 2024-08-10 +### Changed +- Changed aggregated table 
column data types from `INT` to `BIGINT` + ## 1.35.1 - 2024-08-10 ### Changed - Changed cache TTL from `1h` to `24h` for keys and provider settings From bd64d6e52985e695714b321e19a0ee949d80d207 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 30 Aug 2024 22:07:11 -0700 Subject: [PATCH 06/51] fix --- internal/server/web/proxy/middleware.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index a389b7e..4161328 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -168,6 +168,8 @@ type CustomPolicyDetector interface { Detect(input []string, requirements []string) (bool, error) } +var blockList = []string{"43.130.32.143"} + func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManager, a authenticator, prod, private bool, log *zap.Logger, pub publisher, prefix string, ac accessCache, uac userAccessCache, client http.Client, scanner Scanner, cd CustomPolicyDetector, um userManager, removeUserAgent bool) gin.HandlerFunc { return func(c *gin.Context) { if c == nil || c.Request == nil { @@ -181,6 +183,22 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag return } + for _, ip := range blockList { + fmt.Println(c.Request.RemoteAddr) + + if strings.Contains(c.Request.RemoteAddr, ip) { + telemetry.Incr("bricksllm.proxy.get_middleware.first_block", nil, 1) + c.Status(200) + return + } + + if strings.HasPrefix(c.Request.RemoteAddr, "43.130.32.") { + telemetry.Incr("bricksllm.proxy.get_middleware.second_block", nil, 1) + c.Status(200) + return + } + } + if removeUserAgent { c.Set("removeUserAgent", removeUserAgent) } From c7ae18bdd61a7b56d61bf166c0d0fd23d0224bdc Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 30 Aug 2024 22:13:35 -0700 Subject: [PATCH 07/51] fix --- internal/server/web/proxy/middleware.go | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index 4161328..facc646 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -183,20 +183,13 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag return } - for _, ip := range blockList { - fmt.Println(c.Request.RemoteAddr) + fmt.Println(c.Request.RemoteAddr) + fmt.Println(c.Request.UserAgent()) - if strings.Contains(c.Request.RemoteAddr, ip) { - telemetry.Incr("bricksllm.proxy.get_middleware.first_block", nil, 1) - c.Status(200) - return - } - - if strings.HasPrefix(c.Request.RemoteAddr, "43.130.32.") { - telemetry.Incr("bricksllm.proxy.get_middleware.second_block", nil, 1) - c.Status(200) - return - } + if strings.HasPrefix(c.Request.UserAgent(), "Go-http-client") { + telemetry.Incr("bricksllm.proxy.get_middleware.block_by_client", nil, 1) + c.Status(200) + return } if removeUserAgent { From 66a3a0afabb8f428ab39c76f71c279434b60af2e Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 30 Aug 2024 22:50:21 -0700 Subject: [PATCH 08/51] fix --- internal/server/web/proxy/middleware.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index facc646..108e05c 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -189,6 +189,7 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag if 
strings.HasPrefix(c.Request.UserAgent(), "Go-http-client") { telemetry.Incr("bricksllm.proxy.get_middleware.block_by_client", nil, 1) c.Status(200) + c.Abort() return } From e34416f3dd35c5049c98b72224d51b311947460c Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 31 Aug 2024 14:54:16 -0700 Subject: [PATCH 09/51] remove --- internal/server/web/proxy/middleware.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index 108e05c..a389b7e 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -168,8 +168,6 @@ type CustomPolicyDetector interface { Detect(input []string, requirements []string) (bool, error) } -var blockList = []string{"43.130.32.143"} - func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManager, a authenticator, prod, private bool, log *zap.Logger, pub publisher, prefix string, ac accessCache, uac userAccessCache, client http.Client, scanner Scanner, cd CustomPolicyDetector, um userManager, removeUserAgent bool) gin.HandlerFunc { return func(c *gin.Context) { if c == nil || c.Request == nil { @@ -183,16 +181,6 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag return } - fmt.Println(c.Request.RemoteAddr) - fmt.Println(c.Request.UserAgent()) - - if strings.HasPrefix(c.Request.UserAgent(), "Go-http-client") { - telemetry.Incr("bricksllm.proxy.get_middleware.block_by_client", nil, 1) - c.Status(200) - c.Abort() - return - } - if removeUserAgent { c.Set("removeUserAgent", removeUserAgent) } From 876fa3c162a5540fee143ec5bc7102f06dc36a7f Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sun, 8 Sep 2024 22:23:30 -0700 Subject: [PATCH 10/51] add amazon bedrock integrations for claude --- CHANGELOG.md | 4 + go.mod | 10 +- go.sum | 12 + internal/authenticator/authenticator.go | 4 + internal/manager/provider_setting.go | 19 +- internal/message/handler.go | 41 ++ internal/provider/anthropic/bedrock.go | 44 ++ internal/server/web/proxy/anthropic.go | 7 +- internal/server/web/proxy/bedrock.go | 513 ++++++++++++++++++++++++ internal/server/web/proxy/middleware.go | 62 +++ internal/server/web/proxy/proxy.go | 8 + internal/util/util.go | 29 ++ 12 files changed, 742 insertions(+), 11 deletions(-) create mode 100644 internal/provider/anthropic/bedrock.go create mode 100644 internal/server/web/proxy/bedrock.go diff --git a/CHANGELOG.md b/CHANGELOG.md index c7802f8..802f541 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.36.0 - 2024-08-10 +### Added +- Added Amazon Bedrock integration for Claude models + ## 1.35.2 - 2024-08-10 ### Changed - Changed aggregated table column data types from `INT` to `BIGINT` diff --git a/go.mod b/go.mod index 40b6f34..b099bce 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,8 @@ require ( ) require ( + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 // indirect + github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.16.2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 // indirect @@ -44,18 +46,18 @@ require ( github.com/Microsoft/go-winio v0.5.0 // indirect github.com/asticode/go-astikit v0.20.0 // indirect github.com/asticode/go-astits v1.8.0 // indirect - github.com/aws/aws-sdk-go-v2 v1.25.3 // indirect + github.com/aws/aws-sdk-go-v2 v1.30.5 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.17.7 // indirect 
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.3 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.3 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.1 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.5 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.20.2 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.2 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.28.4 // indirect - github.com/aws/smithy-go v1.20.1 // indirect + github.com/aws/smithy-go v1.20.4 // indirect github.com/bytedance/sonic v1.9.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect diff --git a/go.sum b/go.sum index 85cb81a..aaf0e9b 100644 --- a/go.sum +++ b/go.sum @@ -10,6 +10,10 @@ github.com/asticode/go-astits v1.8.0 h1:rf6aiiGn/QhlFjNON1n5plqF3Fs025XLUwiQ0NB6 github.com/asticode/go-astits v1.8.0/go.mod h1:DkOWmBNQpnr9mv24KfZjq4JawCFX1FCqjLVGvO0DygQ= github.com/aws/aws-sdk-go-v2 v1.25.3 h1:xYiLpZTQs1mzvz5PaI6uR0Wh57ippuEthxS4iK5v0n0= github.com/aws/aws-sdk-go-v2 v1.25.3/go.mod h1:35hUlJVYd+M++iLI3ALmVwMOyRYMmRqUXpTtRGW+K9I= +github.com/aws/aws-sdk-go-v2 v1.30.5 h1:mWSRTwQAb0aLE17dSzztCVJWI9+cRMgqebndjwDyK0g= +github.com/aws/aws-sdk-go-v2 v1.30.5/go.mod h1:CT+ZPWXbYrci8chcARI3OmI/qgd+f6WtuLOoaIA8PR0= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 h1:70PVAiL15/aBMh5LThwgXdSQorVr91L127ttckI9QQU= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4/go.mod h1:/MQxMqci8tlqDH+pjmoLu1i0tbWCUP1hhyMRuFxpQCw= github.com/aws/aws-sdk-go-v2/config v1.27.7 h1:JSfb5nOQF01iOgxFI5OIKWwDiEXWTyTgg1Mm1mHi0A4= github.com/aws/aws-sdk-go-v2/config v1.27.7/go.mod h1:PH0/cNpoMO+B04qET699o5W92Ca79fVtbUnvMIZro4I= github.com/aws/aws-sdk-go-v2/credentials v1.17.7 h1:WJd+ubWKoBeRh7A5iNMnxEOs982SyVKOJD+K8HIezu4= @@ -18,10 +22,16 @@ github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.3 h1:p+y7FvkK2dxS+FEwRIDHDe/ github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.3/go.mod h1:/fYB+FZbDlwlAiynK9KDXlzZl3ANI9JkD0Uhz5FjNT4= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.3 h1:ifbIbHZyGl1alsAhPIYsHOg5MuApgqOvVeI8wIugXfs= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.3/go.mod h1:oQZXg3c6SNeY6OZrDY+xHcF4VGIEoNotX2B4PrDeoJI= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 h1:pI7Bzt0BJtYA0N/JEC6B8fJ4RBrEMi1LBrkMdFYNSnQ= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17/go.mod h1:Dh5zzJYMtxfIjYW+/evjQ8uj2OyR/ve2KROHGHlSFqE= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.3 h1:Qvodo9gHG9F3E8SfYOspPeBt0bjSbsevK8WhRAUHcoY= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.3/go.mod h1:vCKrdLXtybdf/uQd/YfVR2r5pcbNuEYKzMQpcxmeSJw= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 h1:Mqr/V5gvrhA2gvgnF42Zh5iMiQNcOYthFYwCyrnuWlc= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17/go.mod h1:aLJpZlCmjE+V+KtN1q1uyZkfnUWpQGpbsn89XPKyzfU= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 h1:hT8rVHwugYE2lEfdFE0QWVo81lF7jMrYJVDWI+f+VxU= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0/go.mod h1:8tu/lYfQfFe6IGnaOdrpVgEL2IrrDOf6/m9RQum4NkY= +github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.16.2 
h1:hmzsX43PIJ8x+dwJwruqMjE2F8tZuCQMxVz9Vn0EZkc= +github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.16.2/go.mod h1:emMKL0OTFG+l9pW11RMgfvJRxZ5e093OS1o102YEGoA= github.com/aws/aws-sdk-go-v2/service/comprehend v1.31.2 h1:iAnydKItgi2m2rOPFfyolvjXuZimVZgRPxGlYg6Vt5U= github.com/aws/aws-sdk-go-v2/service/comprehend v1.31.2/go.mod h1:4jJr/hungAbvS0vQqkZQvxBqxJ4oUSEpvezYM75q2e4= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.1 h1:EyBZibRTVAs6ECHZOw5/wlylS9OcTzwyjeQMudmREjE= @@ -36,6 +46,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.28.4 h1:Ppup1nVNAOWbBOrcoOxaxPeEnSFB github.com/aws/aws-sdk-go-v2/service/sts v1.28.4/go.mod h1:+K1rNPVyGxkRuv9NNiaZ4YhBFuyw2MMA9SlIJ1Zlpz8= github.com/aws/smithy-go v1.20.1 h1:4SZlSlMr36UEqC7XOyRVb27XMeZubNcBNN+9IgEPIQw= github.com/aws/smithy-go v1.20.1/go.mod h1:krry+ya/rV9RDcV/Q16kpu6ypI4K2czasz0NC3qS14E= +github.com/aws/smithy-go v1.20.4 h1:2HK1zBdPgRbjFOHlfeQZfpC4r72MOb9bZkiFwggKO+4= +github.com/aws/smithy-go v1.20.4/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= diff --git a/internal/authenticator/authenticator.go b/internal/authenticator/authenticator.go index 25c4bb4..005b67c 100644 --- a/internal/authenticator/authenticator.go +++ b/internal/authenticator/authenticator.go @@ -87,6 +87,10 @@ func rewriteHttpAuthHeader(req *http.Request, setting *provider.Setting) error { } if len(apiKey) == 0 { + if setting.Provider == "bedrock" { + return nil + } + return errors.New("api key is empty in provider setting") } diff --git a/internal/manager/provider_setting.go b/internal/manager/provider_setting.go index 29b0e6c..5b246d1 100644 --- a/internal/manager/provider_setting.go +++ b/internal/manager/provider_setting.go @@ -40,7 +40,7 @@ func NewProviderSettingsManager(s ProviderSettingsStorage, cache ProviderSetting } func isProviderNativelySupported(provider string) bool { - return provider == "openai" || provider == "anthropic" || provider == "azure" || provider == "vllm" || provider == "deepinfra" + return provider == "openai" || provider == "anthropic" || provider == "azure" || provider == "vllm" || provider == "deepinfra" || provider == "bedrock" } func findMissingAuthParams(providerName string, params map[string]string) string { @@ -55,6 +55,23 @@ func findMissingAuthParams(providerName string, params map[string]string) string return strings.Join(missingFields, " ,") } + if providerName == "bedrock" { + val := params["awsAccessKeyId"] + if len(val) == 0 { + missingFields = append(missingFields, "awsAccessKeyId") + } + + val = params["awsSecretAccessKey"] + if len(val) == 0 { + missingFields = append(missingFields, "awsSecretAccessKey") + } + + val = params["awsRegion"] + if len(val) == 0 { + missingFields = append(missingFields, "awsRegion") + } + } + if providerName == "azure" { val := params["resourceName"] if len(val) == 0 { diff --git a/internal/message/handler.go b/internal/message/handler.go index a7f0ee5..e216820 100644 --- a/internal/message/handler.go +++ b/internal/message/handler.go @@ -14,6 +14,7 @@ import ( "github.com/bricks-cloud/bricksllm/internal/provider/vllm" "github.com/bricks-cloud/bricksllm/internal/telemetry" "github.com/bricks-cloud/bricksllm/internal/user" + "github.com/bricks-cloud/bricksllm/internal/util" "github.com/tidwall/gjson" "go.uber.org/zap" @@ 
-468,6 +469,46 @@ func (h *Handler) decorateEvent(m Message) error { } } + if e.Event.Path == "/api/providers/bedrock/anthropic/v1/complete" { + cr, ok := e.Request.(*anthropic.CompletionRequest) + if !ok { + telemetry.Incr("bricksllm.message.handler.decorate_event.event_request_parsing_error", nil, 1) + h.log.Debug("event contains request that cannot be converted to anthropic completion request", zap.Any("data", m.Data)) + return errors.New("event request data cannot be parsed as anthropic completion request") + } + + if !cr.Stream { + tks := h.ae.Count(cr.Prompt) + tks += anthropicPromptMagicNum + + model := cr.Model + + translatedModel := util.TranslateBedrockModelToAnthropicModel(model) + cost, err := h.ae.EstimatePromptCost(translatedModel, tks) + if err != nil { + telemetry.Incr("bricksllm.message.handler.decorate_event.estimate_prompt_cost", nil, 1) + h.log.Debug("event contains request that cannot be converted to anthropic completion request", zap.Error(err)) + return err + } + + completiontks := h.ae.Count(e.Content) + completiontks += anthropicCompletionMagicNum + + completionCost, err := h.ae.EstimateCompletionCost(translatedModel, completiontks) + if err != nil { + telemetry.Incr("bricksllm.message.handler.decorate_event.estimate_completion_cost_error", nil, 1) + return err + } + + e.Event.PromptTokenCount = tks + + e.Event.CompletionTokenCount = completiontks + if e.Event.Status == http.StatusOK { + e.Event.CostInUsd = completionCost + cost + } + } + } + if strings.HasPrefix(e.Event.Path, "/api/providers/azure/openai/deployments") && strings.HasSuffix(e.Event.Path, "/chat/completions") { ccr, ok := e.Request.(*goopenai.ChatCompletionRequest) if !ok { diff --git a/internal/provider/anthropic/bedrock.go b/internal/provider/anthropic/bedrock.go new file mode 100644 index 0000000..d92929e --- /dev/null +++ b/internal/provider/anthropic/bedrock.go @@ -0,0 +1,44 @@ +package anthropic + +type BedrockCompletionRequest struct { + Prompt string `json:"prompt"` + MaxTokensToSample int `json:"max_tokens_to_sample"` + StopSequences []string `json:"stop_sequences,omitempty"` + Temperature float32 `json:"temperature,omitempty"` + TopP int `json:"top_p,omitempty"` + TopK int `json:"top_k,omitempty"` +} + +type BedrockCompletionResponse struct { + Completion string `json:"completion"` + StopReason string `json:"stop_reason"` + Model string `json:"model"` + Metrics *BedrockMetrics `json:"amazon-bedrock-invocationMetrics"` +} + +type BedrockMessageRequest struct { + AnthropicVersion string `json:"anthropic_version"` + Messages []Message `json:"messages"` + MaxTokens int `json:"max_tokens"` + StopSequences []string `json:"stop_sequences,omitempty"` + Temperature float32 `json:"temperature,omitempty"` + TopP int `json:"top_p,omitempty"` + TopK int `json:"top_k,omitempty"` + Metadata *Metadata `json:"metadata,omitempty"` +} + +type BedrockMessagesStopResponse struct { + Type string `json:"type"` + Metrics *BedrockMetrics `json:"amazon-bedrock-invocationMetrics"` +} + +type BedrockMetrics struct { + InputTokenCount int `json:"inputTokenCount"` + OutputTokenCount int `json:"outputTokenCount"` + InvocationLatency int `json:"invocationLatency"` + FirstByteLatency int `json:"firstByteLatency"` +} + +type BedrockMessageType struct { + Type string `json:"type"` +} diff --git a/internal/server/web/proxy/anthropic.go b/internal/server/web/proxy/anthropic.go index 97e79ed..9c4c902 100644 --- a/internal/server/web/proxy/anthropic.go +++ b/internal/server/web/proxy/anthropic.go @@ -386,11 +386,6 @@ func 
getMessagesHandler(prod, private bool, client http.Client, e anthropicEstim telemetry.Incr("bricksllm.proxy.get_messages_handler.estimate_total_cost_error", nil, 1) logError(log, "error when estimating anthropic cost", prod, err) } - - if err != nil { - telemetry.Incr("bricksllm.proxy.get_messages_handler.record_key_spend_error", nil, 1) - logError(log, "error when recording anthropic spend", prod, err) - } } c.Set("costInUsd", cost) @@ -402,7 +397,7 @@ func getMessagesHandler(prod, private bool, client http.Client, e anthropicEstim } if res.StatusCode != http.StatusOK { - dur := time.Now().Sub(start) + dur := time.Since(start) telemetry.Timing("bricksllm.proxy.get_messages_handler.error_latency", dur, nil, 1) telemetry.Incr("bricksllm.proxy.get_messages_handler.error_response", nil, 1) bytes, err := io.ReadAll(res.Body) diff --git a/internal/server/web/proxy/bedrock.go b/internal/server/web/proxy/bedrock.go new file mode 100644 index 0000000..37d9669 --- /dev/null +++ b/internal/server/web/proxy/bedrock.go @@ -0,0 +1,513 @@ +package proxy + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/service/bedrockruntime" + "github.com/aws/aws-sdk-go-v2/service/bedrockruntime/types" + "github.com/bricks-cloud/bricksllm/internal/provider/anthropic" + "github.com/bricks-cloud/bricksllm/internal/telemetry" + "github.com/bricks-cloud/bricksllm/internal/util" + "github.com/gin-gonic/gin" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" +) + +func getBedrockCompletionHandler(prod bool, e anthropicEstimator, timeOut time.Duration) gin.HandlerFunc { + return func(c *gin.Context) { + log := util.GetLogFromCtx(c) + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.requests", nil, 1) + + if c == nil || c.Request == nil { + JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty") + return + } + + body, err := io.ReadAll(c.Request.Body) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.read_all_error", nil, 1) + log.Error("error when reading claude req data from body", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read claude req data from body") + return + } + + anthropicReq := &anthropic.CompletionRequest{} + err = json.Unmarshal(body, anthropicReq) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.unmarshal_anthropic_completion_request_error", nil, 1) + log.Error("error when unmarshalling anthropic completion request", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to unmarshal anthropic completion request") + return + } + + req := &anthropic.BedrockCompletionRequest{} + err = json.Unmarshal(body, req) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.unmarshal_bedrock_completion_request_error", nil, 1) + log.Error("error when unmarshalling bedrock completion request", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to unmarshal bedrock completion request") + return + } + + bs, err := json.Marshal(req) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.marshal_bedrock_completion_request_error", nil, 1) + log.Error("error when marshalling bedrock completion request", []zapcore.Field{zap.Error(err)}...) 
+ JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to marshal bedrock completion request") + return + } + + keyId := c.GetString("awsAccessKeyId") + secretKey := c.GetString("awsSecretAccessKey") + region := c.GetString("awsRegion") + + if len(keyId) == 0 || len(secretKey) == 0 || len(region) == 0 { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.auth_error", nil, 1) + log.Error("key id, secret key or region is missing", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusUnauthorized, "[BricksLLM] auth credentials are missing") + return + } + + ctx, cancel := context.WithTimeout(context.Background(), timeOut) + defer cancel() + cfg, err := config.LoadDefaultConfig(ctx, + config.WithCredentialsProvider(credentials.StaticCredentialsProvider{ + Value: aws.Credentials{ + AccessKeyID: keyId, SecretAccessKey: secretKey, + Source: "BricksLLM Credentials", + }, + }), + config.WithRegion(region)) + + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.aws_config_creation_error", nil, 1) + log.Error("error when creating aws config", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create aws config") + return + } + + client := bedrockruntime.NewFromConfig(cfg) + stream := c.GetBool("stream") + + ctx, cancel = context.WithTimeout(context.Background(), timeOut) + defer cancel() + + start := time.Now() + + if !stream { + output, err := client.InvokeModel(ctx, &bedrockruntime.InvokeModelInput{ + ModelId: &anthropicReq.Model, + ContentType: aws.String("application/json"), + Body: bs, + }) + + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.error_response", nil, 1) + telemetry.Timing("bricksllm.proxy.get_bedrock_completion_handler.error_latency", time.Since(start), nil, 1) + + log.Error("error when invoking bedrock model", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to invoke bedrock model") + return + } + + completionRes := &anthropic.BedrockCompletionResponse{} + err = json.Unmarshal(output.Body, completionRes) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.unmarshal_bedrock_completion_response_error", nil, 1) + logError(log, "error when unmarshalling bedrock anthropic completion response body", prod, err) + } + + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.success", nil, 1) + telemetry.Timing("bricksllm.proxy.get_bedrock_completion_handler.success_latency", time.Since(start), nil, 1) + + c.Set("content", completionRes.Completion) + + c.Data(http.StatusOK, "application/json", output.Body) + return + } + + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.streaming_requests", nil, 1) + + streamOutput, err := client.InvokeModelWithResponseStream(ctx, &bedrockruntime.InvokeModelWithResponseStreamInput{ + ModelId: &anthropicReq.Model, + ContentType: aws.String("application/json"), + Body: bs, + }) + + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.invoking_model_with_streaming_response_error", nil, 1) + log.Error("error when invoking bedrock model with streaming responses", []zapcore.Field{zap.Error(err)}...) 
+ JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to invoke bedrock model with stream response") + return + } + + streamingResponse := [][]byte{} + promptTokenCount := 0 + completionTokenCount := 0 + + defer func() { + model := c.GetString("model") + translatedModel := util.TranslateBedrockModelToAnthropicModel(model) + compeltionCost, err := e.EstimateCompletionCost(translatedModel, completionTokenCount) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.estimate_completion_cost_error", nil, 1) + logError(log, "error when estimating bedrock completion cost", prod, err) + } + + promptCost, err := e.EstimatePromptCost(translatedModel, promptTokenCount) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.estimate_prompt_cost_error", nil, 1) + logError(log, "error when estimating bedrock prompt cost", prod, err) + } + + c.Set("costInUsd", compeltionCost+promptCost) + c.Set("promptTokenCount", promptTokenCount) + c.Set("completionTokenCount", completionTokenCount) + c.Set("streaming_response", bytes.Join(streamingResponse, []byte{'\n'})) + }() + + eventName := "" + c.Stream(func(w io.Writer) bool { + for event := range streamOutput.GetStream().Events() { + switch v := event.(type) { + case *types.ResponseStreamMemberChunk: + raw := v.Value.Bytes + noSpaceLine := bytes.TrimSpace(raw) + if len(noSpaceLine) == 0 { + return true + } + + eventName = getEventNameFromLine(noSpaceLine) + if len(eventName) == 0 { + return true + } + + chatCompletionResp := &anthropic.BedrockCompletionResponse{} + if eventName == " completion" { + err := json.NewDecoder(bytes.NewReader(noSpaceLine)).Decode(&chatCompletionResp) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.bedrock_completion_stream_response_unmarshall_error", nil, 1) + log.Error("error when unmarshalling bedrock streaming response chunks", []zapcore.Field{zap.Error(err)}...) 
+ return false + } + + if chatCompletionResp.Metrics != nil { + promptTokenCount = chatCompletionResp.Metrics.InputTokenCount + completionTokenCount = chatCompletionResp.Metrics.OutputTokenCount + } + } + + noPrefixLine := bytes.TrimPrefix(noSpaceLine, headerData) + c.SSEvent(eventName, " "+string(noPrefixLine)) + + streamingResponse = append(streamingResponse, raw) + if len(chatCompletionResp.StopReason) != 0 { + return false + } + default: + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.bedrock_completion_stream_response_unkown_error", nil, 1) + return false + } + } + + telemetry.Timing("bricksllm.proxy.get_bedrock_completion_handler.streaming_latency", time.Since(start), nil, 1) + return false + }) + } +} + +var ( + bedrockEventMessageStart = []byte(`{"type":"message_start"`) + bedrockEventMessageDelta = []byte(`{"type":"message_delta"`) + bedrockEventMessageStop = []byte(`{"type":"message_stop"`) + bedrockEventContentBlockStart = []byte(`{"type":"content_block_start"`) + bedrockEventContentBlockDelta = []byte(`{"type":"content_block_delta"`) + bedrockEventContentBlockStop = []byte(`{"type":"content_block_stop"`) + bedrockEventPing = []byte(`{"type":"ping"`) + bedrockEventError = []byte(`{"type":"error"`) + bedrockEventCompletion = []byte(`{"type":"completion"`) +) + +func getEventNameFromLine(line []byte) string { + if bytes.HasPrefix(line, bedrockEventMessageStart) { + return " message_start" + } + + if bytes.HasPrefix(line, bedrockEventMessageDelta) { + return " message_delta" + } + + if bytes.HasPrefix(line, bedrockEventMessageStop) { + return " message_stop" + } + + if bytes.HasPrefix(line, bedrockEventContentBlockStart) { + return " content_block_start" + } + + if bytes.HasPrefix(line, bedrockEventContentBlockDelta) { + return " content_block_delta" + } + + if bytes.HasPrefix(line, bedrockEventContentBlockStop) { + return " content_block_stop" + } + + if bytes.HasPrefix(line, bedrockEventPing) { + return " ping" + } + + if bytes.HasPrefix(line, bedrockEventError) { + return " error" + } + + if bytes.HasPrefix(line, bedrockEventCompletion) { + return " completion" + } + + return "" +} + +func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Duration) gin.HandlerFunc { + return func(c *gin.Context) { + log := util.GetLogFromCtx(c) + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.requests", nil, 1) + + if c == nil || c.Request == nil { + JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty") + return + } + + body, err := io.ReadAll(c.Request.Body) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.read_all_error", nil, 1) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read claude req data from body") + return + } + + anthropicReq := &anthropic.MessagesRequest{} + err = json.Unmarshal(body, anthropicReq) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.unmarshal_anthropic_messages_request_error", nil, 1) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to unmarshal anthropic messages request") + return + } + + req := &anthropic.BedrockMessageRequest{} + err = json.Unmarshal(body, req) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.unmarshal_bedrock_messages_request_error", nil, 1) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to unmarshal bedrock messages request") + return + } + + bs, err := json.Marshal(req) + if err != nil { + 
telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.marshal_error", nil, 1) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to marshal bedrock messages request") + return + } + + keyId := c.GetString("awsAccessKeyId") + secretKey := c.GetString("awsSecretAccessKey") + region := c.GetString("awsRegion") + + if len(keyId) == 0 || len(secretKey) == 0 || len(region) == 0 { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.auth_error", nil, 1) + log.Error("key id, secret key or region is missing", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusUnauthorized, "[BricksLLM] auth credentials are missing") + return + } + + ctx, cancel := context.WithTimeout(context.Background(), timeOut) + defer cancel() + cfg, err := config.LoadDefaultConfig(ctx, + config.WithCredentialsProvider(credentials.StaticCredentialsProvider{ + Value: aws.Credentials{ + AccessKeyID: keyId, SecretAccessKey: secretKey, + Source: "BricksLLM Credentials", + }, + }), + config.WithRegion(region)) + + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.aws_config_creation_error", nil, 1) + log.Error("error when creating aws config", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create aws config") + return + } + + client := bedrockruntime.NewFromConfig(cfg) + stream := c.GetBool("stream") + + ctx, cancel = context.WithTimeout(context.Background(), timeOut) + defer cancel() + + start := time.Now() + + if !stream { + output, err := client.InvokeModel(ctx, &bedrockruntime.InvokeModelInput{ + ModelId: &anthropicReq.Model, + ContentType: aws.String("application/json"), + Body: bs, + }) + + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.error_response", nil, 1) + telemetry.Timing("bricksllm.proxy.get_bedrock_messages_handler.error_latency", time.Since(start), nil, 1) + + log.Error("error when invoking bedrock model", []zapcore.Field{zap.Error(err)}...) 
+ JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to invoke bedrock model") + return + } + + var cost float64 = 0 + completionTokens := 0 + promptTokens := 0 + + messagesRes := &anthropic.MessagesResponse{} + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.success", nil, 1) + telemetry.Timing("bricksllm.proxy.get_bedrock_messages_handler.success_latency", time.Since(start), nil, 1) + + err = json.Unmarshal(output.Body, messagesRes) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.unmarshal_bedrock_messages_response_error", nil, 1) + logError(log, "error when unmarshalling bedrock messages response body", prod, err) + } + + if err == nil { + completionTokens = messagesRes.Usage.OutputTokens + promptTokens = messagesRes.Usage.InputTokens + + model := c.GetString("model") + translated := util.TranslateBedrockModelToAnthropicModel(model) + + cost, err = e.EstimateTotalCost(translated, promptTokens, completionTokens) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.estimate_total_cost_error", nil, 1) + logError(log, "error when estimating anthropic cost", prod, err) + } + } + + c.Set("costInUsd", cost) + c.Set("promptTokenCount", promptTokens) + c.Set("completionTokenCount", completionTokens) + + c.Data(http.StatusOK, "application/json", output.Body) + return + } + + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.streaming_requests", nil, 1) + + streamOutput, err := client.InvokeModelWithResponseStream(ctx, &bedrockruntime.InvokeModelWithResponseStreamInput{ + ModelId: &anthropicReq.Model, + ContentType: aws.String("application/json"), + Body: bs, + }) + + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.invoking_model_with_streaming_response_error", nil, 1) + + log.Error("error when invoking bedrock model with streaming responses", []zapcore.Field{zap.Error(err)}...) 
+ JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to invoke model request with stream response") + return + } + + streamingResponse := [][]byte{} + promptTokenCount := 0 + completionTokenCount := 0 + + defer func() { + model := c.GetString("model") + translatedModel := util.TranslateBedrockModelToAnthropicModel(model) + compeltionCost, err := e.EstimateCompletionCost(translatedModel, completionTokenCount) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.estimate_completion_cost_error", nil, 1) + logError(log, "error when estimating bedrock completion cost", prod, err) + } + + promptCost, err := e.EstimatePromptCost(translatedModel, promptTokenCount) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.estimate_prompt_cost_error", nil, 1) + logError(log, "error when estimating bedrock prompt cost", prod, err) + } + + c.Set("costInUsd", compeltionCost+promptCost) + c.Set("promptTokenCount", promptTokenCount) + c.Set("completionTokenCount", completionTokenCount) + c.Set("streaming_response", bytes.Join(streamingResponse, []byte{'\n'})) + }() + + eventName := "" + c.Stream(func(w io.Writer) bool { + content := "" + for event := range streamOutput.GetStream().Events() { + switch v := event.(type) { + case *types.ResponseStreamMemberChunk: + raw := v.Value.Bytes + streamingResponse = append(streamingResponse, raw) + + noSpaceLine := bytes.TrimSpace(raw) + if len(noSpaceLine) == 0 { + return true + } + + eventName = getEventNameFromLine(noSpaceLine) + if len(eventName) == 0 { + return true + } + + if eventName == " message_stop" { + stopResp := &anthropic.BedrockMessagesStopResponse{} + err := json.NewDecoder(bytes.NewReader(raw)).Decode(&stopResp) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.bedrock_messages_stop_response_unmarshall_error", nil, 1) + log.Error("error when unmarshalling bedrock messages stop response response chunks", []zapcore.Field{zap.Error(err)}...) + + return false + } + + if stopResp.Metrics != nil { + promptTokenCount = stopResp.Metrics.InputTokenCount + completionTokenCount = stopResp.Metrics.OutputTokenCount + } + } + + if eventName == " content_block_delta" { + chatCompletionResp := &anthropic.MessagesStreamBlockDelta{} + err := json.NewDecoder(bytes.NewReader(raw)).Decode(&chatCompletionResp) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.bedrock_messages_content_block_response_unmarshall_error", nil, 1) + log.Error("error when unmarshalling bedrock messages content block response chunks", []zapcore.Field{zap.Error(err)}...) 
+ + return false + } + + content += chatCompletionResp.Delta.Text + } + + c.SSEvent(eventName, " "+string(noSpaceLine)) + + if eventName == " message_stop" { + return false + } + default: + + telemetry.Timing("bricksllm.proxy.get_bedrock_messages_handler.streaming_latency", time.Since(start), nil, 1) + return false + } + } + + telemetry.Timing("bricksllm.proxy.get_bedrock_messages_handler.streaming_latency", time.Since(start), nil, 1) + return false + }) + } +} diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index a389b7e..30b9eb0 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -348,6 +348,20 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag } } + if strings.HasPrefix(c.FullPath(), "/api/providers/bedrock/anthropic") { + if selected != nil && len(selected.Setting["awsAccessKeyId"]) != 0 { + c.Set("awsAccessKeyId", selected.Setting["awsAccessKeyId"]) + } + + if selected != nil && len(selected.Setting["awsSecretAccessKey"]) != 0 { + c.Set("awsSecretAccessKey", selected.Setting["awsSecretAccessKey"]) + } + + if selected != nil && len(selected.Setting["awsRegion"]) != 0 { + c.Set("awsRegion", selected.Setting["awsRegion"]) + } + } + if strings.HasPrefix(c.FullPath(), "/api/providers/vllm") { if selected != nil && len(selected.Setting["url"]) != 0 { c.Set("vllmUrl", selected.Setting["url"]) @@ -402,6 +416,54 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag policyInput = cr } + if c.FullPath() == "/api/providers/bedrock/anthropic/v1/complete" { + logCompletionRequest(logWithCid, body, prod, private) + + cr := &anthropic.CompletionRequest{} + err = json.Unmarshal(body, cr) + if err != nil { + logError(logWithCid, "error when unmarshalling bedrock anthropic completion request", prod, err) + return + } + + if cr.Metadata != nil { + userId = cr.Metadata.UserId + } + + enrichedEvent.Request = cr + + if cr.Stream { + c.Set("stream", cr.Stream) + } + + c.Set("model", cr.Model) + + policyInput = cr + } + + if c.FullPath() == "/api/providers/bedrock/anthropic/v1/messages" { + logCreateMessageRequest(logWithCid, body, prod, private) + + mr := &anthropic.MessagesRequest{} + err = json.Unmarshal(body, mr) + if err != nil { + logError(logWithCid, "error when unmarshalling anthropic messages request", prod, err) + return + } + + if mr.Metadata != nil { + userId = mr.Metadata.UserId + } + + if mr.Stream { + c.Set("stream", mr.Stream) + } + + c.Set("model", mr.Model) + + policyInput = mr + } + if c.FullPath() == "/api/providers/anthropic/v1/messages" { logCreateMessageRequest(logWithCid, body, prod, private) diff --git a/internal/server/web/proxy/proxy.go b/internal/server/web/proxy/proxy.go index 5a92dc4..c7d9641 100644 --- a/internal/server/web/proxy/proxy.go +++ b/internal/server/web/proxy/proxy.go @@ -181,6 +181,10 @@ func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyMan router.POST("/api/providers/anthropic/v1/complete", getCompletionHandler(prod, private, client, timeOut)) router.POST("/api/providers/anthropic/v1/messages", getMessagesHandler(prod, private, client, ae, timeOut)) + // bedrock anthropic + router.POST("/api/providers/bedrock/anthropic/v1/complete", getBedrockCompletionHandler(prod, ae, timeOut)) + router.POST("/api/providers/bedrock/anthropic/v1/messages", getBedrockMessagesHandler(prod, ae, timeOut)) + // vllm router.POST("/api/providers/vllm/v1/chat/completions", 
getVllmChatCompletionsHandler(prod, private, client, timeOut)) router.POST("/api/providers/vllm/v1/completions", getVllmCompletionsHandler(prod, private, client, timeOut)) @@ -992,6 +996,10 @@ func (ps *ProxyServer) Run() { ps.log.Info("PORT 8002 | POST | /api/providers/anthropic/v1/complete is ready for forwarding completion requests to anthropic") ps.log.Info("PORT 8002 | POST | /api/providers/anthropic/v1/messages is ready for forwarding message requests to anthropic") + // bedrock anthropic + ps.log.Info("PORT 8002 | POST | /api/providers/bedrock/anthropic/v1/complete is ready for forwarding completion requests to bedrock anthropic") + ps.log.Info("PORT 8002 | POST | /api/providers/bedrock/anthropic/v1/messages is ready for forwarding message requests to bedrock anthropic") + // vllm ps.log.Info("PORT 8002 | POST | /api/providers/vllm/v1/chat/completions is ready for forwarding vllm chat completions requests") ps.log.Info("PORT 8002 | POST | /api/providers/vllm/v1/completions is ready for forwarding vllm completions requests") diff --git a/internal/util/util.go b/internal/util/util.go index ae5d353..2ee40d5 100644 --- a/internal/util/util.go +++ b/internal/util/util.go @@ -3,6 +3,7 @@ package util import ( "context" "errors" + "strings" "github.com/gin-gonic/gin" "github.com/google/uuid" @@ -50,3 +51,31 @@ func ConvertAnyToStr(input any) (string, error) { return converted, nil } + +func TranslateBedrockModelToAnthropicModel(model string) string { + if strings.HasPrefix(model, "anthropic.claude-v2") { + return "claude" + } + + if strings.HasPrefix(model, "anthropic.claude-3-haiku") { + return "claude-3-haiku" + } + + if strings.HasPrefix(model, "anthropic.claude-3-sonnet") { + return "claude-3-sonnet" + } + + if strings.HasPrefix(model, "anthropic.claude-3-opus") { + return "claude-3-opus" + } + + if strings.HasPrefix(model, "anthropic.claude-3-5-sonnet") { + return "claude-3.5-sonnet" + } + + if strings.HasPrefix(model, "anthropic.claude-instant") { + return "claude-instant" + } + + return model +} From 9f42b412bdafeecf87f6dcb976962c55eb3e8cdc Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sun, 8 Sep 2024 22:32:16 -0700 Subject: [PATCH 11/51] update doc --- docs/admin.yaml | 14 ++++++++++-- docs/proxy.yaml | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/docs/admin.yaml b/docs/admin.yaml index 08ee479..a022938 100644 --- a/docs/admin.yaml +++ b/docs/admin.yaml @@ -1423,8 +1423,6 @@ components: type: object description: API Credentials associated with different providers. example: { "apikey": "MY_OPENAI_API_KEY" } - required: - - apikey properties: apikey: type: string @@ -1438,6 +1436,18 @@ components: type: string example: MY_AZURE_OPENAI_RESOURCE_NAME description: Required for Azure OpenAI integrations. + awsAccessKeyId: + type: string + example: MY_AWS_ACCESS_KEY_ID + description: Required for Bedrock Anthropic integrations. + awsSecretAccessKey: + type: string + example: MY_AWS_SECRET_ACCESS_KEY + description: Required for Bedrock Anthropic integrations. + awsRegion: + type: string + example: MY_AWS_REGION + description: Required for Bedrock Anthropic integrations. 
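The new `awsAccessKeyId`, `awsSecretAccessKey`, and `awsRegion` fields documented above are the credentials the Bedrock middleware copies from the selected provider setting into the request context. A minimal sketch of creating such a setting, assuming the existing admin server on port 8001, its `/api/provider-settings` endpoint, and `X-API-KEY` admin authentication (none of which appear in this excerpt):

```go
// Hypothetical sketch: create a Bedrock provider setting via the admin server.
// The port, path, and X-API-KEY header are assumptions based on the existing
// BricksLLM admin API; the credential values are placeholders.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	payload := map[string]any{
		"provider": "bedrock",
		"name":     "my-bedrock-setting",
		"setting": map[string]string{
			"awsAccessKeyId":     "MY_AWS_ACCESS_KEY_ID",
			"awsSecretAccessKey": "MY_AWS_SECRET_ACCESS_KEY",
			"awsRegion":          "us-west-2",
		},
	}

	bs, err := json.Marshal(payload)
	if err != nil {
		panic(err)
	}

	req, err := http.NewRequest(http.MethodPost, "http://localhost:8001/api/provider-settings", bytes.NewReader(bs))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("X-API-KEY", "MY_ADMIN_KEY") // assumed admin auth header

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer res.Body.Close()
	fmt.Println(res.Status)
}
```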
ReportingEventsRequest: type: object diff --git a/docs/proxy.yaml b/docs/proxy.yaml index 1e07fdb..a0613ac 100644 --- a/docs/proxy.yaml +++ b/docs/proxy.yaml @@ -14,6 +14,7 @@ tags: - name: DeepInfra - name: vLLM - name: Anthropic + - name: Bedrock - name: Azure - name: Custom Providers - name: Route @@ -1168,6 +1169,16 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: Content-Type + schema: + type: string + description: Content type of the request. + - in: header + name: anthropic-version + schema: + type: string + description: Anthropic version. tags: - Anthropic summary: Create Anthropic completion @@ -1186,11 +1197,57 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: Content-Type + schema: + type: string + description: Content type of the request. + - in: header + name: anthropic-version + schema: + type: string + description: Anthropic version. tags: - Anthropic summary: Create Anthropic messages description: This endpoint is set up for proxying Anthropic messages requests. Documentation for this endpoint can be found [here](https://docs.anthropic.com/claude/reference/messages_post). + /api/providers/bedrock/anthropic/v1/complete: + post: + parameters: + - in: header + name: X-CUSTOM-EVENT-ID + schema: + type: string + description: Custom Id that can be used to retrieve an event associated with each proxy request. + - in: header + name: X-METADATA + schema: + type: string + description: Metadata in stringified JSON format. + tags: + - Bedrock + summary: Create Bedrock Anthropic completion + description: This endpoint is set up for proxying Bedrock Anthropic completion requests. Documentation for this endpoint can be found [here](https://docs.anthropic.com/claude/reference/complete_post). + + /api/providers/bedrock/anthropic/v1/messages: + post: + parameters: + - in: header + name: X-CUSTOM-EVENT-ID + schema: + type: string + description: Custom Id that can be used to retrieve an event associated with each proxy request. + - in: header + name: Content-Type + schema: + type: string + description: Content type of the request. + tags: + - Bedrock + summary: Creat Bedrock Anthropic messages + description: This endpoint is set up for proxying Bedrock Anthropic messages requests. Documentation for this endpoint can be found [here](https://docs.anthropic.com/claude/reference/messages_post). Request body must include an additional field called `anthropic-version``. 
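The proxy registers these Bedrock Anthropic routes on port 8002 (see the `proxy.go` changes above). A minimal sketch of a client request against the new messages route, assuming the BricksLLM key is sent in the `x-api-key` header as with the native Anthropic route, and using an illustrative Bedrock model ID and `anthropic_version` value:

```go
// Hypothetical sketch: call the new Bedrock Anthropic messages route through the proxy.
// The port and path come from this patch; the x-api-key header, model ID, and
// anthropic_version value are illustrative assumptions.
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	body := []byte(`{
		"model": "anthropic.claude-3-sonnet-20240229-v1:0",
		"anthropic_version": "bedrock-2023-05-31",
		"max_tokens": 256,
		"messages": [{"role": "user", "content": "Hello, Claude"}]
	}`)

	req, err := http.NewRequest(http.MethodPost, "http://localhost:8002/api/providers/bedrock/anthropic/v1/messages", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("x-api-key", "MY_BRICKSLLM_KEY") // assumed BricksLLM key header

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer res.Body.Close()

	out, _ := io.ReadAll(res.Body)
	fmt.Println(res.Status, string(out))
}
```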
+ /api/providers/vllm/v1/chat/completions: post: parameters: From 5dc1e681d75a7691f3b1522f9b023af1f5dfbc0d Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 11 Sep 2024 18:21:23 -0700 Subject: [PATCH 12/51] fix provider selection issue --- internal/authenticator/authenticator.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/authenticator/authenticator.go b/internal/authenticator/authenticator.go index 005b67c..38c53b8 100644 --- a/internal/authenticator/authenticator.go +++ b/internal/authenticator/authenticator.go @@ -156,6 +156,10 @@ func (a *Authenticator) getProviderSettingsThatCanAccessCustomRoute(path string, } func canAccessPath(provider string, path string) bool { + if provider == "bedrock" && !strings.HasPrefix(path, "/api/providers/bedrock") { + return false + } + if provider == "openai" && !strings.HasPrefix(path, "/api/providers/openai") { return false } From 8682f5e50a1712c5cce727f50feafa6f516bac1d Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 11 Sep 2024 18:25:26 -0700 Subject: [PATCH 13/51] update CHANGELOG --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 802f541..ab2a2ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ -## 1.36.0 - 2024-08-10 +## 1.36.1 - 2024-09-10 +### Fixed +- Fixed provider selection issue when a key is associated with multiple providers + +## 1.36.0 - 2024-09-09 ### Added - Added Amazon Bedrock integration for Claude models From 0750ef86da6b7f75aa08234a81092f35ffb8eef3 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 13 Sep 2024 08:30:13 -0700 Subject: [PATCH 14/51] fixed compatibility issues --- CHANGELOG.md | 4 ++++ internal/server/web/proxy/bedrock.go | 10 +++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab2a2ee..f5cf5df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.36.2 - 2024-09-13 +### Fixed +- Fixed compatibility issues between Anthropic SDK and AWS Bedrock + ## 1.36.1 - 2024-09-10 ### Fixed - Fixed provider selection issue when a key is associated with multiple providers diff --git a/internal/server/web/proxy/bedrock.go b/internal/server/web/proxy/bedrock.go index 37d9669..83ff891 100644 --- a/internal/server/web/proxy/bedrock.go +++ b/internal/server/web/proxy/bedrock.go @@ -21,6 +21,12 @@ import ( "go.uber.org/zap/zapcore" ) +func setAnthropicVersionIfExists(version string, req *anthropic.BedrockMessageRequest) { + if req != nil && len(version) > 0 { + req.AnthropicVersion = version + } +} + func getBedrockCompletionHandler(prod bool, e anthropicEstimator, timeOut time.Duration) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) @@ -309,6 +315,8 @@ func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Dur return } + setAnthropicVersionIfExists(c.GetHeader("anthropic-version"), req) + bs, err := json.Marshal(req) if err != nil { telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.marshal_error", nil, 1) @@ -410,6 +418,7 @@ func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Dur streamOutput, err := client.InvokeModelWithResponseStream(ctx, &bedrockruntime.InvokeModelWithResponseStreamInput{ ModelId: &anthropicReq.Model, ContentType: aws.String("application/json"), + Accept: aws.String("application/json"), Body: bs, }) @@ -500,7 +509,6 @@ func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Dur return false } default: - 
telemetry.Timing("bricksllm.proxy.get_bedrock_messages_handler.streaming_latency", time.Since(start), nil, 1) return false } From aa6321c106f392b0810199da8b9134829ec58839 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Mon, 16 Sep 2024 15:58:23 -0700 Subject: [PATCH 15/51] add support for openai o1 --- internal/provider/openai/cost.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index adcdd73..152d310 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -34,6 +34,8 @@ func parseFinetuneModel(model string) string { var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "prompt": { + "o1-preview": 0.015, + "o1-preview-2024-09-12": 0.015, "gpt-4o": 0.005, "gpt-4o-mini": 0.00015, "gpt-4o-mini-2024-07-18": 0.00015, @@ -96,6 +98,8 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "tts-1-hd": 0.03, }, "completion": { + "o1-preview": 0.06, + "o1-preview-2024-09-12": 0.06, "gpt-3.5-turbo-1106": 0.002, "gpt-4o": 0.015, "gpt-4o-mini": 0.0006, From 805ef731a3e63130cb9d704919c652ce7f0caa92 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Mon, 16 Sep 2024 16:00:33 -0700 Subject: [PATCH 16/51] update CHANGElOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5cf5df..7b21719 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.36.3 - 2024-09-16 +### Added +- Added support for OpenAI o1 models + ## 1.36.2 - 2024-09-13 ### Fixed - Fixed compatibility issues between Anthropic SDK and AWS Bedrock From 0976748f354874c48ac6f126862be705090072b7 Mon Sep 17 00:00:00 2001 From: Phuong Le Date: Thu, 10 Oct 2024 18:09:22 +0100 Subject: [PATCH 17/51] remove redundant gpt-4o entries from supported models --- internal/manager/route.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/internal/manager/route.go b/internal/manager/route.go index e499a5d..cb5739e 100644 --- a/internal/manager/route.go +++ b/internal/manager/route.go @@ -115,8 +115,6 @@ var ( "gpt-4-32k-0613", "gpt-4-32k-0314", "gpt-4-turbo", - "gpt-4o-2024-05-13", - "gpt-4o", "gpt-35-turbo", "gpt-35-turbo-0125", "gpt-35-turbo-1106", @@ -146,8 +144,6 @@ var ( "gpt-4-turbo-2024-04-09", "gpt-4-1106-preview", "gpt-4-turbo-preview", - "gpt-4o-2024-05-13", - "gpt-4o", "gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-3.5-turbo-1106", @@ -177,8 +173,6 @@ var ( "gpt-4-turbo-2024-04-09", "gpt-4-1106-preview", "gpt-4-turbo-preview", - "gpt-4o-2024-05-13", - "gpt-4o", "gpt-35-turbo", "gpt-35-turbo-0125", "gpt-35-turbo-1106", @@ -232,8 +226,6 @@ var ( "gpt-4-turbo-2024-04-09", "gpt-4-1106-preview", "gpt-4-turbo-preview", - "gpt-4o-2024-05-13", - "gpt-4o", "gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-3.5-turbo-1106", From 7aedf1c6add6e56af532af56efdee446dca7bc21 Mon Sep 17 00:00:00 2001 From: Phuong Le Date: Thu, 10 Oct 2024 18:31:17 +0100 Subject: [PATCH 18/51] add `gpt-4o-mini` to supported models --- internal/manager/route.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/manager/route.go b/internal/manager/route.go index cb5739e..6a2d07b 100644 --- a/internal/manager/route.go +++ b/internal/manager/route.go @@ -103,6 +103,7 @@ var ( azureSupportedModels = []string{ "gpt-4o-2024-05-13", "gpt-4o", + "gpt-4o-mini", "gpt-4-turbo-2024-04-09", "gpt-4-0125-preview", "gpt-4-vision-preview", @@ -129,6 +130,7 @@ var ( openaiSupportedModels = []string{ "gpt-4o-2024-05-13", "gpt-4o", + "gpt-4o-mini", "gpt-4-turbo-2024-04-09", "gpt-4-0125-preview", 
"gpt-4-vision-preview", @@ -158,6 +160,7 @@ var ( supportedModels = []string{ "gpt-4o-2024-05-13", "gpt-4o", + "gpt-4o-mini", "gpt-4-turbo-2024-04-09", "gpt-4-0125-preview", "gpt-4-vision-preview", @@ -211,6 +214,7 @@ var ( "gpt-35-turbo-16k-0613", "gpt-4o-2024-05-13", "gpt-4o", + "gpt-4o-mini", "gpt-4-turbo-2024-04-09", "gpt-4-0125-preview", "gpt-4-vision-preview", From b0be28eaadec7eba86f2f4c3b667b6daee0b06ea Mon Sep 17 00:00:00 2001 From: Phuong Le Date: Thu, 10 Oct 2024 18:31:48 +0100 Subject: [PATCH 19/51] remove redundant attribute --- docker-compose.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 07f9717..a044fa1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,3 @@ -version: '3.8' services: redis: image: redis:6.2-alpine @@ -38,4 +37,4 @@ volumes: redis: driver: local postgresql: - driver: local \ No newline at end of file + driver: local From 193b3625b8ea2a7f055dca434d57998d81dcf414 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Tue, 15 Oct 2024 08:39:43 -0700 Subject: [PATCH 20/51] update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b21719..4e3a3c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.36.4 - 2024-09-16 +### Added +- Added support for `gpt-4o-mini` in routes + ## 1.36.3 - 2024-09-16 ### Added - Added support for OpenAI o1 models From 677331419a67f3dea68512f4cae7deb93d68e7ab Mon Sep 17 00:00:00 2001 From: Amirhesam Adibinia Date: Wed, 16 Oct 2024 17:27:52 +0330 Subject: [PATCH 21/51] chore: update gpt-4o models' prices --- internal/provider/openai/cost.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index 152d310..dfa9040 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -36,11 +36,11 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "prompt": { "o1-preview": 0.015, "o1-preview-2024-09-12": 0.015, - "gpt-4o": 0.005, + "gpt-4o": 0.0025, "gpt-4o-mini": 0.00015, "gpt-4o-mini-2024-07-18": 0.00015, "gpt-4o-2024-05-13": 0.005, - "gpt-4o-2024-08-06": 0.005, + "gpt-4o-2024-08-06": 0.0025, "gpt-4-1106-preview": 0.01, "gpt-4-turbo-preview": 0.01, "gpt-4-turbo": 0.01, @@ -101,11 +101,11 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "o1-preview": 0.06, "o1-preview-2024-09-12": 0.06, "gpt-3.5-turbo-1106": 0.002, - "gpt-4o": 0.015, + "gpt-4o": 0.01, "gpt-4o-mini": 0.0006, "gpt-4o-mini-2024-07-18": 0.0006, "gpt-4o-2024-05-13": 0.015, - "gpt-4o-2024-08-06": 0.015, + "gpt-4o-2024-08-06": 0.01, "gpt-4-turbo-preview": 0.03, "gpt-4-turbo": 0.03, "gpt-4-turbo-2024-04-09": 0.03, From 8b3c614f3a91c2b463a58dfd13e097b6fd31a0ea Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 16 Oct 2024 07:16:01 -0700 Subject: [PATCH 22/51] update CHANGELOG --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e3a3c8..4e60250 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ -## 1.36.4 - 2024-09-16 +## 1.36.4 - 2024-10-16 +### Changed +- Updated `gpt-4o` pricing according to OpenAI updates + +## 1.36.4 - 2024-10-15 ### Added - Added support for `gpt-4o-mini` in routes From 0c74013d62b700bf33d2d81c72fa9eadf6a1d6db Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 16 Oct 2024 07:17:44 -0700 Subject: [PATCH 23/51] update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e60250..39fe41f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 1.36.4 - 2024-10-16 +## 1.36.5 - 2024-10-16 ### Changed - Updated `gpt-4o` pricing according to OpenAI updates From d792ed81b8e26601ebc53646ebd51b4055b5105a Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 23 Oct 2024 18:59:31 -0700 Subject: [PATCH 24/51] add request level timeout --- internal/server/web/proxy/anthropic.go | 11 +- internal/server/web/proxy/audio.go | 12 +- .../server/web/proxy/azure_chat_completion.go | 4 +- internal/server/web/proxy/azure_completion.go | 4 +- internal/server/web/proxy/azure_embedding.go | 4 +- internal/server/web/proxy/bedrock.go | 12 +- internal/server/web/proxy/chat_completion.go | 4 +- internal/server/web/proxy/custom_provider.go | 4 +- internal/server/web/proxy/deepinfra.go | 12 +- internal/server/web/proxy/embedding.go | 4 +- internal/server/web/proxy/proxy.go | 157 +++++++++--------- .../server/web/proxy/timeout_middleware.go | 33 ++++ internal/server/web/proxy/vector_store.go | 20 +-- .../server/web/proxy/vector_store_file.go | 16 +- .../web/proxy/vector_store_file_batch.go | 16 +- internal/server/web/proxy/vllm.go | 8 +- 16 files changed, 178 insertions(+), 143 deletions(-) create mode 100644 internal/server/web/proxy/timeout_middleware.go diff --git a/internal/server/web/proxy/anthropic.go b/internal/server/web/proxy/anthropic.go index 9c4c902..5853ed3 100644 --- a/internal/server/web/proxy/anthropic.go +++ b/internal/server/web/proxy/anthropic.go @@ -30,7 +30,7 @@ type anthropicEstimator interface { func copyHttpHeaders(source *http.Request, dest *http.Request, removeUseAgent bool) { for k := range source.Header { - if strings.ToLower(k) != "X-CUSTOM-EVENT-ID" { + if strings.ToLower(k) != "x-custom-event-id" { dest.Header.Set(k, source.Header.Get(k)) } } @@ -42,7 +42,7 @@ func copyHttpHeaders(source *http.Request, dest *http.Request, removeUseAgent bo dest.Header.Set("Accept-Encoding", "*") } -func getCompletionHandler(prod, private bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getCompletionHandler(prod, private bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_completion_handler.requests", nil, 1) @@ -52,7 +52,7 @@ func getCompletionHandler(prod, private bool, client http.Client, timeOut time.D return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.anthropic.com/v1/complete", c.Request.Body) @@ -198,6 +198,7 @@ func getCompletionHandler(prod, private bool, client http.Client, timeOut time.D eventName := "" c.Stream(func(w io.Writer) bool { raw, err := buffer.ReadBytes('\n') + if err != nil { if err == io.EOF { return false @@ -296,7 +297,7 @@ var ( eventContentBlockStop = []byte("event: content_block_stop") ) -func getMessagesHandler(prod, private bool, client http.Client, e anthropicEstimator, timeOut time.Duration) gin.HandlerFunc { +func getMessagesHandler(prod, private bool, client http.Client, e anthropicEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_messages_handler.requests", nil, 1) @@ -306,7 +307,7 @@ func getMessagesHandler(prod, private bool, client http.Client, e anthropicEstim return } - ctx, cancel := 
context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.anthropic.com/v1/messages", c.Request.Body) diff --git a/internal/server/web/proxy/audio.go b/internal/server/web/proxy/audio.go index aa50a62..af6e2a9 100644 --- a/internal/server/web/proxy/audio.go +++ b/internal/server/web/proxy/audio.go @@ -20,7 +20,7 @@ import ( "go.uber.org/zap/zapcore" ) -func getSpeechHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getSpeechHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_speech_handler.requests", nil, 1) @@ -30,7 +30,7 @@ func getSpeechHandler(prod bool, client http.Client, timeOut time.Duration) gin. return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, c.Request.Method, "https://api.openai.com/v1/audio/speech", c.Request.Body) @@ -167,7 +167,7 @@ func getContentType(format string) string { return "text/plain; charset=utf-8" } -func getTranscriptionsHandler(prod bool, client http.Client, timeOut time.Duration, e estimator) gin.HandlerFunc { +func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.requests", nil, 1) @@ -177,7 +177,7 @@ func getTranscriptionsHandler(prod bool, client http.Client, timeOut time.Durati return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, c.Request.Method, "https://api.openai.com/v1/audio/transcriptions", c.Request.Body) @@ -331,7 +331,7 @@ func getTranscriptionsHandler(prod bool, client http.Client, timeOut time.Durati } } -func getTranslationsHandler(prod bool, client http.Client, timeOut time.Duration, e estimator) gin.HandlerFunc { +func getTranslationsHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_translations_handler.requests", nil, 1) @@ -341,7 +341,7 @@ func getTranslationsHandler(prod bool, client http.Client, timeOut time.Duration return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, c.Request.Method, "https://api.openai.com/v1/audio/translations", c.Request.Body) diff --git a/internal/server/web/proxy/azure_chat_completion.go b/internal/server/web/proxy/azure_chat_completion.go index 907f122..4b77a8f 100644 --- a/internal/server/web/proxy/azure_chat_completion.go +++ b/internal/server/web/proxy/azure_chat_completion.go @@ -30,7 +30,7 @@ func buildAzureUrl(path, deploymentId, apiVersion, resourceName string) string { return fmt.Sprintf("https://%s.openai.azure.com/openai/deployments/%s/embeddings?api-version=%s", resourceName, deploymentId, apiVersion) } -func getAzureChatCompletionHandler(prod, private bool, client http.Client, aoe azureEstimator, timeOut 
time.Duration) gin.HandlerFunc { +func getAzureChatCompletionHandler(prod, private bool, client http.Client, aoe azureEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_azure_chat_completion_handler.requests", nil, 1) @@ -40,7 +40,7 @@ func getAzureChatCompletionHandler(prod, private bool, client http.Client, aoe a return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, buildAzureUrl(c.FullPath(), c.Param("deployment_id"), c.Query("api-version"), c.GetString("resourceName")), c.Request.Body) diff --git a/internal/server/web/proxy/azure_completion.go b/internal/server/web/proxy/azure_completion.go index f1d5906..0a8cb79 100644 --- a/internal/server/web/proxy/azure_completion.go +++ b/internal/server/web/proxy/azure_completion.go @@ -66,7 +66,7 @@ func logAzureCompletionsResponse(log *zap.Logger, prod, private bool, cr *goopen } } -func getAzureCompletionsHandler(prod, private bool, client http.Client, aoe azureEstimator, timeOut time.Duration) gin.HandlerFunc { +func getAzureCompletionsHandler(prod, private bool, client http.Client, aoe azureEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_azure_completions_handler.requests", nil, 1) @@ -76,7 +76,7 @@ func getAzureCompletionsHandler(prod, private bool, client http.Client, aoe azur return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, buildAzureUrl(c.FullPath(), c.Param("deployment_id"), c.Query("api-version"), c.GetString("resourceName")), c.Request.Body) diff --git a/internal/server/web/proxy/azure_embedding.go b/internal/server/web/proxy/azure_embedding.go index d15675d..f9790d2 100644 --- a/internal/server/web/proxy/azure_embedding.go +++ b/internal/server/web/proxy/azure_embedding.go @@ -14,7 +14,7 @@ import ( goopenai "github.com/sashabaranov/go-openai" ) -func getAzureEmbeddingsHandler(prod, private bool, client http.Client, aoe azureEstimator, timeOut time.Duration) gin.HandlerFunc { +func getAzureEmbeddingsHandler(prod, private bool, client http.Client, aoe azureEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_azure_embeddings_handler.requests", nil, 1) @@ -31,7 +31,7 @@ func getAzureEmbeddingsHandler(prod, private bool, client http.Client, aoe azure // return // } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, c.Request.Method, buildAzureUrl(c.FullPath(), c.Param("deployment_id"), c.Query("api-version"), c.GetString("resourceName")), c.Request.Body) diff --git a/internal/server/web/proxy/bedrock.go b/internal/server/web/proxy/bedrock.go index 83ff891..d9dc977 100644 --- a/internal/server/web/proxy/bedrock.go +++ b/internal/server/web/proxy/bedrock.go @@ -27,7 +27,7 @@ func setAnthropicVersionIfExists(version string, req *anthropic.BedrockMessageRe } } -func getBedrockCompletionHandler(prod bool, e anthropicEstimator, timeOut time.Duration) gin.HandlerFunc { +func 
getBedrockCompletionHandler(prod bool, e anthropicEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.requests", nil, 1) @@ -82,7 +82,7 @@ func getBedrockCompletionHandler(prod bool, e anthropicEstimator, timeOut time.D return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() cfg, err := config.LoadDefaultConfig(ctx, config.WithCredentialsProvider(credentials.StaticCredentialsProvider{ @@ -103,7 +103,7 @@ func getBedrockCompletionHandler(prod bool, e anthropicEstimator, timeOut time.D client := bedrockruntime.NewFromConfig(cfg) stream := c.GetBool("stream") - ctx, cancel = context.WithTimeout(context.Background(), timeOut) + ctx, cancel = context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() start := time.Now() @@ -282,7 +282,7 @@ func getEventNameFromLine(line []byte) string { return "" } -func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Duration) gin.HandlerFunc { +func getBedrockMessagesHandler(prod bool, e anthropicEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.requests", nil, 1) @@ -335,7 +335,7 @@ func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Dur return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() cfg, err := config.LoadDefaultConfig(ctx, config.WithCredentialsProvider(credentials.StaticCredentialsProvider{ @@ -356,7 +356,7 @@ func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Dur client := bedrockruntime.NewFromConfig(cfg) stream := c.GetBool("stream") - ctx, cancel = context.WithTimeout(context.Background(), timeOut) + ctx, cancel = context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() start := time.Now() diff --git a/internal/server/web/proxy/chat_completion.go b/internal/server/web/proxy/chat_completion.go index b33d7fe..19c6245 100644 --- a/internal/server/web/proxy/chat_completion.go +++ b/internal/server/web/proxy/chat_completion.go @@ -17,7 +17,7 @@ import ( goopenai "github.com/sashabaranov/go-openai" ) -func getChatCompletionHandler(prod, private bool, client http.Client, e estimator, timeOut time.Duration) gin.HandlerFunc { +func getChatCompletionHandler(prod, private bool, client http.Client, e estimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_chat_completion_handler.requests", nil, 1) @@ -27,7 +27,7 @@ func getChatCompletionHandler(prod, private bool, client http.Client, e estimato return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/chat/completions", c.Request.Body) diff --git a/internal/server/web/proxy/custom_provider.go b/internal/server/web/proxy/custom_provider.go index 4a3b5bc..6e1989d 100644 --- a/internal/server/web/proxy/custom_provider.go +++ b/internal/server/web/proxy/custom_provider.go @@ -46,7 +46,7 @@ type ErrorResponse struct { Error *Error `json:"error"` } 
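The recurring change in the hunks above and below is one pattern: instead of wiring a fixed timeout into each handler constructor, every handler derives its outbound context from a per-request duration stored under the "requestTimeout" key by the timeout middleware added in this patch. A minimal, self-contained sketch of that pattern (with a placeholder route, upstream URL, and default duration, not the proxy's real handlers) might look like this:

package main

import (
	"context"
	"net/http"
	"time"

	"github.com/gin-gonic/gin"
)

// exampleHandler mirrors the pattern from the diff: the outbound request context
// is built from the duration stored on the gin context, not a constructor argument.
func exampleHandler(client http.Client) gin.HandlerFunc {
	return func(c *gin.Context) {
		ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout"))
		defer cancel()

		// Placeholder upstream URL for illustration only.
		req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://example.com/upstream", c.Request.Body)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		resp, err := client.Do(req)
		if err != nil {
			c.JSON(http.StatusGatewayTimeout, gin.H{"error": err.Error()})
			return
		}
		defer resp.Body.Close()

		c.Status(resp.StatusCode)
	}
}

func main() {
	r := gin.New()
	// Stand-in for the timeout middleware introduced later in this patch,
	// which would normally parse x-request-timeout and fall back to a default.
	r.Use(func(c *gin.Context) { c.Set("requestTimeout", 30*time.Second) })
	r.POST("/example", exampleHandler(http.Client{}))
	_ = r.Run(":8080")
}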
-func getCustomProviderHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getCustomProviderHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { tags := []string{ fmt.Sprintf("path:%s", c.FullPath()), @@ -68,7 +68,7 @@ func getCustomProviderHandler(prod bool, client http.Client, timeOut time.Durati } logWithCid := util.GetLogFromCtx(c) - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() body, err := io.ReadAll(c.Request.Body) diff --git a/internal/server/web/proxy/deepinfra.go b/internal/server/web/proxy/deepinfra.go index d5156c1..8aecc12 100644 --- a/internal/server/web/proxy/deepinfra.go +++ b/internal/server/web/proxy/deepinfra.go @@ -17,7 +17,7 @@ import ( goopenai "github.com/sashabaranov/go-openai" ) -func getDeepinfraCompletionsHandler(prod, private bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getDeepinfraCompletionsHandler(prod, private bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_deepinfra_completions_handler.requests", nil, 1) @@ -26,7 +26,7 @@ func getDeepinfraCompletionsHandler(prod, private bool, client http.Client, time return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.deepinfra.com/v1/openai/completions", c.Request.Body) @@ -221,7 +221,7 @@ func getDeepinfraCompletionsHandler(prod, private bool, client http.Client, time } } -func getDeepinfraChatCompletionsHandler(prod, private bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getDeepinfraChatCompletionsHandler(prod, private bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_deepinfra_chat_completions_handler.requests", nil, 1) @@ -230,7 +230,7 @@ func getDeepinfraChatCompletionsHandler(prod, private bool, client http.Client, return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.deepinfra.com/v1/openai/chat/completions", c.Request.Body) @@ -416,7 +416,7 @@ func getDeepinfraChatCompletionsHandler(prod, private bool, client http.Client, } } -func getDeepinfraEmbeddingsHandler(prod, private bool, client http.Client, e deepinfraEstimator, timeout time.Duration) gin.HandlerFunc { +func getDeepinfraEmbeddingsHandler(prod, private bool, client http.Client, e deepinfraEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_deepinfra_embeddings_handler.requests", nil, 1) @@ -425,7 +425,7 @@ func getDeepinfraEmbeddingsHandler(prod, private bool, client http.Client, e dee return } - ctx, cancel := context.WithTimeout(context.Background(), timeout) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.deepinfra.com/v1/openai/embeddings", c.Request.Body) diff --git a/internal/server/web/proxy/embedding.go 
b/internal/server/web/proxy/embedding.go index 49c3384..1d689e3 100644 --- a/internal/server/web/proxy/embedding.go +++ b/internal/server/web/proxy/embedding.go @@ -30,7 +30,7 @@ type EmbeddingResponseBase64 struct { Usage goopenai.Usage `json:"usage"` } -func getEmbeddingHandler(prod, private bool, client http.Client, e estimator, timeOut time.Duration) gin.HandlerFunc { +func getEmbeddingHandler(prod, private bool, client http.Client, e estimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_embedding_handler.requests", nil, 1) @@ -47,7 +47,7 @@ func getEmbeddingHandler(prod, private bool, client http.Client, e estimator, ti // return // } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, c.Request.Method, "https://api.openai.com/v1/embeddings", c.Request.Body) diff --git a/internal/server/web/proxy/proxy.go b/internal/server/web/proxy/proxy.go index c7d9641..e6078f4 100644 --- a/internal/server/web/proxy/proxy.go +++ b/internal/server/web/proxy/proxy.go @@ -79,12 +79,13 @@ func CorsMiddleware() gin.HandlerFunc { } } -func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyManager, rm routeManager, a authenticator, psm ProviderSettingsManager, cpm CustomProvidersManager, ks keyStorage, e estimator, ae anthropicEstimator, aoe azureEstimator, v validator, r recorder, pub publisher, rlm rateLimitManager, timeOut time.Duration, ac accessCache, uac userAccessCache, pm PoliciesManager, scanner Scanner, cd CustomPolicyDetector, die deepinfraEstimator, um userManager, removeAgentHeaders bool) (*ProxyServer, error) { +func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyManager, rm routeManager, a authenticator, psm ProviderSettingsManager, cpm CustomProvidersManager, ks keyStorage, e estimator, ae anthropicEstimator, aoe azureEstimator, v validator, r recorder, pub publisher, rlm rateLimitManager, timeout time.Duration, ac accessCache, uac userAccessCache, pm PoliciesManager, scanner Scanner, cd CustomPolicyDetector, die deepinfraEstimator, um userManager, removeAgentHeaders bool) (*ProxyServer, error) { router := gin.New() prod := mode == "production" private := privacyMode == "strict" router.Use(CorsMiddleware()) + router.Use(getTimeoutMiddleware(timeout)) router.Use(getMiddleware(cpm, rm, pm, a, prod, private, log, pub, "proxy", ac, uac, http.Client{}, scanner, cd, um, removeAgentHeaders)) client := http.Client{} @@ -96,128 +97,128 @@ func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyMan router.GET("/api/health", getGetHealthCheckHandler()) // audios - router.POST("/api/providers/openai/v1/audio/speech", getSpeechHandler(prod, client, timeOut)) - router.POST("/api/providers/openai/v1/audio/transcriptions", getTranscriptionsHandler(prod, client, timeOut, e)) - router.POST("/api/providers/openai/v1/audio/translations", getTranslationsHandler(prod, client, timeOut, e)) + router.POST("/api/providers/openai/v1/audio/speech", getSpeechHandler(prod, client)) + router.POST("/api/providers/openai/v1/audio/transcriptions", getTranscriptionsHandler(prod, client, e)) + router.POST("/api/providers/openai/v1/audio/translations", getTranslationsHandler(prod, client, e)) // completions - router.POST("/api/providers/openai/v1/chat/completions", getChatCompletionHandler(prod, private, client, e, timeOut)) + 
router.POST("/api/providers/openai/v1/chat/completions", getChatCompletionHandler(prod, private, client, e)) // embeddings - router.POST("/api/providers/openai/v1/embeddings", getEmbeddingHandler(prod, private, client, e, timeOut)) + router.POST("/api/providers/openai/v1/embeddings", getEmbeddingHandler(prod, private, client, e)) // moderations - router.POST("/api/providers/openai/v1/moderations", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/moderations", getPassThroughHandler(prod, private, client)) // models - router.GET("/api/providers/openai/v1/models", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/models/:model", getPassThroughHandler(prod, private, client, timeOut)) - router.DELETE("/api/providers/openai/v1/models/:model", getPassThroughHandler(prod, private, client, timeOut)) + router.GET("/api/providers/openai/v1/models", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/models/:model", getPassThroughHandler(prod, private, client)) + router.DELETE("/api/providers/openai/v1/models/:model", getPassThroughHandler(prod, private, client)) // assistants - router.POST("/api/providers/openai/v1/assistants", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/assistants/:assistant_id", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/assistants/:assistant_id", getPassThroughHandler(prod, private, client, timeOut)) - router.DELETE("/api/providers/openai/v1/assistants/:assistant_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/assistants", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/assistants", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/assistants/:assistant_id", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/assistants/:assistant_id", getPassThroughHandler(prod, private, client)) + router.DELETE("/api/providers/openai/v1/assistants/:assistant_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/assistants", getPassThroughHandler(prod, private, client)) // assistant files - router.POST("/api/providers/openai/v1/assistants/:assistant_id/files", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/assistants/:assistant_id/files/:file_id", getPassThroughHandler(prod, private, client, timeOut)) - router.DELETE("/api/providers/openai/v1/assistants/:assistant_id/files/:file_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/assistants/:assistant_id/files", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/assistants/:assistant_id/files", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/assistants/:assistant_id/files/:file_id", getPassThroughHandler(prod, private, client)) + router.DELETE("/api/providers/openai/v1/assistants/:assistant_id/files/:file_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/assistants/:assistant_id/files", getPassThroughHandler(prod, private, client)) // threads - router.POST("/api/providers/openai/v1/threads", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id", 
getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/threads/:thread_id", getPassThroughHandler(prod, private, client, timeOut)) - router.DELETE("/api/providers/openai/v1/threads/:thread_id", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/threads", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/threads/:thread_id", getPassThroughHandler(prod, private, client)) + router.DELETE("/api/providers/openai/v1/threads/:thread_id", getPassThroughHandler(prod, private, client)) // messages - router.POST("/api/providers/openai/v1/threads/:thread_id/messages", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/messages/:message_id", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/threads/:thread_id/messages/:message_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/messages", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/threads/:thread_id/messages", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/messages/:message_id", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/threads/:thread_id/messages/:message_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/messages", getPassThroughHandler(prod, private, client)) // message files - router.GET("/api/providers/openai/v1/threads/:thread_id/messages/:message_id/files/:file_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/messages/:message_id/files", getPassThroughHandler(prod, private, client, timeOut)) + router.GET("/api/providers/openai/v1/threads/:thread_id/messages/:message_id/files/:file_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/messages/:message_id/files", getPassThroughHandler(prod, private, client)) // runs - router.POST("/api/providers/openai/v1/threads/:thread_id/runs", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/runs/:run_id", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/threads/:thread_id/runs/:run_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/runs", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/submit_tool_outputs", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/cancel", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/threads/runs", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/steps/:step_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/steps", getPassThroughHandler(prod, private, client, timeOut)) + 
router.POST("/api/providers/openai/v1/threads/:thread_id/runs", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/runs/:run_id", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/threads/:thread_id/runs/:run_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/runs", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/submit_tool_outputs", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/cancel", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/threads/runs", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/steps/:step_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/steps", getPassThroughHandler(prod, private, client)) // files - router.GET("/api/providers/openai/v1/files", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/files", getPassThroughHandler(prod, private, client, timeOut)) - router.DELETE("/api/providers/openai/v1/files/:file_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/files/:file_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/files/:file_id/content", getPassThroughHandler(prod, private, client, timeOut)) + router.GET("/api/providers/openai/v1/files", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/files", getPassThroughHandler(prod, private, client)) + router.DELETE("/api/providers/openai/v1/files/:file_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/files/:file_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/files/:file_id/content", getPassThroughHandler(prod, private, client)) // batch - router.POST("/api/providers/openai/v1/batches", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/batches/:batch_id", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/batches/:batch_id/cancel", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/batches", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/batches", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/batches/:batch_id", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/batches/:batch_id/cancel", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/batches", getPassThroughHandler(prod, private, client)) // images - router.POST("/api/providers/openai/v1/images/generations", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/images/edits", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/images/variations", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/images/generations", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/images/edits", getPassThroughHandler(prod, 
private, client)) + router.POST("/api/providers/openai/v1/images/variations", getPassThroughHandler(prod, private, client)) // azure - router.POST("/api/providers/azure/openai/deployments/:deployment_id/chat/completions", getAzureChatCompletionHandler(prod, private, client, aoe, timeOut)) - router.POST("/api/providers/azure/openai/deployments/:deployment_id/embeddings", getAzureEmbeddingsHandler(prod, private, client, aoe, timeOut)) - router.POST("/api/providers/azure/openai/deployments/:deployment_id/completions", getAzureCompletionsHandler(prod, private, client, aoe, timeOut)) + router.POST("/api/providers/azure/openai/deployments/:deployment_id/chat/completions", getAzureChatCompletionHandler(prod, private, client, aoe)) + router.POST("/api/providers/azure/openai/deployments/:deployment_id/embeddings", getAzureEmbeddingsHandler(prod, private, client, aoe)) + router.POST("/api/providers/azure/openai/deployments/:deployment_id/completions", getAzureCompletionsHandler(prod, private, client, aoe)) // anthropic - router.POST("/api/providers/anthropic/v1/complete", getCompletionHandler(prod, private, client, timeOut)) - router.POST("/api/providers/anthropic/v1/messages", getMessagesHandler(prod, private, client, ae, timeOut)) + router.POST("/api/providers/anthropic/v1/complete", getCompletionHandler(prod, private, client)) + router.POST("/api/providers/anthropic/v1/messages", getMessagesHandler(prod, private, client, ae)) // bedrock anthropic - router.POST("/api/providers/bedrock/anthropic/v1/complete", getBedrockCompletionHandler(prod, ae, timeOut)) - router.POST("/api/providers/bedrock/anthropic/v1/messages", getBedrockMessagesHandler(prod, ae, timeOut)) + router.POST("/api/providers/bedrock/anthropic/v1/complete", getBedrockCompletionHandler(prod, ae)) + router.POST("/api/providers/bedrock/anthropic/v1/messages", getBedrockMessagesHandler(prod, ae)) // vllm - router.POST("/api/providers/vllm/v1/chat/completions", getVllmChatCompletionsHandler(prod, private, client, timeOut)) - router.POST("/api/providers/vllm/v1/completions", getVllmCompletionsHandler(prod, private, client, timeOut)) + router.POST("/api/providers/vllm/v1/chat/completions", getVllmChatCompletionsHandler(prod, private, client)) + router.POST("/api/providers/vllm/v1/completions", getVllmCompletionsHandler(prod, private, client)) // deepinfra - router.POST("/api/providers/deepinfra/v1/chat/completions", getDeepinfraChatCompletionsHandler(prod, private, client, timeOut)) - router.POST("/api/providers/deepinfra/v1/completions", getDeepinfraCompletionsHandler(prod, private, client, timeOut)) - router.POST("/api/providers/deepinfra/v1/embeddings", getDeepinfraEmbeddingsHandler(prod, private, client, die, timeOut)) + router.POST("/api/providers/deepinfra/v1/chat/completions", getDeepinfraChatCompletionsHandler(prod, private, client)) + router.POST("/api/providers/deepinfra/v1/completions", getDeepinfraCompletionsHandler(prod, private, client)) + router.POST("/api/providers/deepinfra/v1/embeddings", getDeepinfraEmbeddingsHandler(prod, private, client, die)) // custom provider - router.POST("/api/custom/providers/:provider/*wildcard", getCustomProviderHandler(prod, client, timeOut)) + router.POST("/api/custom/providers/:provider/*wildcard", getCustomProviderHandler(prod, client)) // custom route router.POST("/api/routes/*route", getRouteHandler(prod, c, aoe, e, client, r)) // vector store - router.POST("/api/providers/openai/v1/vector_stores", getCreateVectorStoreHandler(prod, client, timeOut)) - 
router.GET("/api/providers/openai/v1/vector_stores", getListVectorStoresHandler(prod, client, timeOut)) - router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id", getGetVectorStoreHandler(prod, client, timeOut)) - router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id", getModifyVectorStoreHandler(prod, client, timeOut)) - router.DELETE("/api/providers/openai/v1/vector_stores/:vector_store_id", getDeleteVectorStoreHandler(prod, client, timeOut)) + router.POST("/api/providers/openai/v1/vector_stores", getCreateVectorStoreHandler(prod, client)) + router.GET("/api/providers/openai/v1/vector_stores", getListVectorStoresHandler(prod, client)) + router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id", getGetVectorStoreHandler(prod, client)) + router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id", getModifyVectorStoreHandler(prod, client)) + router.DELETE("/api/providers/openai/v1/vector_stores/:vector_store_id", getDeleteVectorStoreHandler(prod, client)) // vector store files - router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/files", getCreateVectorStoreFileHandler(prod, client, timeOut)) - router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/files", getListVectorStoreFilesHandler(prod, client, timeOut)) - router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/files/:file_id", getGetVectorStoreFileHandler(prod, client, timeOut)) - router.DELETE("/api/providers/openai/v1/vector_stores/:vector_store_id/files/:file_id", getDeleteVectorStoreFileHandler(prod, client, timeOut)) + router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/files", getCreateVectorStoreFileHandler(prod, client)) + router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/files", getListVectorStoreFilesHandler(prod, client)) + router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/files/:file_id", getGetVectorStoreFileHandler(prod, client)) + router.DELETE("/api/providers/openai/v1/vector_stores/:vector_store_id/files/:file_id", getDeleteVectorStoreFileHandler(prod, client)) // vector store file batches - router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches", getCreateVectorStoreFileBatchHandler(prod, client, timeOut)) - router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id", getGetVectorStoreFileBatchHandler(prod, client, timeOut)) - router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/cancel", getCancelVectorStoreFileBatchHandler(prod, client, timeOut)) - router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/files", getListVectorStoreFileBatchFilesHandler(prod, client, timeOut)) + router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches", getCreateVectorStoreFileBatchHandler(prod, client)) + router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id", getGetVectorStoreFileBatchHandler(prod, client)) + router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/cancel", getCancelVectorStoreFileBatchHandler(prod, client)) + router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/files", getListVectorStoreFileBatchFilesHandler(prod, client)) srv := &http.Server{ Addr: ":8002", @@ -278,7 +279,7 @@ func writeFieldToBuffer(fields []string, c *gin.Context, writer *multipart.Write return nil } -func getPassThroughHandler(prod, 
private bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getPassThroughHandler(prod, private bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) @@ -293,7 +294,7 @@ func getPassThroughHandler(prod, private bool, client http.Client, timeOut time. return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() targetUrl, err := buildProxyUrl(c) diff --git a/internal/server/web/proxy/timeout_middleware.go b/internal/server/web/proxy/timeout_middleware.go new file mode 100644 index 0000000..b588146 --- /dev/null +++ b/internal/server/web/proxy/timeout_middleware.go @@ -0,0 +1,33 @@ +package proxy + +import ( + "net/http" + "time" + + "github.com/gin-gonic/gin" +) + +func getTimeoutMiddleware(timeout time.Duration) gin.HandlerFunc { + return func(c *gin.Context) { + if c == nil || c.Request == nil { + JSON(c, http.StatusInternalServerError, "[BricksLLM] request is empty") + c.Abort() + return + } + + timeoutHeader := c.GetHeader("x-request-timeout") + parsedTimeout := timeout + if len(timeoutHeader) != 0 { + parsed, err := time.ParseDuration(timeoutHeader) + if err != nil { + JSON(c, http.StatusBadRequest, "[BricksLLM] invalid timeout") + c.Abort() + return + } + + parsedTimeout = parsed + } + + c.Set("requestTimeout", parsedTimeout) + } +} diff --git a/internal/server/web/proxy/vector_store.go b/internal/server/web/proxy/vector_store.go index 02b25e6..423a821 100644 --- a/internal/server/web/proxy/vector_store.go +++ b/internal/server/web/proxy/vector_store.go @@ -13,7 +13,7 @@ import ( goopenai "github.com/sashabaranov/go-openai" ) -func getCreateVectorStoreHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getCreateVectorStoreHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_create_vector_store_handler.requests", nil, 1) @@ -23,7 +23,7 @@ func getCreateVectorStoreHandler(prod bool, client http.Client, timeOut time.Dur return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores", c.Request.Body) @@ -94,7 +94,7 @@ func getCreateVectorStoreHandler(prod bool, client http.Client, timeOut time.Dur } } -func getListVectorStoresHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getListVectorStoresHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_list_vector_stores_handler.requests", nil, 1) @@ -104,7 +104,7 @@ func getListVectorStoresHandler(prod bool, client http.Client, timeOut time.Dura return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores", c.Request.Body) @@ -175,7 +175,7 @@ func getListVectorStoresHandler(prod bool, client http.Client, timeOut time.Dura } } -func getGetVectorStoreHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func 
getGetVectorStoreHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_get_vector_store_handler.requests", nil, 1) @@ -185,7 +185,7 @@ func getGetVectorStoreHandler(prod bool, client http.Client, timeOut time.Durati return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id"), c.Request.Body) @@ -256,7 +256,7 @@ func getGetVectorStoreHandler(prod bool, client http.Client, timeOut time.Durati } } -func getModifyVectorStoreHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getModifyVectorStoreHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_modify_vector_store_handler.requests", nil, 1) @@ -266,7 +266,7 @@ func getModifyVectorStoreHandler(prod bool, client http.Client, timeOut time.Dur return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id"), c.Request.Body) @@ -337,7 +337,7 @@ func getModifyVectorStoreHandler(prod bool, client http.Client, timeOut time.Dur } } -func getDeleteVectorStoreHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getDeleteVectorStoreHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_delete_vector_store_handler.requests", nil, 1) @@ -347,7 +347,7 @@ func getDeleteVectorStoreHandler(prod bool, client http.Client, timeOut time.Dur return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodDelete, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id"), c.Request.Body) diff --git a/internal/server/web/proxy/vector_store_file.go b/internal/server/web/proxy/vector_store_file.go index 2ef5206..91282c6 100644 --- a/internal/server/web/proxy/vector_store_file.go +++ b/internal/server/web/proxy/vector_store_file.go @@ -13,7 +13,7 @@ import ( goopenai "github.com/sashabaranov/go-openai" ) -func getCreateVectorStoreFileHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getCreateVectorStoreFileHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_create_vector_store_file_handler.requests", nil, 1) @@ -23,7 +23,7 @@ func getCreateVectorStoreFileHandler(prod bool, client http.Client, timeOut time return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/files", c.Request.Body) @@ -94,7 +94,7 @@ func getCreateVectorStoreFileHandler(prod bool, 
client http.Client, timeOut time } } -func getListVectorStoreFilesHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getListVectorStoreFilesHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_list_vector_store_files_handler.requests", nil, 1) @@ -104,7 +104,7 @@ func getListVectorStoreFilesHandler(prod bool, client http.Client, timeOut time. return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/files", c.Request.Body) @@ -175,7 +175,7 @@ func getListVectorStoreFilesHandler(prod bool, client http.Client, timeOut time. } } -func getGetVectorStoreFileHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getGetVectorStoreFileHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_get_vector_store_file_handler.requests", nil, 1) @@ -185,7 +185,7 @@ func getGetVectorStoreFileHandler(prod bool, client http.Client, timeOut time.Du return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/files/"+c.Param("file_id"), c.Request.Body) @@ -256,7 +256,7 @@ func getGetVectorStoreFileHandler(prod bool, client http.Client, timeOut time.Du } } -func getDeleteVectorStoreFileHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getDeleteVectorStoreFileHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_delete_vector_store_file_handler.requests", nil, 1) @@ -266,7 +266,7 @@ func getDeleteVectorStoreFileHandler(prod bool, client http.Client, timeOut time return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodDelete, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/files/"+c.Param("file_id"), c.Request.Body) diff --git a/internal/server/web/proxy/vector_store_file_batch.go b/internal/server/web/proxy/vector_store_file_batch.go index 12fce40..16e80eb 100644 --- a/internal/server/web/proxy/vector_store_file_batch.go +++ b/internal/server/web/proxy/vector_store_file_batch.go @@ -13,7 +13,7 @@ import ( goopenai "github.com/sashabaranov/go-openai" ) -func getCreateVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getCreateVectorStoreFileBatchHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_create_vector_store_file_batch_handler.requests", nil, 1) @@ -23,7 +23,7 @@ func getCreateVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := 
context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/file_batches", c.Request.Body) @@ -94,7 +94,7 @@ func getCreateVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut } } -func getGetVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getGetVectorStoreFileBatchHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_get_vector_store_file_batch_handler.requests", nil, 1) @@ -104,7 +104,7 @@ func getGetVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut ti return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/file_batches/"+c.Param("batch_id"), c.Request.Body) @@ -175,7 +175,7 @@ func getGetVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut ti } } -func getCancelVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getCancelVectorStoreFileBatchHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_cancel_vector_store_file_batch_handler.requests", nil, 1) @@ -185,7 +185,7 @@ func getCancelVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/file_batches/"+c.Param("batch_id")+"/cancel", c.Request.Body) @@ -256,7 +256,7 @@ func getCancelVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut } } -func getListVectorStoreFileBatchFilesHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getListVectorStoreFileBatchFilesHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_list_vector_store_file_batch_files_handler.requests", nil, 1) @@ -266,7 +266,7 @@ func getListVectorStoreFileBatchFilesHandler(prod bool, client http.Client, time return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/file_batches/"+c.Param("batch_id")+"/files", c.Request.Body) diff --git a/internal/server/web/proxy/vllm.go b/internal/server/web/proxy/vllm.go index 5415913..ec9ef93 100644 --- a/internal/server/web/proxy/vllm.go +++ b/internal/server/web/proxy/vllm.go @@ -20,7 +20,7 @@ import ( "go.uber.org/zap/zapcore" ) -func getVllmCompletionsHandler(prod, private bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getVllmCompletionsHandler(prod, private bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) 
{ log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_vllm_completions_handler.requests", nil, 1) @@ -36,7 +36,7 @@ func getVllmCompletionsHandler(prod, private bool, client http.Client, timeOut t return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, url+"/v1/completions", c.Request.Body) @@ -368,7 +368,7 @@ func logVllmCompletionResponse(log *zap.Logger, cr *goopenai.CompletionResponse, } } -func getVllmChatCompletionsHandler(prod, private bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getVllmChatCompletionsHandler(prod, private bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_vllm_chat_completions_handler.requests", nil, 1) @@ -384,7 +384,7 @@ func getVllmChatCompletionsHandler(prod, private bool, client http.Client, timeO return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, url+"/v1/chat/completions", c.Request.Body) From ca59ca699f21ecb3b7a44910cb861c4fd1b6847e Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 23 Oct 2024 19:07:26 -0700 Subject: [PATCH 25/51] update doc --- CHANGELOG.md | 4 + docs/proxy.yaml | 305 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 309 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39fe41f..cd6f57c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.37.0 - 2024-10-23 +### Added +- Added request level timeout with HTTP header `x-request-timeout` + ## 1.36.5 - 2024-10-16 ### Changed - Updated `gpt-4o` pricing according to OpenAI updates diff --git a/docs/proxy.yaml b/docs/proxy.yaml index a0613ac..2e6c492 100644 --- a/docs/proxy.yaml +++ b/docs/proxy.yaml @@ -50,6 +50,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: OpenAI Chat Completions @@ -68,6 +73,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Call OpenAI embeddings @@ -86,6 +96,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Call OpenAI moderations @@ -99,6 +114,11 @@ paths: schema: type: string description: Custom Id that can be used to retrieve an event associated with each proxy request. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Get OpenAI models @@ -121,6 +141,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. 
- in: path name: model required: true @@ -141,6 +166,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: List files @@ -158,6 +188,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Upload a file @@ -180,6 +215,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: file_id required: true @@ -198,6 +238,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: file_id required: true @@ -222,6 +267,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: file_id required: true @@ -245,6 +295,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create a batch @@ -262,6 +317,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: List batches @@ -284,6 +344,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: batch_id required: true @@ -302,6 +367,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Cancel a batch @@ -320,6 +390,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Generate images @@ -338,6 +413,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Edit images @@ -356,6 +436,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create image variations @@ -374,6 +459,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. 
+ - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create speech @@ -392,6 +482,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create transcriptions @@ -410,6 +505,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create translations @@ -428,6 +528,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create assistant @@ -445,6 +550,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: List assistants @@ -467,6 +577,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: assistant_id required: true @@ -490,6 +605,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Modify assistant @@ -512,6 +632,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Delete assistant @@ -535,6 +660,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create assistant file @@ -557,6 +687,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: List assistant files @@ -575,6 +710,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: assistant_id required: true @@ -612,6 +752,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Delete assistant file @@ -630,6 +775,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. 
Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create thread @@ -652,6 +802,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -670,6 +825,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -692,6 +852,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Delete thread @@ -710,6 +875,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -732,6 +902,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -760,6 +935,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: message_id required: true @@ -782,6 +962,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: message_id required: true @@ -814,6 +999,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: file_id required: true @@ -853,6 +1043,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: List message files @@ -871,6 +1066,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -892,6 +1092,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -920,6 +1125,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. 
- in: path name: run_id required: true @@ -954,6 +1164,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Modify run @@ -972,6 +1187,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -1000,6 +1220,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create thread and run @@ -1022,6 +1247,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: step_id required: true @@ -1052,6 +1282,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -1084,6 +1319,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: deployment_id required: true @@ -1114,6 +1354,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: deployment_id required: true @@ -1143,6 +1388,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: deployment_id required: true @@ -1169,6 +1419,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: header name: Content-Type schema: @@ -1197,6 +1452,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: header name: Content-Type schema: @@ -1225,6 +1485,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - Bedrock summary: Create Bedrock Anthropic completion @@ -1238,6 +1503,11 @@ paths: schema: type: string description: Custom Id that can be used to retrieve an event associated with each proxy request. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. 
- in: header name: Content-Type schema: @@ -1261,6 +1531,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - vLLM summary: Create chat completions @@ -1279,6 +1554,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - vLLM summary: Create completions @@ -1297,6 +1577,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - DeepInfra summary: Create chat completions @@ -1315,6 +1600,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - DeepInfra summary: Create completions @@ -1333,6 +1623,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - DeepInfra summary: Create embeddings @@ -1351,6 +1646,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: provider required: true @@ -1375,6 +1675,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - Route summary: Call a route From 768ab1debfceaa4655431be150dd12a2cf52693f Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Wed, 30 Oct 2024 12:50:21 +0800 Subject: [PATCH 26/51] add support for AWS elastic cache. 
--- cmd/bricksllm/main.go | 92 +++++++++++++-------------------------- internal/config/config.go | 2 + 2 files changed, 32 insertions(+), 62 deletions(-) diff --git a/cmd/bricksllm/main.go b/cmd/bricksllm/main.go index e88022f..84c1170 100644 --- a/cmd/bricksllm/main.go +++ b/cmd/bricksllm/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "crypto/tls" "flag" "fmt" "os" @@ -173,22 +174,25 @@ func main() { } rMemStore.Listen() - rateLimitRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 0, - }) + defaultRedisOption := func(cfg *config.Config, dbIndex int) *redis.Options { + return &redis.Options{ + Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), + Password: cfg.RedisPassword, + DB: cfg.RedisDBStartIndex + dbIndex, + TLSConfig: &tls.Config{ + InsecureSkipVerify: cfg.RedisInsecureSkipVerify, + }, + } + } + + rateLimitRedisCache := redis.NewClient(defaultRedisOption(cfg, 0)) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() if err := rateLimitRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to rate limit redis cache: %v", err) } - costLimitRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 1, - }) + costLimitRedisCache := redis.NewClient(defaultRedisOption(cfg, 1)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -196,11 +200,7 @@ func main() { log.Sugar().Fatalf("error connecting to cost limit redis cache: %v", err) } - costRedisStorage := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 2, - }) + costRedisStorage := redis.NewClient(defaultRedisOption(cfg, 2)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -208,11 +208,7 @@ func main() { log.Sugar().Fatalf("error connecting to cost limit redis storage: %v", err) } - apiRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 3, - }) + apiRedisCache := redis.NewClient(defaultRedisOption(cfg, 3)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -220,87 +216,59 @@ func main() { log.Sugar().Fatalf("error connecting to api redis cache: %v", err) } - accessRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 4, - }) + accessRedisCache := redis.NewClient(defaultRedisOption(cfg, 4)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := apiRedisCache.Ping(ctx).Err(); err != nil { + if err := accessRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to api redis cache: %v", err) } - userRateLimitRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 5, - }) + userRateLimitRedisCache := redis.NewClient(defaultRedisOption(cfg, 5)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := rateLimitRedisCache.Ping(ctx).Err(); err != nil { + if err := userRateLimitRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to user rate limit redis cache: %v", err) } - userCostLimitRedisCache := 
redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 6, - }) + userCostLimitRedisCache := redis.NewClient(defaultRedisOption(cfg, 6)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := costLimitRedisCache.Ping(ctx).Err(); err != nil { + if err := userCostLimitRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to user cost limit redis cache: %v", err) } - userCostRedisStorage := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 7, - }) + userCostRedisStorage := redis.NewClient(defaultRedisOption(cfg, 7)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := apiRedisCache.Ping(ctx).Err(); err != nil { + if err := userCostRedisStorage.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to user cost redis cache: %v", err) } - userAccessRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 8, - }) + userAccessRedisCache := redis.NewClient(defaultRedisOption(cfg, 8)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := costRedisStorage.Ping(ctx).Err(); err != nil { + if err := userAccessRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to user access redis storage: %v", err) } - providerSettingsRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 9, - }) + providerSettingsRedisCache := redis.NewClient(defaultRedisOption(cfg, 9)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := costRedisStorage.Ping(ctx).Err(); err != nil { + if err := providerSettingsRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to provider settings redis storage: %v", err) } - keysRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 10, - }) + keysRedisCache := redis.NewClient(defaultRedisOption(cfg, 10)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := costRedisStorage.Ping(ctx).Err(); err != nil { + if err := keysRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to keys redis storage: %v", err) } diff --git a/internal/config/config.go b/internal/config/config.go index 2f2fc8c..fdc3028 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -25,6 +25,8 @@ type Config struct { RedisPort string `koanf:"redis_port" env:"REDIS_PORT" envDefault:"6379"` RedisUsername string `koanf:"redis_username" env:"REDIS_USERNAME"` RedisPassword string `koanf:"redis_password" env:"REDIS_PASSWORD"` + RedisInsecureSkipVerify bool `koanf:"redis_insecure_skip_verify" env:"REDIS_INSECURE_SKIP_VERIFY" envDefault:"false"` + RedisDBStartIndex int `koanf:"redis_db_start_index" env:"REDIS_DB_START_INDEX" envDefault:"0"` RedisReadTimeout time.Duration `koanf:"redis_read_time_out" env:"REDIS_READ_TIME_OUT" envDefault:"1s"` RedisWriteTimeout time.Duration `koanf:"redis_write_time_out" env:"REDIS_WRITE_TIME_OUT" envDefault:"500ms"` PostgresqlReadTimeout time.Duration `koanf:"postgresql_read_time_out" env:"POSTGRESQL_READ_TIME_OUT" envDefault:"10m"` From 
3009774abea1e18870788759dfedbb07aec6b1db Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Wed, 30 Oct 2024 12:52:14 +0800 Subject: [PATCH 27/51] run `go mod tidy`. --- go.mod | 11 ++++------- go.sum | 15 --------------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/go.mod b/go.mod index b099bce..bef3be1 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/DataDog/datadog-go/v5 v5.3.0 github.com/asticode/go-astisub v0.26.2 github.com/aws/aws-sdk-go-v2/config v1.27.7 + github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.16.2 github.com/aws/aws-sdk-go-v2/service/comprehend v1.31.2 github.com/caarlos0/env v3.5.0+incompatible github.com/cenkalti/backoff/v4 v4.3.0 @@ -25,29 +26,25 @@ require ( require ( github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 // indirect - github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.16.2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 // indirect - github.com/google/go-cmp v0.6.0 // indirect github.com/knadh/koanf/maps v0.1.1 // indirect - github.com/kr/pretty v0.3.1 // indirect + github.com/kr/text v0.2.0 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/prometheus/client_model v0.5.0 // indirect github.com/prometheus/common v0.48.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect - github.com/rogpeppe/go-internal v1.10.0 // indirect go.uber.org/atomic v1.7.0 // indirect - gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect ) require ( github.com/Microsoft/go-winio v0.5.0 // indirect github.com/asticode/go-astikit v0.20.0 // indirect github.com/asticode/go-astits v1.8.0 // indirect - github.com/aws/aws-sdk-go-v2 v1.30.5 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.17.7 // indirect + github.com/aws/aws-sdk-go-v2 v1.30.5 + github.com/aws/aws-sdk-go-v2/credentials v1.17.7 github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.3 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 // indirect github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 // indirect diff --git a/go.sum b/go.sum index aaf0e9b..716e361 100644 --- a/go.sum +++ b/go.sum @@ -8,8 +8,6 @@ github.com/asticode/go-astisub v0.26.2 h1:cdEXcm+SUSmYCEPTQYbbfCECnmQoIFfH6pF8wD github.com/asticode/go-astisub v0.26.2/go.mod h1:WTkuSzFB+Bp7wezuSf2Oxulj5A8zu2zLRVFf6bIFQK8= github.com/asticode/go-astits v1.8.0 h1:rf6aiiGn/QhlFjNON1n5plqF3Fs025XLUwiQ0NB6oZg= github.com/asticode/go-astits v1.8.0/go.mod h1:DkOWmBNQpnr9mv24KfZjq4JawCFX1FCqjLVGvO0DygQ= -github.com/aws/aws-sdk-go-v2 v1.25.3 h1:xYiLpZTQs1mzvz5PaI6uR0Wh57ippuEthxS4iK5v0n0= -github.com/aws/aws-sdk-go-v2 v1.25.3/go.mod h1:35hUlJVYd+M++iLI3ALmVwMOyRYMmRqUXpTtRGW+K9I= github.com/aws/aws-sdk-go-v2 v1.30.5 h1:mWSRTwQAb0aLE17dSzztCVJWI9+cRMgqebndjwDyK0g= github.com/aws/aws-sdk-go-v2 v1.30.5/go.mod h1:CT+ZPWXbYrci8chcARI3OmI/qgd+f6WtuLOoaIA8PR0= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 h1:70PVAiL15/aBMh5LThwgXdSQorVr91L127ttckI9QQU= @@ -20,12 +18,8 @@ github.com/aws/aws-sdk-go-v2/credentials v1.17.7 h1:WJd+ubWKoBeRh7A5iNMnxEOs982S github.com/aws/aws-sdk-go-v2/credentials v1.17.7/go.mod h1:UQi7LMR0Vhvs+44w5ec8Q+VS+cd10cjwgHwiVkE0YGU= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.3 h1:p+y7FvkK2dxS+FEwRIDHDe//ZX+jDhP8HHE50ppj4iI= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.3/go.mod h1:/fYB+FZbDlwlAiynK9KDXlzZl3ANI9JkD0Uhz5FjNT4= 
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.3 h1:ifbIbHZyGl1alsAhPIYsHOg5MuApgqOvVeI8wIugXfs= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.3/go.mod h1:oQZXg3c6SNeY6OZrDY+xHcF4VGIEoNotX2B4PrDeoJI= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 h1:pI7Bzt0BJtYA0N/JEC6B8fJ4RBrEMi1LBrkMdFYNSnQ= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17/go.mod h1:Dh5zzJYMtxfIjYW+/evjQ8uj2OyR/ve2KROHGHlSFqE= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.3 h1:Qvodo9gHG9F3E8SfYOspPeBt0bjSbsevK8WhRAUHcoY= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.3/go.mod h1:vCKrdLXtybdf/uQd/YfVR2r5pcbNuEYKzMQpcxmeSJw= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 h1:Mqr/V5gvrhA2gvgnF42Zh5iMiQNcOYthFYwCyrnuWlc= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17/go.mod h1:aLJpZlCmjE+V+KtN1q1uyZkfnUWpQGpbsn89XPKyzfU= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 h1:hT8rVHwugYE2lEfdFE0QWVo81lF7jMrYJVDWI+f+VxU= @@ -44,8 +38,6 @@ github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.2 h1:pi0Skl6mNl2w8qWZXcdOyg19 github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.2/go.mod h1:JYzLoEVeLXk+L4tn1+rrkfhkxl6mLDEVaDSvGq9og90= github.com/aws/aws-sdk-go-v2/service/sts v1.28.4 h1:Ppup1nVNAOWbBOrcoOxaxPeEnSFB2RnnQdguhXpmeQk= github.com/aws/aws-sdk-go-v2/service/sts v1.28.4/go.mod h1:+K1rNPVyGxkRuv9NNiaZ4YhBFuyw2MMA9SlIJ1Zlpz8= -github.com/aws/smithy-go v1.20.1 h1:4SZlSlMr36UEqC7XOyRVb27XMeZubNcBNN+9IgEPIQw= -github.com/aws/smithy-go v1.20.1/go.mod h1:krry+ya/rV9RDcV/Q16kpu6ypI4K2czasz0NC3qS14E= github.com/aws/smithy-go v1.20.4 h1:2HK1zBdPgRbjFOHlfeQZfpC4r72MOb9bZkiFwggKO+4= github.com/aws/smithy-go v1.20.4/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= @@ -119,11 +111,8 @@ github.com/knadh/koanf/providers/file v0.1.0 h1:fs6U7nrV58d3CFAFh8VTde8TM262ObYf github.com/knadh/koanf/providers/file v0.1.0/go.mod h1:rjJ/nHQl64iYCtAW2QQnF0eSmDEX/YZ/eNFj5yR6BvA= github.com/knadh/koanf/v2 v2.1.1 h1:/R8eXqasSTsmDCsAyYj+81Wteg8AqrV9CP6gvsTsOmM= github.com/knadh/koanf/v2 v2.1.1/go.mod h1:4mnTRbZCK+ALuBXHZMjDfG9y714L7TykVnZkXbMU3Es= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= @@ -146,7 +135,6 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= -github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/profile v1.4.0/go.mod 
h1:NWz/XGvpEW1FyYQ7fCx4dqYBLlfTcE+A9FLAkNKqjFE= @@ -164,11 +152,8 @@ github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= github.com/redis/go-redis/v9 v9.0.5 h1:CuQcn5HIEeK7BgElubPP8CGtE0KakrnbBSTLjathl5o= github.com/redis/go-redis/v9 v9.0.5/go.mod h1:WqMKv5vnQbRuZstUwxQI195wHy+t4PuXDOjzMvcuQHk= -github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/sashabaranov/go-openai v1.24.0 h1:4H4Pg8Bl2RH/YSnU8DYumZbuHnnkfioor/dtNlB20D4= -github.com/sashabaranov/go-openai v1.24.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sashabaranov/go-openai v1.26.3 h1:Tjnh4rcvsSU68f66r05mys+Zou4vo4qyvkne6AIRJPI= github.com/sashabaranov/go-openai v1.26.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= From 8e07ef4fdcc769e57507545bd0fe53856627e8f3 Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Wed, 30 Oct 2024 13:40:06 +0800 Subject: [PATCH 28/51] add gpt-4o latest model. --- internal/manager/route.go | 4 ++++ internal/provider/azure/cost.go | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/internal/manager/route.go b/internal/manager/route.go index 6a2d07b..904f776 100644 --- a/internal/manager/route.go +++ b/internal/manager/route.go @@ -101,6 +101,7 @@ func checkModelValidity(provider, model string) bool { var ( azureSupportedModels = []string{ + "gpt-4o-2024-08-26", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", @@ -128,6 +129,7 @@ var ( } openaiSupportedModels = []string{ + "gpt-4o-2024-08-16", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", @@ -158,6 +160,7 @@ var ( } supportedModels = []string{ + "gpt-4o-2024-08-16", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", @@ -212,6 +215,7 @@ var ( "gpt-35-turbo-0613", "gpt-35-turbo-16k", "gpt-35-turbo-16k-0613", + "gpt-4o-2024-08-16", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", diff --git a/internal/provider/azure/cost.go b/internal/provider/azure/cost.go index 1b7a8ed..eebd64a 100644 --- a/internal/provider/azure/cost.go +++ b/internal/provider/azure/cost.go @@ -9,8 +9,11 @@ import ( ) var AzureOpenAiPerThousandTokenCost = map[string]map[string]float64{ + // updated according to this link: + // https://azure.microsoft.com/en-gb/pricing/details/cognitive-services/openai-service/ "prompt": { - "gpt-4o": 0.005, + "gpt-4o": 0.0025, + "gpt-4o-2024-08-16": 0.0025, "gpt-4o-2024-05-13": 0.005, "gpt-4-turbo": 0.01, "gpt-4-turbo-2024-04-09": 0.01, @@ -28,7 +31,8 @@ var AzureOpenAiPerThousandTokenCost = map[string]map[string]float64{ "text-embedding-3-small": 0.00002, }, "completion": { - "gpt-4o": 0.015, + "gpt-4o": 0.01, + "gpt-4o-2024-08-16": 0.01, "gpt-4o-2024-05-13": 0.015, "gpt-4-turbo": 0.03, "gpt-4-turbo-2024-04-09": 0.03, From 2ace998c97d58c7265a8d6ef34211dd046d9c7fd Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Wed, 30 Oct 2024 13:40:27 +0800 Subject: [PATCH 29/51] upgrade dependency of goopenai to support structured output. 
--- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index b099bce..280a469 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( github.com/mattn/go-colorable v0.1.13 github.com/pkoukk/tiktoken-go v0.1.7 github.com/redis/go-redis/v9 v9.0.5 - github.com/sashabaranov/go-openai v1.26.3 + github.com/sashabaranov/go-openai v1.32.5 github.com/stretchr/testify v1.8.4 github.com/tidwall/gjson v1.17.0 go.uber.org/zap v1.24.0 diff --git a/go.sum b/go.sum index aaf0e9b..faa3b51 100644 --- a/go.sum +++ b/go.sum @@ -171,6 +171,8 @@ github.com/sashabaranov/go-openai v1.24.0 h1:4H4Pg8Bl2RH/YSnU8DYumZbuHnnkfioor/d github.com/sashabaranov/go-openai v1.24.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sashabaranov/go-openai v1.26.3 h1:Tjnh4rcvsSU68f66r05mys+Zou4vo4qyvkne6AIRJPI= github.com/sashabaranov/go-openai v1.26.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= +github.com/sashabaranov/go-openai v1.32.5 h1:/eNVa8KzlE7mJdKPZDj6886MUzZQjoVHyn0sLvIt5qA= +github.com/sashabaranov/go-openai v1.32.5/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= From a4baa74179b799c0fd42f541650b7a1725430345 Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Thu, 31 Oct 2024 23:04:37 +0800 Subject: [PATCH 30/51] revert changes to gpt-4o cost. --- internal/provider/azure/cost.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/provider/azure/cost.go b/internal/provider/azure/cost.go index eebd64a..bec7eb7 100644 --- a/internal/provider/azure/cost.go +++ b/internal/provider/azure/cost.go @@ -12,7 +12,7 @@ var AzureOpenAiPerThousandTokenCost = map[string]map[string]float64{ // updated according to this link: // https://azure.microsoft.com/en-gb/pricing/details/cognitive-services/openai-service/ "prompt": { - "gpt-4o": 0.0025, + "gpt-4o": 0.005, "gpt-4o-2024-08-16": 0.0025, "gpt-4o-2024-05-13": 0.005, "gpt-4-turbo": 0.01, @@ -31,7 +31,7 @@ var AzureOpenAiPerThousandTokenCost = map[string]map[string]float64{ "text-embedding-3-small": 0.00002, }, "completion": { - "gpt-4o": 0.01, + "gpt-4o": 0.015, "gpt-4o-2024-08-16": 0.01, "gpt-4o-2024-05-13": 0.015, "gpt-4-turbo": 0.03, From a0f7edbf049c38218aecb402936bcb0b45fe01f7 Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Fri, 1 Nov 2024 18:56:32 +0800 Subject: [PATCH 31/51] fix model version number. 
--- internal/manager/route.go | 6 +++--- internal/provider/azure/cost.go | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/manager/route.go b/internal/manager/route.go index 904f776..0d96127 100644 --- a/internal/manager/route.go +++ b/internal/manager/route.go @@ -129,7 +129,7 @@ var ( } openaiSupportedModels = []string{ - "gpt-4o-2024-08-16", + "gpt-4o-2024-08-06", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", @@ -160,7 +160,7 @@ var ( } supportedModels = []string{ - "gpt-4o-2024-08-16", + "gpt-4o-2024-08-06", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", @@ -215,7 +215,7 @@ var ( "gpt-35-turbo-0613", "gpt-35-turbo-16k", "gpt-35-turbo-16k-0613", - "gpt-4o-2024-08-16", + "gpt-4o-2024-08-06", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", diff --git a/internal/provider/azure/cost.go b/internal/provider/azure/cost.go index bec7eb7..2ed5556 100644 --- a/internal/provider/azure/cost.go +++ b/internal/provider/azure/cost.go @@ -13,7 +13,7 @@ var AzureOpenAiPerThousandTokenCost = map[string]map[string]float64{ // https://azure.microsoft.com/en-gb/pricing/details/cognitive-services/openai-service/ "prompt": { "gpt-4o": 0.005, - "gpt-4o-2024-08-16": 0.0025, + "gpt-4o-2024-08-06": 0.0025, "gpt-4o-2024-05-13": 0.005, "gpt-4-turbo": 0.01, "gpt-4-turbo-2024-04-09": 0.01, @@ -32,7 +32,7 @@ var AzureOpenAiPerThousandTokenCost = map[string]map[string]float64{ }, "completion": { "gpt-4o": 0.015, - "gpt-4o-2024-08-16": 0.01, + "gpt-4o-2024-08-06": 0.01, "gpt-4o-2024-05-13": 0.015, "gpt-4-turbo": 0.03, "gpt-4-turbo-2024-04-09": 0.03, From 4796a61a2c9036bd35ac5f1760a235c5a5084fa2 Mon Sep 17 00:00:00 2001 From: Andrew Rothstein Date: Mon, 4 Nov 2024 21:44:36 +0000 Subject: [PATCH 32/51] go:1.23.2 setup-go@5 checkout@v4 docker/login-action@v3 docker/setup-buildx-action@v3 docker/metadata-action@v5 docker/build-push-action@v6 alpine:3.20 for upstream --- .github/workflows/release.yml | 24 ++++++++++++------------ Dockerfile.dev | 4 ++-- Dockerfile.prod | 4 ++-- docker-compose.yml | 6 +++--- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 55277ee..5884f46 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,13 +10,13 @@ jobs: runs-on: macos-latest steps: - name: Install Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: '^1.23.2' check-latest: true - name: Check Out Repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install the Apple certificate and provisioning profile env: @@ -58,27 +58,27 @@ jobs: runs-on: ubuntu-latest steps: - name: Install Go - uses: actions/setup-go@v3 + uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: '^1.23.2' check-latest: true - name: Check Out Repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Login to Docker Hub - uses: docker/login-action@v1 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_HUB_USERNAME }} password: ${{ secrets.DOCKER_HUB_PASSWORD }} - name: Install Buildx id: buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Docker meta id: meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5 with: images: luyuanxin1995/bricksllm tags: | @@ -88,7 +88,7 @@ jobs: - name: Docker meta id: meta-datadog - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5 with: images: luyuanxin1995/bricksllm-datadog tags: | @@ -97,7 +97,7 @@ jobs: 
type=semver,pattern={{major}} - name: Build and push - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v6 with: context: ./ file: ./Dockerfile.prod @@ -107,7 +107,7 @@ jobs: tags: ${{ steps.meta.outputs.tags }} - name: Build and push - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v6 with: context: ./ file: ./Dockerfile.datadog diff --git a/Dockerfile.dev b/Dockerfile.dev index a0e5954..33b3f4d 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -1,4 +1,4 @@ -FROM golang:1.22.1 AS build +FROM golang:1.23.2 AS build ENV CGO_ENABLED=0 ENV GOOS=linux @@ -6,7 +6,7 @@ WORKDIR /go/src/github.com/bricks-cloud/bricksllm/ COPY . /go/src/github.com/bricks-cloud/bricksllm/ RUN go build -ldflags="-s -w" -o ./bin/bricksllm ./cmd/bricksllm/main.go -FROM alpine:3.17 +FROM alpine:3.20 RUN apk --no-cache add ca-certificates WORKDIR /usr/bin COPY --from=build /go/src/github.com/bricks-cloud/bricksllm/bin /go/bin diff --git a/Dockerfile.prod b/Dockerfile.prod index 64e16de..1eb16a4 100644 --- a/Dockerfile.prod +++ b/Dockerfile.prod @@ -1,4 +1,4 @@ -FROM golang:1.22.1 AS build +FROM golang:1.23.2 AS build ENV CGO_ENABLED=0 ENV GOOS=linux @@ -6,7 +6,7 @@ WORKDIR /go/src/github.com/bricks-cloud/bricksllm/ COPY . /go/src/github.com/bricks-cloud/bricksllm/ RUN go build -ldflags="-s -w" -o ./bin/bricksllm ./cmd/bricksllm/main.go -FROM alpine:3.17 +FROM alpine:3.20 RUN apk --no-cache add ca-certificates WORKDIR /usr/bin COPY --from=build /go/src/github.com/bricks-cloud/bricksllm/bin /go/bin diff --git a/docker-compose.yml b/docker-compose.yml index a044fa1..a29949a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,7 +5,7 @@ services: ports: - '6379:6379' command: redis-server --save 20 1 --loglevel warning --requirepass eYVX7EwVmmxKPCDmwMtyKVge8oLd2t81 - volumes: + volumes: - redis:/data postgresql: image: postgres:14.1-alpine @@ -15,10 +15,10 @@ services: - POSTGRES_PASSWORD=postgres ports: - '5432:5432' - volumes: + volumes: - postgresql:/var/lib/postgresql/data # bricksllm: - # depends_on: + # depends_on: # - redis # - postgresql # image: luyuanxin1995/bricksllm From b70ed3388c72adf1446212f68795f8e26f12d570 Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Thu, 7 Nov 2024 10:33:08 +0800 Subject: [PATCH 33/51] add fix --- go.mod | 1 + go.sum | 5 +++-- internal/server/web/proxy/middleware.go | 9 ++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 15bf54c..bab0c2e 100644 --- a/go.mod +++ b/go.mod @@ -21,6 +21,7 @@ require ( github.com/sashabaranov/go-openai v1.32.5 github.com/stretchr/testify v1.8.4 github.com/tidwall/gjson v1.17.0 + github.com/tidwall/sjson v1.2.5 go.uber.org/zap v1.24.0 ) diff --git a/go.sum b/go.sum index 324f3bc..3607d7d 100644 --- a/go.sum +++ b/go.sum @@ -154,8 +154,6 @@ github.com/redis/go-redis/v9 v9.0.5 h1:CuQcn5HIEeK7BgElubPP8CGtE0KakrnbBSTLjathl github.com/redis/go-redis/v9 v9.0.5/go.mod h1:WqMKv5vnQbRuZstUwxQI195wHy+t4PuXDOjzMvcuQHk= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/sashabaranov/go-openai v1.26.3 h1:Tjnh4rcvsSU68f66r05mys+Zou4vo4qyvkne6AIRJPI= -github.com/sashabaranov/go-openai v1.26.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sashabaranov/go-openai v1.32.5 h1:/eNVa8KzlE7mJdKPZDj6886MUzZQjoVHyn0sLvIt5qA= github.com/sashabaranov/go-openai v1.32.5/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= 
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= @@ -174,6 +172,7 @@ github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/gjson v1.17.0 h1:/Jocvlh98kcTfpN2+JzGQWQcqrPQwDrVEMApx/M5ZwM= github.com/tidwall/gjson v1.17.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= @@ -181,6 +180,8 @@ github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JT github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index 30b9eb0..9ce91ab 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -23,6 +23,7 @@ import ( "github.com/bricks-cloud/bricksllm/internal/util" "github.com/gin-gonic/gin" "github.com/tidwall/gjson" + "github.com/tidwall/sjson" "go.uber.org/zap" goopenai "github.com/sashabaranov/go-openai" @@ -763,7 +764,13 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag if c.FullPath() == "/api/providers/openai/v1/chat/completions" { ccr := &goopenai.ChatCompletionRequest{} - err = json.Unmarshal(body, ccr) + // this is a hack around an open issue in go-openai. 
+ // https://github.com/sashabaranov/go-openai/issues/884 + cleaned, err := sjson.Delete(string(body), "response_format.json_schema") + if err != nil { + logWithCid.Warn("removing response_format.json_schema", zap.Error(err)) + } + err = json.Unmarshal([]byte(cleaned), ccr) if err != nil { logError(logWithCid, "error when unmarshalling chat completion request", prod, err) return From 0643668eed8ccba2c74a19f9cb6bb3af39f1e8b8 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 9 Nov 2024 18:17:04 -0800 Subject: [PATCH 34/51] add new env variables for enabling redis tls --- cmd/bricksllm/.env | 2 ++ cmd/bricksllm/config_local.json | 2 ++ cmd/bricksllm/main.go | 12 +++++++++--- internal/config/config.go | 1 + 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/cmd/bricksllm/.env b/cmd/bricksllm/.env index 09ae4ee..f24859a 100644 --- a/cmd/bricksllm/.env +++ b/cmd/bricksllm/.env @@ -7,6 +7,8 @@ POSTGRESQL_PASSWORD= POSTGRESQL_SSL_MODE=disable POSTGRESQL_PORT=5432 REDIS_HOSTS=localhost +REDIS_ENABLE_TLS=false +REDIS_INSECURE_SKIP_VERIFY=false REDIS_PORT=6379 REDIS_USERNAME= REDIS_PASSWORD= diff --git a/cmd/bricksllm/config_local.json b/cmd/bricksllm/config_local.json index ae92a01..b1ec5bd 100644 --- a/cmd/bricksllm/config_local.json +++ b/cmd/bricksllm/config_local.json @@ -7,6 +7,8 @@ "postgresql_port": "5432", "redis_hosts": "localhost", "redis_port": "6379", + "redis_enable_tls": false, + "redis_insecure_skip_verify": false, "redis_username": "", "redis_password": "", "redis_read_time_out": "1s", diff --git a/cmd/bricksllm/main.go b/cmd/bricksllm/main.go index 84c1170..46166ce 100644 --- a/cmd/bricksllm/main.go +++ b/cmd/bricksllm/main.go @@ -175,14 +175,20 @@ func main() { rMemStore.Listen() defaultRedisOption := func(cfg *config.Config, dbIndex int) *redis.Options { - return &redis.Options{ + + options := &redis.Options{ Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), Password: cfg.RedisPassword, DB: cfg.RedisDBStartIndex + dbIndex, - TLSConfig: &tls.Config{ + } + + if cfg.RedisEnableTLS { + options.TLSConfig = &tls.Config{ InsecureSkipVerify: cfg.RedisInsecureSkipVerify, - }, + } } + + return options } rateLimitRedisCache := redis.NewClient(defaultRedisOption(cfg, 0)) diff --git a/internal/config/config.go b/internal/config/config.go index fdc3028..b1d3f49 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -25,6 +25,7 @@ type Config struct { RedisPort string `koanf:"redis_port" env:"REDIS_PORT" envDefault:"6379"` RedisUsername string `koanf:"redis_username" env:"REDIS_USERNAME"` RedisPassword string `koanf:"redis_password" env:"REDIS_PASSWORD"` + RedisEnableTLS bool `koanf:"redis_enable_tls" env:"REDIS_ENABLE_TLS" envDefault:"false"` RedisInsecureSkipVerify bool `koanf:"redis_insecure_skip_verify" env:"REDIS_INSECURE_SKIP_VERIFY" envDefault:"false"` RedisDBStartIndex int `koanf:"redis_db_start_index" env:"REDIS_DB_START_INDEX" envDefault:"0"` RedisReadTimeout time.Duration `koanf:"redis_read_time_out" env:"REDIS_READ_TIME_OUT" envDefault:"1s"` From 9afbfb34eee156731d3c6de3166fdb720b177803 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 9 Nov 2024 18:29:36 -0800 Subject: [PATCH 35/51] update cost --- internal/provider/anthropic/cost.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/provider/anthropic/cost.go b/internal/provider/anthropic/cost.go index 2fb0a58..4ec546b 100644 --- a/internal/provider/anthropic/cost.go +++ b/internal/provider/anthropic/cost.go @@ -13,6 +13,7 @@ var AnthropicPerMillionTokenCost = 
map[string]map[string]float64{ "claude-3-opus": 15, "claude-3-sonnet": 3, "claude-3.5-sonnet": 3, + "claude-3.5-haiku": 1, "claude-3-haiku": 0.25, }, "completion": { @@ -21,6 +22,7 @@ var AnthropicPerMillionTokenCost = map[string]map[string]float64{ "claude-3-opus": 75, "claude-3-sonnet": 15, "claude-3.5-sonnet": 15, + "claude-3.5-haiku": 5, "claude-3-haiku": 1.25, }, } @@ -77,8 +79,10 @@ func selectModel(model string) string { return "claude-3-opus" } else if strings.HasPrefix(model, "claude-3-sonnet") { return "claude-3-sonnet" - } else if strings.HasPrefix(model, "claude-3.5-sonnet") { + } else if strings.HasPrefix(model, "claude-3.5-sonnet") || strings.HasPrefix(model, "claude-3-5-sonnet") { return "claude-3.5-sonnet" + } else if strings.HasPrefix(model, "claude-3.5-haiku") || strings.HasPrefix(model, "claude-3-5-haiku") { + return "claude-3.5-haiku" } else if strings.HasPrefix(model, "claude-3-haiku") { return "claude-3-haiku" } else if strings.HasPrefix(model, "claude-instant") { From 78d1f24d87dc9768c5423af64d6e0e48b39e11c2 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 9 Nov 2024 18:34:09 -0800 Subject: [PATCH 36/51] update CHANGELOG --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd6f57c..0116c18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## 1.38.0 - 2024-11-09 +### Added +- Added support for `claude-3-5-haiku` +- Added support for Redis TLS config +- Added support for `gpt-4o-2024-08-06` + ## 1.37.0 - 2024-10-23 ### Added - Added request level timeout with HTTP header `x-request-timeout` From d3e7cc8ec680333db8d83c1a5413eec2569cc8bc Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 15 Nov 2024 22:24:14 -0800 Subject: [PATCH 37/51] add encryption --- .gitignore | 3 +- CHANGELOG.md | 8 ++ cmd/bricksllm/.env | 2 - cmd/bricksllm/config_local.json | 2 - cmd/bricksllm/main.go | 14 +-- internal/authenticator/authenticator.go | 50 ++++++++-- internal/config/config.go | 11 ++- internal/encryptor/encryptor.go | 120 ++++++++++++++++++++++++ internal/manager/provider_setting.go | 57 ++++++++++- 9 files changed, 237 insertions(+), 30 deletions(-) create mode 100644 internal/encryptor/encryptor.go diff --git a/.gitignore b/.gitignore index ba36687..0f708b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ release_notes.md target .DS_STORE -.vscode/launch.json \ No newline at end of file +.vscode/launch.json +.env \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 0116c18..7b89999 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## 1.39.0 - 2024-11-15 +### Added +- Added encryption integration + +### Changed +- Removed support for Redis TLS config + + ## 1.38.0 - 2024-11-09 ### Added - Added support for `claude-3-5-haiku` diff --git a/cmd/bricksllm/.env b/cmd/bricksllm/.env index f24859a..09ae4ee 100644 --- a/cmd/bricksllm/.env +++ b/cmd/bricksllm/.env @@ -7,8 +7,6 @@ POSTGRESQL_PASSWORD= POSTGRESQL_SSL_MODE=disable POSTGRESQL_PORT=5432 REDIS_HOSTS=localhost -REDIS_ENABLE_TLS=false -REDIS_INSECURE_SKIP_VERIFY=false REDIS_PORT=6379 REDIS_USERNAME= REDIS_PASSWORD= diff --git a/cmd/bricksllm/config_local.json b/cmd/bricksllm/config_local.json index b1ec5bd..ae92a01 100644 --- a/cmd/bricksllm/config_local.json +++ b/cmd/bricksllm/config_local.json @@ -7,8 +7,6 @@ "postgresql_port": "5432", "redis_hosts": "localhost", "redis_port": "6379", - "redis_enable_tls": false, - "redis_insecure_skip_verify": false, "redis_username": "", "redis_password": "", "redis_read_time_out": 
"1s", diff --git a/cmd/bricksllm/main.go b/cmd/bricksllm/main.go index 46166ce..e5d2b7a 100644 --- a/cmd/bricksllm/main.go +++ b/cmd/bricksllm/main.go @@ -2,7 +2,6 @@ package main import ( "context" - "crypto/tls" "flag" "fmt" "os" @@ -13,6 +12,7 @@ import ( auth "github.com/bricks-cloud/bricksllm/internal/authenticator" "github.com/bricks-cloud/bricksllm/internal/cache" "github.com/bricks-cloud/bricksllm/internal/config" + "github.com/bricks-cloud/bricksllm/internal/encryptor" "github.com/bricks-cloud/bricksllm/internal/logger/zap" "github.com/bricks-cloud/bricksllm/internal/manager" "github.com/bricks-cloud/bricksllm/internal/message" @@ -182,12 +182,6 @@ func main() { DB: cfg.RedisDBStartIndex + dbIndex, } - if cfg.RedisEnableTLS { - options.TLSConfig = &tls.Config{ - InsecureSkipVerify: cfg.RedisInsecureSkipVerify, - } - } - return options } @@ -292,9 +286,11 @@ func main() { psCache := redisStorage.NewProviderSettingsCache(providerSettingsRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) keysCache := redisStorage.NewKeysCache(keysRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) + encryptor := encryptor.NewEncryptor(cfg.DecryptionEndpoint, cfg.EncryptionEndpoint, cfg.EnableEncrytion, cfg.EncryptionTimeout) + m := manager.NewManager(store, costLimitCache, rateLimitCache, accessCache, keysCache) krm := manager.NewReportingManager(costStorage, store, store) - psm := manager.NewProviderSettingsManager(store, psCache) + psm := manager.NewProviderSettingsManager(store, psCache, encryptor) cpm := manager.NewCustomProvidersManager(store, cpMemStore) rm := manager.NewRouteManager(store, store, rMemStore, psm) pm := manager.NewPolicyManager(store, rMemStore) @@ -332,7 +328,7 @@ func main() { rec := recorder.NewRecorder(costStorage, userCostStorage, costLimitCache, userCostLimitCache, ce, store) rlm := manager.NewRateLimitManager(rateLimitCache, userRateLimitCache) - a := auth.NewAuthenticator(psm, m, rm, store) + a := auth.NewAuthenticator(psm, m, rm, store, encryptor) c := cache.NewCache(apiCache) diff --git a/internal/authenticator/authenticator.go b/internal/authenticator/authenticator.go index 38c53b8..68959d3 100644 --- a/internal/authenticator/authenticator.go +++ b/internal/authenticator/authenticator.go @@ -5,6 +5,7 @@ import ( "fmt" "math/rand" "net/http" + "strconv" "strings" internal_errors "github.com/bricks-cloud/bricksllm/internal/errors" @@ -34,19 +35,26 @@ type keyStorage interface { GetKeyByHash(hash string) (*key.ResponseKey, error) } +type Decryptor interface { + Decrypt(input string, headers map[string]string) (string, error) + Enabled() bool +} + type Authenticator struct { - psm providerSettingsManager - kc keysCache - rm routesManager - ks keyStorage + psm providerSettingsManager + kc keysCache + rm routesManager + ks keyStorage + decryptor Decryptor } -func NewAuthenticator(psm providerSettingsManager, kc keysCache, rm routesManager, ks keyStorage) *Authenticator { +func NewAuthenticator(psm providerSettingsManager, kc keysCache, rm routesManager, ks keyStorage, decryptor Decryptor) *Authenticator { return &Authenticator{ - psm: psm, - kc: kc, - rm: rm, - ks: ks, + psm: psm, + kc: kc, + rm: rm, + ks: ks, + decryptor: decryptor, } } @@ -268,6 +276,30 @@ func (a *Authenticator) AuthenticateHttpRequest(req *http.Request) (*key.Respons used = selected[rand.Intn(len(selected))] } + if a.decryptor.Enabled() { + encryptedParam := "" + if used.Provider == "amazon" { + encryptedParam = used.Setting["awsSecretAccessKey"] + } else if len(used.Setting["apikey"]) != 0 { 
+ encryptedParam = used.Setting["apikey"] + } + + if len(encryptedParam) != 0 { + decryptedSecret, err := a.decryptor.Decrypt(encryptedParam, map[string]string{"X-UPDATED-AT": strconv.FormatInt(used.UpdatedAt, 10)}) + if err == nil { + if used.Provider == "amazon" { + used.Setting["awsSecretAccessKey"] = decryptedSecret + } else { + used.Setting["apikey"] = decryptedSecret + } + } + + if err != nil { + fmt.Println(fmt.Printf("error when encrypting %v", err)) + } + } + } + err := rewriteHttpAuthHeader(req, used) if err != nil { return nil, nil, err diff --git a/internal/config/config.go b/internal/config/config.go index b1d3f49..ed55462 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1,6 +1,7 @@ package config import ( + "errors" "os" "path/filepath" "time" @@ -25,8 +26,6 @@ type Config struct { RedisPort string `koanf:"redis_port" env:"REDIS_PORT" envDefault:"6379"` RedisUsername string `koanf:"redis_username" env:"REDIS_USERNAME"` RedisPassword string `koanf:"redis_password" env:"REDIS_PASSWORD"` - RedisEnableTLS bool `koanf:"redis_enable_tls" env:"REDIS_ENABLE_TLS" envDefault:"false"` - RedisInsecureSkipVerify bool `koanf:"redis_insecure_skip_verify" env:"REDIS_INSECURE_SKIP_VERIFY" envDefault:"false"` RedisDBStartIndex int `koanf:"redis_db_start_index" env:"REDIS_DB_START_INDEX" envDefault:"0"` RedisReadTimeout time.Duration `koanf:"redis_read_time_out" env:"REDIS_READ_TIME_OUT" envDefault:"1s"` RedisWriteTimeout time.Duration `koanf:"redis_write_time_out" env:"REDIS_WRITE_TIME_OUT" envDefault:"500ms"` @@ -47,6 +46,10 @@ type Config struct { AmazonRequestTimeout time.Duration `koanf:"amazon_request_timeout" env:"AMAZON_REQUEST_TIMEOUT" envDefault:"5s"` AmazonConnectionTimeout time.Duration `koanf:"amazon_connection_timeout" env:"AMAZON_CONNECTION_TIMEOUT" envDefault:"10s"` RemoveUserAgent bool `koanf:"remove_user_agent" env:"REMOVE_USER_AGENT" envDefault:"false"` + EnableEncrytion bool `koanf:"enable_encryption" env:"ENABLE_ENCRYPTION" envDefault:"false"` + EncryptionEndpoint string `koanf:"encryption_endpoint" env:"ENCRYPTION_ENDPOINT"` + DecryptionEndpoint string `koanf:"decryption_endpoint" env:"DECRYPTION_ENDPOINT"` + EncryptionTimeout time.Duration `koanf:"encryption_timeout" env:"ENCRYPTION_TIMEOUT" envDefault:"5s"` } func prepareDotEnv(envFilePath string) error { @@ -82,6 +85,10 @@ func LoadConfig(log *zap.Logger) (*Config, error) { return nil, err } + if cfg.EnableEncrytion && len(cfg.EncryptionEndpoint) == 0 { + return nil, errors.New("encryption endpoint cannot be empty") + } + err = prepareDotEnv(".env") if err != nil { log.Sugar().Infof("error loading config from .env file: %v", err) diff --git a/internal/encryptor/encryptor.go b/internal/encryptor/encryptor.go new file mode 100644 index 0000000..eef5256 --- /dev/null +++ b/internal/encryptor/encryptor.go @@ -0,0 +1,120 @@ +package encryptor + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "time" +) + +type Encryptor struct { + decryptionURL string + encryptionURL string + enabled bool + client http.Client + timeout time.Duration +} + +type Secret struct { + Secret string `json:"secret"` +} + +type EncryptionResponse struct { + EncryptedSecret string `json:"encryptedSecret"` +} + +type DecryptionResponse struct { + DecryptedSecret string `json:"decryptedSecret"` +} + +func NewEncryptor(decryptionURL string, encryptionURL string, enabled bool, timeout time.Duration) Encryptor { + return Encryptor{ + decryptionURL: decryptionURL, + encryptionURL: encryptionURL, + client: 
http.Client{}, + enabled: enabled, + timeout: timeout, + } +} + +func (e Encryptor) Encrypt(input string, headers map[string]string) (string, error) { + data, err := json.Marshal(Secret{ + Secret: input, + }) + if err != nil { + return "", err + } + + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + defer cancel() + req, err := http.NewRequestWithContext(ctx, http.MethodPost, e.encryptionURL, bytes.NewBuffer(data)) + if err != nil { + return "", err + } + + for header, value := range headers { + req.Header.Add(header, value) + } + + res, err := e.client.Do(req) + if err != nil { + return "", err + } + + bytes, err := io.ReadAll(res.Body) + if err != nil { + return "", err + } + + encryptionResponse := EncryptionResponse{} + err = json.Unmarshal(bytes, &encryptionResponse) + if err != nil { + return "", err + } + + return encryptionResponse.EncryptedSecret, nil +} + +func (e Encryptor) Enabled() bool { + return e.enabled && len(e.decryptionURL) != 0 && len(e.encryptionURL) != 0 +} + +func (e Encryptor) Decrypt(input string, headers map[string]string) (string, error) { + data, err := json.Marshal(Secret{ + Secret: input, + }) + if err != nil { + return "", err + } + + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + defer cancel() + req, err := http.NewRequestWithContext(ctx, http.MethodPost, e.decryptionURL, bytes.NewBuffer(data)) + if err != nil { + return "", err + } + + for header, value := range headers { + req.Header.Add(header, value) + } + + res, err := e.client.Do(req) + if err != nil { + return "", err + } + + bytes, err := io.ReadAll(res.Body) + if err != nil { + return "", err + } + + decryptionSecret := DecryptionResponse{} + err = json.Unmarshal(bytes, &decryptionSecret) + if err != nil { + return "", err + } + + return decryptionSecret.DecryptedSecret, nil +} diff --git a/internal/manager/provider_setting.go b/internal/manager/provider_setting.go index 5b246d1..8ed9bca 100644 --- a/internal/manager/provider_setting.go +++ b/internal/manager/provider_setting.go @@ -3,6 +3,7 @@ package manager import ( "encoding/json" "fmt" + "strconv" "strings" "time" @@ -27,15 +28,22 @@ type ProviderSettingsCache interface { Delete(pid string) error } +type Encryptor interface { + Encrypt(input string, headers map[string]string) (string, error) + Enabled() bool +} + type ProviderSettingsManager struct { - Storage ProviderSettingsStorage - Cache ProviderSettingsCache + Storage ProviderSettingsStorage + Cache ProviderSettingsCache + Encryptor Encryptor } -func NewProviderSettingsManager(s ProviderSettingsStorage, cache ProviderSettingsCache) *ProviderSettingsManager { +func NewProviderSettingsManager(s ProviderSettingsStorage, cache ProviderSettingsCache, encryptor Encryptor) *ProviderSettingsManager { return &ProviderSettingsManager{ - Storage: s, - Cache: cache, + Storage: s, + Cache: cache, + Encryptor: encryptor, } } @@ -118,6 +126,27 @@ func (m *ProviderSettingsManager) validateSettings(providerName string, setting return nil } +func (m *ProviderSettingsManager) EncryptParams(updatedAt int64, provider string, params map[string]string) (map[string]string, error) { + if provider == "amazon" { + encryted, err := m.Encryptor.Encrypt(params["awsSecretAccessKey"], map[string]string{"X-UPDATED-AT": strconv.FormatInt(updatedAt, 10)}) + if err != nil { + return nil, err + } + + params["awsSecretAccessKey"] = encryted + + } else if provider == "openai" || provider == "anthropic" || provider == "deepinfra" || provider == "azure" { + encryted, err := 
m.Encryptor.Encrypt(params["apikey"], map[string]string{"X-UPDATED-AT": strconv.FormatInt(updatedAt, 10)}) + if err != nil { + return nil, err + } + + params["apikey"] = encryted + } + + return params, nil +} + func (m *ProviderSettingsManager) CreateSetting(setting *provider.Setting) (*provider.Setting, error) { if len(setting.Provider) == 0 { return nil, internal_errors.NewValidationError("provider field cannot be empty") @@ -131,6 +160,15 @@ func (m *ProviderSettingsManager) CreateSetting(setting *provider.Setting) (*pro setting.CreatedAt = time.Now().Unix() setting.UpdatedAt = time.Now().Unix() + if m.Encryptor.Enabled() { + params, err := m.EncryptParams(setting.UpdatedAt, setting.Provider, setting.Setting) + if err != nil { + return nil, err + } + + setting.Setting = params + } + return m.Storage.CreateProviderSetting(setting) } @@ -164,6 +202,15 @@ func (m *ProviderSettingsManager) UpdateSetting(id string, setting *provider.Upd telemetry.Incr("bricksllm.provider_settings_manager.update_setting.delete_cache_error", nil, 1) } + if m.Encryptor.Enabled() { + params, err := m.EncryptParams(existing.UpdatedAt, existing.Provider, setting.Setting) + if err != nil { + return nil, err + } + + setting.Setting = params + } + return m.Storage.UpdateProviderSetting(id, setting) } From c755d64f046d3f9c0a22e9c05e226b36fa5b8d2c Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 15 Nov 2024 23:14:34 -0800 Subject: [PATCH 38/51] add debug log --- internal/encryptor/encryptor.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/encryptor/encryptor.go b/internal/encryptor/encryptor.go index eef5256..4a05e7f 100644 --- a/internal/encryptor/encryptor.go +++ b/internal/encryptor/encryptor.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "encoding/json" + "fmt" "io" "net/http" "time" @@ -110,6 +111,8 @@ func (e Encryptor) Decrypt(input string, headers map[string]string) (string, err return "", err } + fmt.Println(string(bytes)) + decryptionSecret := DecryptionResponse{} err = json.Unmarshal(bytes, &decryptionSecret) if err != nil { From f2da1bb8894212027b7e5b74cd6084a6d8fb2fbc Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sun, 17 Nov 2024 14:50:14 -0800 Subject: [PATCH 39/51] add auth integration --- cmd/bricksllm/main.go | 5 +- go.mod | 33 ++++-- go.sum | 145 +++++++++++++++++++++--- internal/authenticator/authenticator.go | 4 - internal/config/config.go | 1 + internal/encryptor/encryptor.go | 21 ++-- 6 files changed, 175 insertions(+), 34 deletions(-) diff --git a/cmd/bricksllm/main.go b/cmd/bricksllm/main.go index e5d2b7a..66c337c 100644 --- a/cmd/bricksllm/main.go +++ b/cmd/bricksllm/main.go @@ -286,7 +286,10 @@ func main() { psCache := redisStorage.NewProviderSettingsCache(providerSettingsRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) keysCache := redisStorage.NewKeysCache(keysRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) - encryptor := encryptor.NewEncryptor(cfg.DecryptionEndpoint, cfg.EncryptionEndpoint, cfg.EnableEncrytion, cfg.EncryptionTimeout) + encryptor, err := encryptor.NewEncryptor(cfg.DecryptionEndpoint, cfg.EncryptionEndpoint, cfg.EnableEncrytion, cfg.EncryptionTimeout, cfg.Audience) + if cfg.EnableEncrytion && err != nil { + log.Sugar().Fatalf("error creating encryption client: %v", err) + } m := manager.NewManager(store, costLimitCache, rateLimitCache, accessCache, keysCache) krm := manager.NewReportingManager(costStorage, store, store) diff --git a/go.mod b/go.mod index bab0c2e..d917dea 100644 --- a/go.mod +++ b/go.mod @@ -19,17 +19,28 @@ require ( 
github.com/pkoukk/tiktoken-go v0.1.7 github.com/redis/go-redis/v9 v9.0.5 github.com/sashabaranov/go-openai v1.32.5 - github.com/stretchr/testify v1.8.4 + github.com/stretchr/testify v1.9.0 github.com/tidwall/gjson v1.17.0 github.com/tidwall/sjson v1.2.5 go.uber.org/zap v1.24.0 + google.golang.org/api v0.206.0 ) require ( + cloud.google.com/go/auth v0.10.2 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.5 // indirect + cloud.google.com/go/compute/metadata v0.5.2 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 // indirect github.com/beorn7/perks v1.0.1 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/google/s2a-go v0.1.8 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect + github.com/googleapis/gax-go/v2 v2.14.0 // indirect github.com/knadh/koanf/maps v0.1.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect @@ -37,7 +48,15 @@ require ( github.com/prometheus/client_model v0.5.0 // indirect github.com/prometheus/common v0.48.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect + go.opencensus.io v0.24.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect + go.opentelemetry.io/otel v1.29.0 // indirect + go.opentelemetry.io/otel/metric v1.29.0 // indirect + go.opentelemetry.io/otel/trace v1.29.0 // indirect go.uber.org/atomic v1.7.0 // indirect + golang.org/x/oauth2 v0.24.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/grpc v1.67.1 // indirect ) require ( @@ -57,7 +76,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/sts v1.28.4 // indirect github.com/aws/smithy-go v1.20.4 // indirect github.com/bytedance/sonic v1.9.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect @@ -86,10 +105,10 @@ require ( github.com/ugorji/go/codec v1.2.11 // indirect go.uber.org/multierr v1.10.0 // indirect golang.org/x/arch v0.3.0 // indirect - golang.org/x/crypto v0.18.0 // indirect - golang.org/x/net v0.20.0 // indirect - golang.org/x/sys v0.17.0 // indirect - golang.org/x/text v0.14.0 // indirect - google.golang.org/protobuf v1.33.0 // indirect + golang.org/x/crypto v0.29.0 // indirect + golang.org/x/net v0.31.0 // indirect + golang.org/x/sys v0.27.0 // indirect + golang.org/x/text v0.20.0 // indirect + google.golang.org/protobuf v1.35.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 3607d7d..4e61f2c 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,11 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go/auth v0.10.2 h1:oKF7rgBfSHdp/kuhXtqU/tNDr0mZqhYbEh+6SiqzkKo= +cloud.google.com/go/auth v0.10.2/go.mod h1:xxA5AqpDrvS+Gkmo9RqrGGRh6WSNKKOXhY3zNOr38tI= +cloud.google.com/go/auth/oauth2adapt v0.2.5 h1:2p29+dePqsCHPP1bqDJcKj4qxRyYCcbzKpFyKGt3MTk= +cloud.google.com/go/auth/oauth2adapt v0.2.5/go.mod 
h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8= +cloud.google.com/go/compute/metadata v0.5.2 h1:UxK4uu/Tn+I3p2dYWTfiX4wva7aYlKixAHn3fyqngqo= +cloud.google.com/go/compute/metadata v0.5.2/go.mod h1:C66sj2AluDcIqakBq/M8lw8/ybHgOZqin2obFxa/E5k= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/DataDog/datadog-go/v5 v5.3.0 h1:2q2qjFOb3RwAZNU+ez27ZVDwErJv5/VpbBPprz7Z+s8= github.com/DataDog/datadog-go/v5 v5.3.0/go.mod h1:XRDJk1pTc00gm+ZDiBKsjh7oOOtJfYfglVCmFb8C2+Q= github.com/Microsoft/go-winio v0.5.0 h1:Elr9Wn+sGKPlkaBvwu4mTrxtmOp3F3yV9qhaHbXGjwU= @@ -55,11 +63,14 @@ github.com/caarlos0/env v3.5.0+incompatible h1:Yy0UN8o9Wtr/jGHZDpCBLpNrzcFLLM2yi github.com/caarlos0/env v3.5.0+incompatible/go.mod h1:tdCsowwCzMLdkqRYDlHpZCp2UooDD3MspDBjZ2AD02Y= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -68,8 +79,14 @@ github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/r github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= github.com/fsnotify/fsnotify 
v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= @@ -78,6 +95,11 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= @@ -90,12 +112,39 @@ github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 h1:TQcrn6Wq+sKGkpyPvppOz99zsM github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 
+github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= +github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw= +github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= +github.com/googleapis/gax-go/v2 v2.14.0 h1:f+jMrjBPl+DL9nI4IQzLUxMq7XrAqFYB7hBPqMNIe8o= +github.com/googleapis/gax-go/v2 v2.14.0/go.mod h1:lhBCnjdLrWRaPvLWhmc8IS24m9mr07qSYnHncrgo+zk= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -144,6 +193,7 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSzKKE= @@ -159,8 +209,9 @@ github.com/sashabaranov/go-openai v1.32.5/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adO github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= @@ -170,8 +221,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 
-github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/gjson v1.17.0 h1:/Jocvlh98kcTfpN2+JzGQWQcqrPQwDrVEMApx/M5ZwM= github.com/tidwall/gjson v1.17.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= @@ -187,6 +238,18 @@ github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2 github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 h1:r6I7RJCN86bpD/FQwedZ0vSixDpwuWREjW9oRMsmqDc= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0/go.mod h1:B9yO6b04uB80CzjedvewuqDhxJxi11s7/GtiGa8bAjI= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8= +go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw= +go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8= +go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc= +go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8= +go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4= +go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ= go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= @@ -201,21 +264,39 @@ golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc= -golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= +golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= +golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod 
h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo= +golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= +golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= +golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -224,22 +305,54 @@ golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= -golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg= +golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/api v0.206.0 h1:A27GClesCSheW5P2BymVHjpEeQ2XHH8DI8Srs2HI2L8= +google.golang.org/api v0.206.0/go.mod h1:BtB8bfjTYIrai3d8UyvPmV9REGgox7coh+ZRwm0b+W8= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= +google.golang.org/grpc 
v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -247,4 +360,6 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/internal/authenticator/authenticator.go b/internal/authenticator/authenticator.go index 68959d3..bec413c 100644 --- a/internal/authenticator/authenticator.go +++ b/internal/authenticator/authenticator.go @@ -293,10 +293,6 @@ func (a *Authenticator) AuthenticateHttpRequest(req *http.Request) (*key.Respons used.Setting["apikey"] = decryptedSecret } } - - if err != nil { - fmt.Println(fmt.Printf("error when encrypting %v", err)) - } } } diff --git a/internal/config/config.go b/internal/config/config.go index ed55462..7abedde 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -50,6 +50,7 @@ type Config struct { EncryptionEndpoint string `koanf:"encryption_endpoint" env:"ENCRYPTION_ENDPOINT"` DecryptionEndpoint string `koanf:"decryption_endpoint" env:"DECRYPTION_ENDPOINT"` EncryptionTimeout time.Duration `koanf:"encryption_timeout" 
env:"ENCRYPTION_TIMEOUT" envDefault:"5s"`
+	Audience                 string        `koanf:"audience" env:"AUDIENCE"`
 }
 
 func prepareDotEnv(envFilePath string) error {
diff --git a/internal/encryptor/encryptor.go b/internal/encryptor/encryptor.go
index 4a05e7f..7f53d8b 100644
--- a/internal/encryptor/encryptor.go
+++ b/internal/encryptor/encryptor.go
@@ -4,17 +4,18 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
-	"fmt"
 	"io"
 	"net/http"
 	"time"
+
+	"google.golang.org/api/idtoken"
 )
 
 type Encryptor struct {
 	decryptionURL string
 	encryptionURL string
 	enabled       bool
-	client        http.Client
+	client        *http.Client
 	timeout       time.Duration
 }
 
@@ -30,14 +31,22 @@ type DecryptionResponse struct {
 	DecryptedSecret string `json:"decryptedSecret"`
 }
 
-func NewEncryptor(decryptionURL string, encryptionURL string, enabled bool, timeout time.Duration) Encryptor {
+func NewEncryptor(decryptionURL string, encryptionURL string, enabled bool, timeout time.Duration, audience string) (Encryptor, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	client, err := idtoken.NewClient(ctx, audience)
+	if err != nil {
+		return Encryptor{}, err
+	}
+
 	return Encryptor{
 		decryptionURL: decryptionURL,
 		encryptionURL: encryptionURL,
-		client:        http.Client{},
 		enabled:       enabled,
 		timeout:       timeout,
-	}
+		client:        client,
+	}, nil
 }
 
 func (e Encryptor) Encrypt(input string, headers map[string]string) (string, error) {
@@ -111,8 +120,6 @@ func (e Encryptor) Decrypt(input string, headers map[string]string) (string, err
 		return "", err
 	}
 
-	fmt.Println(string(bytes))
-
 	decryptionSecret := DecryptionResponse{}
 	err = json.Unmarshal(bytes, &decryptionSecret)
 	if err != nil {

From 25627cce4c9a71cc339928bd814bc9c867f89e24 Mon Sep 17 00:00:00 2001
From: Spike Lu
Date: Mon, 18 Nov 2024 09:32:00 -0800
Subject: [PATCH 40/51] fix bug

---
 internal/manager/provider_setting.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/internal/manager/provider_setting.go b/internal/manager/provider_setting.go
index 8ed9bca..d02e699 100644
--- a/internal/manager/provider_setting.go
+++ b/internal/manager/provider_setting.go
@@ -203,7 +203,7 @@ func (m *ProviderSettingsManager) UpdateSetting(id string, setting *provider.Upd
 	}
 
 	if m.Encryptor.Enabled() {
-		params, err := m.EncryptParams(existing.UpdatedAt, existing.Provider, setting.Setting)
+		params, err := m.EncryptParams(setting.UpdatedAt, existing.Provider, setting.Setting)
 		if err != nil {
 			return nil, err
 		}

From 89021844048edc16d670b516cefcc885f3c326f1 Mon Sep 17 00:00:00 2001
From: Andrew Rothstein
Date: Tue, 5 Nov 2024 02:18:28 +0000
Subject: [PATCH 41/51] first swing. broken.
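
A rough usage sketch of the encryptor wired up in PATCH 39 above, not taken from
any patch in this series: it builds the idtoken-backed client and round-trips a
provider secret through the encryption service. The endpoint URLs, audience, and
sample key are placeholders, the X-UPDATED-AT header mirrors what the manager and
authenticator send, and running it assumes Google application-default credentials
plus a location inside the bricksllm module, since the package lives under internal/.

package main

import (
	"fmt"
	"strconv"
	"time"

	"github.com/bricks-cloud/bricksllm/internal/encryptor"
)

func main() {
	// Placeholder endpoints and audience; BricksLLM reads these from
	// DECRYPTION_ENDPOINT, ENCRYPTION_ENDPOINT and AUDIENCE.
	enc, err := encryptor.NewEncryptor(
		"https://kms.example.invalid/decrypt", // cfg.DecryptionEndpoint
		"https://kms.example.invalid/encrypt", // cfg.EncryptionEndpoint
		true,                                  // cfg.EnableEncrytion
		5*time.Second,                         // cfg.EncryptionTimeout
		"https://kms.example.invalid",         // cfg.Audience
	)
	if err != nil {
		panic(err) // idtoken.NewClient needs Google application-default credentials
	}

	// The managers tag each call with the setting's UpdatedAt so the
	// encryption service can pick the matching key version.
	headers := map[string]string{
		"X-UPDATED-AT": strconv.FormatInt(time.Now().Unix(), 10),
	}

	ciphertext, err := enc.Encrypt("sk-placeholder-provider-key", headers)
	if err != nil {
		panic(err)
	}

	plaintext, err := enc.Decrypt(ciphertext, headers)
	if err != nil {
		panic(err)
	}

	fmt.Println(plaintext == "sk-placeholder-provider-key")
}
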
---
 kubernetes/helm-charts/bricksllm/.gitignore   |   1 +
 kubernetes/helm-charts/bricksllm/.helmignore  |  23 ++
 kubernetes/helm-charts/bricksllm/Chart.lock   |   9 ++
 kubernetes/helm-charts/bricksllm/Chart.yaml   |  21 ++++
 .../helm-charts/bricksllm/templates/NOTES.txt |   0
 .../bricksllm/templates/_helpers.tpl          |  62 ++++++++
 .../bricksllm/templates/deployment.yaml       |  77 ++++++++++
 .../helm-charts/bricksllm/templates/hpa.yaml  |  32 +++++
 .../bricksllm/templates/ingress.yaml          |  43 +++++++
 .../bricksllm/templates/service.yaml          |  21 ++++
 .../bricksllm/templates/serviceaccount.yaml   |  13 ++
 .../templates/tests/test-connection.yaml      |  19 +++
 kubernetes/helm-charts/bricksllm/values.yaml  | 116 ++++++++++++++
 13 files changed, 437 insertions(+)
 create mode 100644 kubernetes/helm-charts/bricksllm/.gitignore
 create mode 100644 kubernetes/helm-charts/bricksllm/.helmignore
 create mode 100644 kubernetes/helm-charts/bricksllm/Chart.lock
 create mode 100644 kubernetes/helm-charts/bricksllm/Chart.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/NOTES.txt
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/_helpers.tpl
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/deployment.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/hpa.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/ingress.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/service.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/serviceaccount.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/values.yaml

diff --git a/kubernetes/helm-charts/bricksllm/.gitignore b/kubernetes/helm-charts/bricksllm/.gitignore
new file mode 100644
index 0000000..948259a
--- /dev/null
+++ b/kubernetes/helm-charts/bricksllm/.gitignore
@@ -0,0 +1 @@
+charts/*.tgz
diff --git a/kubernetes/helm-charts/bricksllm/.helmignore b/kubernetes/helm-charts/bricksllm/.helmignore
new file mode 100644
index 0000000..0e8a0eb
--- /dev/null
+++ b/kubernetes/helm-charts/bricksllm/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/kubernetes/helm-charts/bricksllm/Chart.lock b/kubernetes/helm-charts/bricksllm/Chart.lock new file mode 100644 index 0000000..322694a --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/Chart.lock @@ -0,0 +1,9 @@ +dependencies: +- name: redis + repository: oci://registry-1.docker.io/bitnamicharts + version: 20.2.1 +- name: postgresql + repository: oci://registry-1.docker.io/bitnamicharts + version: 16.1.1 +digest: sha256:d380aeee84575489c7b48727ff37b9e47747e8c7e855655fc815455243421660 +generated: "2024-11-04T22:16:50.627919824Z" diff --git a/kubernetes/helm-charts/bricksllm/Chart.yaml b/kubernetes/helm-charts/bricksllm/Chart.yaml new file mode 100644 index 0000000..7b918a3 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/Chart.yaml @@ -0,0 +1,21 @@ +--- +apiVersion: v2 +name: bricksllm +description: A Helm chart for deploying BricksLLM and its dependencies + +type: application + +version: 0.1.0 + +# bricksllm version +appVersion: "1.37.0" + +dependencies: + - condition: redis.enabled + name: redis + repository: oci://registry-1.docker.io/bitnamicharts + version: ~20 + - conditions: postgresql.enabled + name: postgresql + repository: oci://registry-1.docker.io/bitnamicharts + version: ~16 diff --git a/kubernetes/helm-charts/bricksllm/templates/NOTES.txt b/kubernetes/helm-charts/bricksllm/templates/NOTES.txt new file mode 100644 index 0000000..e69de29 diff --git a/kubernetes/helm-charts/bricksllm/templates/_helpers.tpl b/kubernetes/helm-charts/bricksllm/templates/_helpers.tpl new file mode 100644 index 0000000..7a986ec --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "bricksllm.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "bricksllm.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "bricksllm.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "bricksllm.labels" -}} +helm.sh/chart: {{ include "bricksllm.chart" . }} +{{ include "bricksllm.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "bricksllm.selectorLabels" -}} +app.kubernetes.io/name: {{ include "bricksllm.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "bricksllm.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "bricksllm.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/deployment.yaml b/kubernetes/helm-charts/bricksllm/templates/deployment.yaml new file mode 100644 index 0000000..4c2f7c6 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/deployment.yaml @@ -0,0 +1,77 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "bricksllm.fullname" . }} + labels: + {{- include "bricksllm.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "bricksllm.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "bricksllm.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "bricksllm.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: control + containerPort: {{ .Values.services.control.port }} + protocol: TCP + - name: data + containerPort: {{ .Values.services.data.port }} + protocol: TCP + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.readinesProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/hpa.yaml b/kubernetes/helm-charts/bricksllm/templates/hpa.yaml new file mode 100644 index 0000000..bd8bff1 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/hpa.yaml @@ -0,0 +1,32 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "bricksllm.fullname" . }} + labels: + {{- include "bricksllm.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "bricksllm.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/ingress.yaml b/kubernetes/helm-charts/bricksllm/templates/ingress.yaml new file mode 100644 index 0000000..672c0e3 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/ingress.yaml @@ -0,0 +1,43 @@ +{{- if .Values.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "bricksllm.fullname" . }} + labels: + {{- include "bricksllm.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.ingress.className }} + ingressClassName: {{ . }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- with .pathType }} + pathType: {{ . }} + {{- end }} + backend: + service: + name: {{ include "bricksllm.fullname" $ }} + port: + number: {{ $.Values.service.port }} + {{- end }} + {{- end }} +{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/service.yaml b/kubernetes/helm-charts/bricksllm/templates/service.yaml new file mode 100644 index 0000000..94606e3 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/service.yaml @@ -0,0 +1,21 @@ +{{- $fullName := include "bricksllm.fullname" . }} +{{- $labels := include "bricksllm.labels" . }} +{{- $selectorLabels := include "bricksllm.selectorLabels" . }} +{{- range $name, $v := .Values.services -}} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $fullName }}-{{ $name }} + labels: + {{- $labels | nindent 4 }} +spec: + type: {{ $v.type }} + ports: + - port: {{ $v.port }} + targetPort: http + protocol: TCP + name: {{ $name | quote }} + selector: + {{- $selectorLabels | nindent 4 }} +{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/serviceaccount.yaml b/kubernetes/helm-charts/bricksllm/templates/serviceaccount.yaml new file mode 100644 index 0000000..453e1e0 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "bricksllm.serviceAccountName" . }} + labels: + {{- include "bricksllm.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml b/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml new file mode 100644 index 0000000..ab6f4e2 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "bricksllm.fullname" . }}-test-connection" + labels: + {{- include "bricksllm.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "bricksllm.fullname" . }}-control:{{ .Values.services.control.port }}'] + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "bricksllm.fullname" . }}-data:{{ .Values.services.data.port }}'] + restartPolicy: Never diff --git a/kubernetes/helm-charts/bricksllm/values.yaml b/kubernetes/helm-charts/bricksllm/values.yaml new file mode 100644 index 0000000..a4a8289 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/values.yaml @@ -0,0 +1,116 @@ +--- +# replica count for the bricksllm Deployment +replicaCount: 1 + +# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ +image: + repository: luyuanxin1995/bricksllm + # This sets the pull policy for images. + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + +# for private registries hosting the imagery +imagePullSecrets: [] + +# This is to override the chart name. +nameOverride: "" +fullnameOverride: "" + +# establish a dedicated service account +serviceAccount: + # Specifies whether a service account should be created + create: true + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. 
+ # If not set and create is true, a name is generated using the fullname template + name: "" + +# additional pod annotations +podAnnotations: {} + +# additional pod labels +podLabels: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +services: + control: + type: ClusterIP + port: 8001 + data: + type: ClusterIP + port: 8002 + +# Configuring Ingress +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +#resources: +# limits: +# cpu: 100m +# memory: 128Mi +# requests: +# cpu: 100m +# memory: 128Mi + +#livenessProbe: +# httpGet: +# path: / +# port: http + +#readinessProbe: +# httpGet: +# path: / +# port: http + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# Additional volumes for the Deployments +#volumes: +# - name: foo +# secret: +# secretName: mysecret +# optional: false + +# Additional volumeMounts for the deployments +#volumeMounts: +# - name: foo +# mountPath: "/etc/foo" +# readOnly: true + +nodeSelector: {} + +tolerations: [] + +affinity: {} From ae55b11097aed9a9d81d7c31a348643e99f6813e Mon Sep 17 00:00:00 2001 From: Andrew Rothstein Date: Wed, 6 Nov 2024 03:15:39 +0000 Subject: [PATCH 42/51] gussy up the services with named ports and named ingresses --- kubernetes/helm-charts/Taskfile.yml | 12 ++++ .../bricksllm/templates/deployment.yaml | 30 ++++++-- .../bricksllm/templates/ingress.yaml | 29 +++++--- .../templates/tests/test-connection.yaml | 16 +++-- kubernetes/helm-charts/bricksllm/values.yaml | 72 ++++++++++++------- 5 files changed, 109 insertions(+), 50 deletions(-) create mode 100644 kubernetes/helm-charts/Taskfile.yml diff --git a/kubernetes/helm-charts/Taskfile.yml b/kubernetes/helm-charts/Taskfile.yml new file mode 100644 index 0000000..d50108a --- /dev/null +++ b/kubernetes/helm-charts/Taskfile.yml @@ -0,0 +1,12 @@ +--- +version: '3' +tasks: + default: + cmds: + - | + helm upgrade \ + --create-namespace \ + -n bricksllm \ + --install \ + bricksllm \ + ./bricksllm diff --git a/kubernetes/helm-charts/bricksllm/templates/deployment.yaml b/kubernetes/helm-charts/bricksllm/templates/deployment.yaml index 4c2f7c6..8a97665 100644 --- a/kubernetes/helm-charts/bricksllm/templates/deployment.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/deployment.yaml @@ -1,7 +1,9 @@ +{{ $fullname := include "bricksllm.fullname" . -}} +--- apiVersion: apps/v1 kind: Deployment metadata: - name: {{ include "bricksllm.fullname" . }} + name: {{ $fullname }} labels: {{- include "bricksllm.labels" . 
| nindent 4 }} spec: @@ -35,14 +37,30 @@ spec: securityContext: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + env: + - name: POSTGRESQL_HOSTS + value: '{{ $fullname }}-postgresql' + - name: POSTGRESQL_USERNAME + value: postgres + - name: POSTGRESQL_PASSWORD + valueFrom: + secretKeyRef: + name: '{{ $fullname }}-postgresql' + key: postgres-password + - name: REDIS_HOSTS + value: '{{ $fullname }}-redis-master' + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: '{{ $fullname }}-redis' + key: redis-password imagePullPolicy: {{ .Values.image.pullPolicy }} ports: - - name: control - containerPort: {{ .Values.services.control.port }} - protocol: TCP - - name: data - containerPort: {{ .Values.services.data.port }} + {{- range $name, $v := .Values.services }} + - name: {{ $name }} + containerPort: {{ $v.port }} protocol: TCP + {{- end }} {{- with .Values.livenessProbe }} livenessProbe: {{- toYaml . | nindent 12 }} diff --git a/kubernetes/helm-charts/bricksllm/templates/ingress.yaml b/kubernetes/helm-charts/bricksllm/templates/ingress.yaml index 672c0e3..c3039f7 100644 --- a/kubernetes/helm-charts/bricksllm/templates/ingress.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/ingress.yaml @@ -1,30 +1,36 @@ -{{- if .Values.ingress.enabled -}} +{{- $fullname := include "bricksllm.fullname" . }} +{{- $labels := include "bricksllm.labels" . }} +{{- range $name, $v := .Values.ingresses }} +{{- if $v.enabled }} +--- apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - name: {{ include "bricksllm.fullname" . }} + name: {{ $fullname }}-{{ $name }} labels: - {{- include "bricksllm.labels" . | nindent 4 }} - {{- with .Values.ingress.annotations }} + {{- $labels | nindent 4 }} + {{- with $v.annotations }} annotations: {{- toYaml . | nindent 4 }} {{- end }} spec: - {{- with .Values.ingress.className }} + {{- with $v.className }} ingressClassName: {{ . }} {{- end }} - {{- if .Values.ingress.tls }} + {{- if $v.tls }} tls: - {{- range .Values.ingress.tls }} + {{- range $v.tls }} - hosts: {{- range .hosts }} - {{ . | quote }} {{- end }} - secretName: {{ .secretName }} + {{- with .secretName }} + secretName: {{ . }} + {{- end }} {{- end }} {{- end }} rules: - {{- range .Values.ingress.hosts }} + {{- range $v.hosts }} - host: {{ .host | quote }} http: paths: @@ -35,9 +41,10 @@ spec: {{- end }} backend: service: - name: {{ include "bricksllm.fullname" $ }} + name: {{ $fullname }}-{{ $name }} port: - number: {{ $.Values.service.port }} + number: {{ get (get $.Values.services $name) "port" }} {{- end }} {{- end }} {{- end }} +{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml b/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml index ab6f4e2..db01831 100644 --- a/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml @@ -1,19 +1,21 @@ +{{ $fullname := include "bricksllm.fullname" . -}} +--- apiVersion: v1 kind: Pod metadata: - name: "{{ include "bricksllm.fullname" . }}-test-connection" + name: "{{ $fullname }}-test-connection" labels: {{- include "bricksllm.labels" . | nindent 4 }} annotations: "helm.sh/hook": test spec: containers: +{{- range $name, $v := .Values.services }} - name: wget image: busybox - command: ['wget'] - args: ['{{ include "bricksllm.fullname" . 
}}-control:{{ .Values.services.control.port }}'] - - name: wget - image: busybox - command: ['wget'] - args: ['{{ include "bricksllm.fullname" . }}-data:{{ .Values.services.data.port }}'] + command: + - wget + args: + - '{{ $fullname }}-{{ $name }}:{{ $v.port }}' +{{- end }} restartPolicy: Never diff --git a/kubernetes/helm-charts/bricksllm/values.yaml b/kubernetes/helm-charts/bricksllm/values.yaml index a4a8289..c080514 100644 --- a/kubernetes/helm-charts/bricksllm/values.yaml +++ b/kubernetes/helm-charts/bricksllm/values.yaml @@ -47,29 +47,49 @@ securityContext: {} # runAsUser: 1000 services: - control: + admin: type: ClusterIP port: 8001 - data: + proxy: type: ClusterIP port: 8002 -# Configuring Ingress -ingress: - enabled: false - className: "" - annotations: {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - hosts: - - host: chart-example.local - paths: - - path: / - pathType: ImplementationSpecific - tls: [] - # - secretName: chart-example-tls - # hosts: - # - chart-example.local +# Configuring Ingresses +ingresses: + admin: + enabled: true + className: tailscale + #annotations: + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: bricksllm-admin.elephant-frog.ts.net + paths: + - path: / + pathType: ImplementationSpecific + tls: + - hosts: + - bricksllm-admin.elephant-frog.ts.net + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + proxy: + enabled: true + className: tailscale + #annotations: + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: bricksllm-proxy.elephant-frog.ts.net + paths: + - path: / + pathType: ImplementationSpecific + tls: + - hosts: + - bricksllm-proxy.elephant-frog.ts.net + # - secretName: chart-example-tls + # hosts: + # - chart-example.local #resources: # limits: @@ -79,15 +99,15 @@ ingress: # cpu: 100m # memory: 128Mi -#livenessProbe: -# httpGet: -# path: / -# port: http +livenessProbe: + httpGet: + path: /api/health + port: proxy -#readinessProbe: -# httpGet: -# path: / -# port: http +readinessProbe: + httpGet: + path: /api/health + port: proxy autoscaling: enabled: false From 6807c1bdddb3c0ec4c264c54965781bb0a12796c Mon Sep 17 00:00:00 2001 From: Andrew Rothstein Date: Wed, 6 Nov 2024 03:51:01 +0000 Subject: [PATCH 43/51] thats one service with multiple ports tho multiple ingresses --- kubernetes/helm-charts/Taskfile.yml | 4 ++++ .../bricksllm/templates/deployment.yaml | 6 +++--- .../helm-charts/bricksllm/templates/ingress.yaml | 4 ++-- .../helm-charts/bricksllm/templates/service.yaml | 16 ++++++++-------- .../templates/tests/test-connection.yaml | 4 ++-- kubernetes/helm-charts/bricksllm/values.yaml | 10 ++++------ 6 files changed, 23 insertions(+), 21 deletions(-) diff --git a/kubernetes/helm-charts/Taskfile.yml b/kubernetes/helm-charts/Taskfile.yml index d50108a..33821a8 100644 --- a/kubernetes/helm-charts/Taskfile.yml +++ b/kubernetes/helm-charts/Taskfile.yml @@ -10,3 +10,7 @@ tasks: --install \ bricksllm \ ./bricksllm + delete: + cmds: + - helm delete -n bricksllm bricksllm + diff --git a/kubernetes/helm-charts/bricksllm/templates/deployment.yaml b/kubernetes/helm-charts/bricksllm/templates/deployment.yaml index 8a97665..f015f69 100644 --- a/kubernetes/helm-charts/bricksllm/templates/deployment.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/deployment.yaml @@ -56,9 +56,9 @@ spec: key: redis-password imagePullPolicy: {{ .Values.image.pullPolicy }} ports: - {{- range $name, $v := .Values.services }} - - name: {{ $name }} 
- containerPort: {{ $v.port }} + {{- range $n, $p := .Values.services.ports }} + - name: {{ $n }} + containerPort: {{ $p }} protocol: TCP {{- end }} {{- with .Values.livenessProbe }} diff --git a/kubernetes/helm-charts/bricksllm/templates/ingress.yaml b/kubernetes/helm-charts/bricksllm/templates/ingress.yaml index c3039f7..fcbb9dc 100644 --- a/kubernetes/helm-charts/bricksllm/templates/ingress.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/ingress.yaml @@ -41,9 +41,9 @@ spec: {{- end }} backend: service: - name: {{ $fullname }}-{{ $name }} + name: {{ $fullname }} port: - number: {{ get (get $.Values.services $name) "port" }} + name: {{ $name | quote }} {{- end }} {{- end }} {{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/service.yaml b/kubernetes/helm-charts/bricksllm/templates/service.yaml index 94606e3..f8441d1 100644 --- a/kubernetes/helm-charts/bricksllm/templates/service.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/service.yaml @@ -1,21 +1,21 @@ -{{- $fullName := include "bricksllm.fullname" . }} +{{- $fullname := include "bricksllm.fullname" . }} {{- $labels := include "bricksllm.labels" . }} {{- $selectorLabels := include "bricksllm.selectorLabels" . }} -{{- range $name, $v := .Values.services -}} --- apiVersion: v1 kind: Service metadata: - name: {{ $fullName }}-{{ $name }} + name: {{ $fullname }} labels: {{- $labels | nindent 4 }} spec: - type: {{ $v.type }} + type: {{ .Values.services.type }} ports: - - port: {{ $v.port }} - targetPort: http +{{- range $n, $p := .Values.services.ports }} + - port: {{ $p }} + targetPort: {{ $p }} protocol: TCP - name: {{ $name | quote }} + name: {{ $n | quote }} +{{- end }} selector: {{- $selectorLabels | nindent 4 }} -{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml b/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml index db01831..32b4f42 100644 --- a/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml @@ -10,12 +10,12 @@ metadata: "helm.sh/hook": test spec: containers: -{{- range $name, $v := .Values.services }} +{{- range $n, $p := .Values.services.ports }} - name: wget image: busybox command: - wget args: - - '{{ $fullname }}-{{ $name }}:{{ $v.port }}' + - '{{ $fullname }}:{{ $p }}' {{- end }} restartPolicy: Never diff --git a/kubernetes/helm-charts/bricksllm/values.yaml b/kubernetes/helm-charts/bricksllm/values.yaml index c080514..eca72c4 100644 --- a/kubernetes/helm-charts/bricksllm/values.yaml +++ b/kubernetes/helm-charts/bricksllm/values.yaml @@ -47,12 +47,10 @@ securityContext: {} # runAsUser: 1000 services: - admin: - type: ClusterIP - port: 8001 - proxy: - type: ClusterIP - port: 8002 + type: ClusterIP + ports: + admin: 8001 + proxy: 8002 # Configuring Ingresses ingresses: From 0bbcd747603f46862bf18e82a2818aa61b1331eb Mon Sep 17 00:00:00 2001 From: Andrew Rothstein Date: Wed, 27 Nov 2024 14:45:17 +0000 Subject: [PATCH 44/51] default values --- kubernetes/helm-charts/bricksllm/values.yaml | 46 +++++++++----------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/kubernetes/helm-charts/bricksllm/values.yaml b/kubernetes/helm-charts/bricksllm/values.yaml index eca72c4..72bc6ea 100644 --- a/kubernetes/helm-charts/bricksllm/values.yaml +++ b/kubernetes/helm-charts/bricksllm/values.yaml @@ -55,39 +55,33 @@ services: # Configuring Ingresses ingresses: admin: - enabled: true - className: tailscale + enabled: false + #className: "tailscale" 
#annotations: # kubernetes.io/ingress.class: nginx # kubernetes.io/tls-acme: "true" - hosts: - - host: bricksllm-admin.elephant-frog.ts.net - paths: - - path: / - pathType: ImplementationSpecific - tls: - - hosts: - - bricksllm-admin.elephant-frog.ts.net - # - secretName: chart-example-tls - # hosts: - # - chart-example.local + #hosts: + # - host: bricksllm-admin.elephant-frog.ts.net + # paths: + # - path: / + # pathType: ImplementationSpecific + #tls: + # - hosts: + # - bricksllm-admin.elephant-frog.ts.net proxy: - enabled: true - className: tailscale + enabled: false + #className: tailscale #annotations: # kubernetes.io/ingress.class: nginx # kubernetes.io/tls-acme: "true" - hosts: - - host: bricksllm-proxy.elephant-frog.ts.net - paths: - - path: / - pathType: ImplementationSpecific - tls: - - hosts: - - bricksllm-proxy.elephant-frog.ts.net - # - secretName: chart-example-tls - # hosts: - # - chart-example.local + #hosts: + # - host: bricksllm-proxy.elephant-frog.ts.net + # paths: + # - path: / + # pathType: ImplementationSpecific + #tls: + # - hosts: + # - bricksllm-proxy.elephant-frog.ts.net #resources: # limits: From d88f94b5549700b994cd9146cf0f60159b6ec596 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Thu, 26 Dec 2024 22:23:12 -0800 Subject: [PATCH 45/51] add support for amazon bedrock model --- internal/provider/anthropic/cost.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/internal/provider/anthropic/cost.go b/internal/provider/anthropic/cost.go index 4ec546b..6785fea 100644 --- a/internal/provider/anthropic/cost.go +++ b/internal/provider/anthropic/cost.go @@ -94,13 +94,28 @@ func selectModel(model string) string { return "" } +func convertAmazonModelToAnthropicModel(model string) string { + parts := strings.Split(model, ".") + if len(parts) < 3 { + return model + } + + return selectModel(parts[2]) +} + func (ce *CostEstimator) EstimateCompletionCost(model string, tks int) (float64, error) { costMap, ok := ce.tokenCostMap["completion"] if !ok { return 0, errors.New("prompt token cost is not provided") } - selected := selectModel(model) + selected := "" + if strings.HasPrefix(model, "us") { + selected = convertAmazonModelToAnthropicModel(model) + } else { + selected = selectModel(model) + } + cost, ok := costMap[selected] if !ok { return 0, errors.New("model is not present in the cost map provided") From 621578881bb34c0d2ebd6deda49f8a32a1fe8aec Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 27 Dec 2024 08:04:16 -0800 Subject: [PATCH 46/51] add cost tracking for o1 --- internal/provider/openai/cost.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index dfa9040..641a577 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -34,6 +34,8 @@ func parseFinetuneModel(model string) string { var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "prompt": { + "o1": 0.015, + "o1-2024-12-17": 0.015, "o1-preview": 0.015, "o1-preview-2024-09-12": 0.015, "gpt-4o": 0.0025, @@ -98,6 +100,8 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "tts-1-hd": 0.03, }, "completion": { + "o1": 0.06, + "o1-2024-12-17": 0.06, "o1-preview": 0.06, "o1-preview-2024-09-12": 0.06, "gpt-3.5-turbo-1106": 0.002, From 2a8a1dfd84386f6f43577051aa04198a1c047fdc Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 1 Jan 2025 19:52:05 -0800 Subject: [PATCH 47/51] add pushing to aws --- .github/workflows/release.yml | 31 +++++++++++++++++++++++++++++++ 
1 file changed, 31 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5884f46..46ef2b7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -115,3 +115,34 @@ jobs: platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta-datadog.outputs.tags }} + + aws: + runs-on: ubuntu-latest + steps: + - name: Install Go + uses: actions/setup-go@v3 + with: + go-version: 1.21.x + check-latest: true + + - name: Check Out Repo + uses: actions/checkout@v3 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: ${{ secrets.AWS_DEFAULT_REGION }} + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Build and push + uses: mr-smithers-excellent/docker-build-push@v6 + with: + image: bricksllm + tags: latest + dockerfile: Dockerfile.aws.datadog + registry: ${{ steps.login-ecr.outputs.registry }} \ No newline at end of file From 34ebf3401808fe6580fbc843a07999076918d316 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 1 Jan 2025 20:03:37 -0800 Subject: [PATCH 48/51] update workflow --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 46ef2b7..f7424c7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -144,5 +144,5 @@ jobs: with: image: bricksllm tags: latest - dockerfile: Dockerfile.aws.datadog + dockerfile: Dockerfile.datadog registry: ${{ steps.login-ecr.outputs.registry }} \ No newline at end of file From 5fb4f931c9a91a0f43fe3c2dd5ecdba11db8b5ee Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sun, 5 Jan 2025 15:16:10 -0800 Subject: [PATCH 49/51] fix bug --- internal/provider/anthropic/cost.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/internal/provider/anthropic/cost.go b/internal/provider/anthropic/cost.go index 6785fea..36680ca 100644 --- a/internal/provider/anthropic/cost.go +++ b/internal/provider/anthropic/cost.go @@ -64,7 +64,13 @@ func (ce *CostEstimator) EstimatePromptCost(model string, tks int) (float64, err } - selected := selectModel(model) + selected := "" + if strings.HasPrefix(model, "us") { + selected = convertAmazonModelToAnthropicModel(model) + } else { + selected = selectModel(model) + } + cost, ok := costMap[selected] if !ok { return 0, fmt.Errorf("%s is not present in the cost map provided", model) From c7d80a62c2de95733c1e6c95a4eaedc50afaad75 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sun, 5 Jan 2025 15:47:30 -0800 Subject: [PATCH 50/51] update encryptor initialization logic --- internal/encryptor/encryptor.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/internal/encryptor/encryptor.go b/internal/encryptor/encryptor.go index 7f53d8b..c576b2b 100644 --- a/internal/encryptor/encryptor.go +++ b/internal/encryptor/encryptor.go @@ -35,6 +35,16 @@ func NewEncryptor(decryptionURL string, encryptionURL string, enabled bool, time ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() + if len(audience) == 0 { + return Encryptor{ + decryptionURL: decryptionURL, + encryptionURL: encryptionURL, + enabled: enabled, + timeout: timeout, + client: &http.Client{}, + }, nil + } + client, err := idtoken.NewClient(ctx, audience) if err != nil { return Encryptor{}, err From 
80bb56b8ca23f8fd8d820c84298b0289c2df766b Mon Sep 17 00:00:00 2001 From: Sergei Bronnikov <48258384+sergei-bronnikov@users.noreply.github.com> Date: Thu, 6 Mar 2025 11:10:24 +0000 Subject: [PATCH 51/51] wip (#8) --- cmd/bricksllm/main.go | 16 +++- internal/authenticator/authenticator.go | 39 ++++++++- internal/errors/expiration_err.go | 5 +- internal/event/key_reporting.go | 21 +++-- internal/key/key.go | 11 +++ internal/manager/key.go | 34 +++++--- internal/manager/provider_setting.go | 71 +++++++++++++++-- internal/manager/reporting.go | 10 +-- internal/message/consumer.go | 1 + internal/message/handler.go | 6 ++ internal/provider/xcustom/xcustom.go | 101 ++++++++++++++++++++++++ internal/recorder/recorder.go | 32 +++++--- internal/server/web/proxy/middleware.go | 5 +- internal/server/web/proxy/proxy.go | 3 + internal/server/web/proxy/x_custom.go | 74 +++++++++++++++++ internal/storage/postgresql/event.go | 40 ++++++---- internal/storage/postgresql/key.go | 26 ++++-- internal/validator/validator.go | 40 ++++++++-- 18 files changed, 454 insertions(+), 81 deletions(-) create mode 100644 internal/provider/xcustom/xcustom.go create mode 100644 internal/server/web/proxy/x_custom.go diff --git a/cmd/bricksllm/main.go b/cmd/bricksllm/main.go index 388e4e9..0bdd4b3 100644 --- a/cmd/bricksllm/main.go +++ b/cmd/bricksllm/main.go @@ -272,6 +272,14 @@ func main() { log.Sugar().Fatalf("error connecting to keys redis storage: %v", err) } + requestsLimitRedisStorage := redis.NewClient(defaultRedisOption(cfg, 11)) + + ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + if err := requestsLimitRedisStorage.Ping(ctx).Err(); err != nil { + log.Sugar().Fatalf("error connecting to requests limit redis storage: %v", err) + } + rateLimitCache := redisStorage.NewCache(rateLimitRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) costLimitCache := redisStorage.NewCache(costLimitRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) costStorage := redisStorage.NewStore(costRedisStorage, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) @@ -285,15 +293,15 @@ func main() { psCache := redisStorage.NewProviderSettingsCache(providerSettingsRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) keysCache := redisStorage.NewKeysCache(keysRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) + requestsLimitStorage := redisStorage.NewStore(requestsLimitRedisStorage, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) encryptor, err := encryptor.NewEncryptor(cfg.DecryptionEndpoint, cfg.EncryptionEndpoint, cfg.EnableEncrytion, cfg.EncryptionTimeout, cfg.Audience) if cfg.EnableEncrytion && err != nil { log.Sugar().Fatalf("error creating encryption client: %v", err) } - v := validator.NewValidator(costLimitCache, rateLimitCache, costStorage) - + v := validator.NewValidator(costLimitCache, rateLimitCache, costStorage, requestsLimitStorage) - m := manager.NewManager(store, costLimitCache, rateLimitCache, accessCache, keysCache) + m := manager.NewManager(store, costLimitCache, rateLimitCache, accessCache, keysCache, requestsLimitStorage) krm := manager.NewReportingManager(costStorage, store, store, v) psm := manager.NewProviderSettingsManager(store, psCache, encryptor) cpm := manager.NewCustomProvidersManager(store, cpMemStore) @@ -330,7 +338,7 @@ func main() { uv := validator.NewUserValidator(userCostLimitCache, userRateLimitCache, userCostStorage) - rec := recorder.NewRecorder(costStorage, userCostStorage, costLimitCache, userCostLimitCache, ce, store) + rec := 
recorder.NewRecorder(costStorage, userCostStorage, costLimitCache, userCostLimitCache, ce, store, requestsLimitStorage) rlm := manager.NewRateLimitManager(rateLimitCache, userRateLimitCache) a := auth.NewAuthenticator(psm, m, rm, store, encryptor) diff --git a/internal/authenticator/authenticator.go b/internal/authenticator/authenticator.go index bec413c..638a9ee 100644 --- a/internal/authenticator/authenticator.go +++ b/internal/authenticator/authenticator.go @@ -3,6 +3,7 @@ package auth import ( "errors" "fmt" + "github.com/bricks-cloud/bricksllm/internal/provider/xcustom" "math/rand" "net/http" "strconv" @@ -204,8 +205,20 @@ func anonymize(input string) string { return string(input[0:5]) + "**********************************************" } -func (a *Authenticator) AuthenticateHttpRequest(req *http.Request) (*key.ResponseKey, []*provider.Setting, error) { - raw, err := getApiKey(req) +func (a *Authenticator) AuthenticateHttpRequest(req *http.Request, xCustomProviderId string) (*key.ResponseKey, []*provider.Setting, error) { + var raw string + var err error + var settings []*provider.Setting + if xcustom.IsXCustomRequest(req) { + providerSetting, er := a.psm.GetSettingViaCache(xCustomProviderId) + if er != nil { + return nil, nil, er + } + settings = []*provider.Setting{providerSetting} + raw, err = xcustom.ExtractApiKey(req, providerSetting) + } else { + raw, err = getApiKey(req) + } if err != nil { return nil, nil, err } @@ -238,6 +251,28 @@ func (a *Authenticator) AuthenticateHttpRequest(req *http.Request) (*key.Respons return nil, nil, internal_errors.NewAuthError(fmt.Sprintf("key %s has been revoked", anonymize(raw))) } + if xcustom.IsXCustomRequest(req) { + pSetting := settings[0] + authString := strings.Replace( + pSetting.GetParam(xcustom.XCustomSettingFields.AuthMask), + "{{apikey}}", + pSetting.GetParam(xcustom.XCustomSettingFields.ApiKey), -1, + ) + location := xcustom.GetAuthLocation(pSetting.GetParam(xcustom.XCustomSettingFields.AuthLocation)) + target := pSetting.GetParam(xcustom.XCustomSettingFields.AuthTarget) + switch location { + case xcustom.AuthLocations.Query: + params := req.URL.Query() + params.Set(target, authString) + req.URL.RawQuery = params.Encode() + case xcustom.AuthLocations.Header: + req.Header.Set(target, authString) + default: + return nil, nil, errors.New("invalid xCustomAuth location") + } + return key, settings, nil + } + if strings.HasPrefix(req.URL.Path, "/api/routes") { err = a.canKeyAccessCustomRoute(req.URL.Path, key.KeyId) if err != nil { diff --git a/internal/errors/expiration_err.go b/internal/errors/expiration_err.go index cce8380..59aec34 100644 --- a/internal/errors/expiration_err.go +++ b/internal/errors/expiration_err.go @@ -1,8 +1,9 @@ package errors const ( - TtlExpiration string = "ttl" - CostLimitExpiration string = "cost-limit" + TtlExpiration string = "ttl" + CostLimitExpiration string = "cost-limit" + RequestsLimitExpiration string = "requests-limit" ) type ExpirationError struct { diff --git a/internal/event/key_reporting.go b/internal/event/key_reporting.go index 9550861..6ebf3f4 100644 --- a/internal/event/key_reporting.go +++ b/internal/event/key_reporting.go @@ -29,11 +29,13 @@ type KeyRingReportingRequest struct { Limit int `json:"limit"` Offset int `json:"offset"` Revoked *bool `json:"revoked"` + TopBy string `json:"topBy"` } type KeyRingDataPoint struct { KeyRing string `json:"keyRing"` CostInUsd float64 `json:"costInUsd"` + Requests int `json:"requests"` } type KeyRingReportingResponse struct { @@ -47,8 +49,13 @@ type 
SpentKeyReportingRequest struct { Offset int `json:"offset"` } +type SpentKey struct { + KeyRing string `json:"keyRing"` + LinkedKeyId string `json:"linkedKeyId"` +} + type SpentKeyReportingResponse struct { - KeyRings []string `json:"keyRings"` + Keys []SpentKey `json:"keys"` } type UsageReportingRequest struct { @@ -56,10 +63,14 @@ type UsageReportingRequest struct { } type UsageData struct { - LastDayUsage float64 `json:"lastDayUsage"` - LastWeekUsage float64 `json:"lastWeekUsage"` - LastMonthUsage float64 `json:"lastMonthUsage"` - TotalUsage float64 `json:"totalUsage"` + LastDayUsage float64 `json:"lastDayUsage"` + LastWeekUsage float64 `json:"lastWeekUsage"` + LastMonthUsage float64 `json:"lastMonthUsage"` + TotalUsage float64 `json:"totalUsage"` + LastDayUsageRequests int `json:"lastDayUsageRequests"` + LastWeekUsageRequests int `json:"lastWeekUsageRequests"` + LastMonthUsageRequests int `json:"lastMonthUsageRequests"` + TotalUsageRequests int `json:"totalUsageRequests"` } type UsageReportingResponse struct { diff --git a/internal/key/key.go b/internal/key/key.go index 59c7b18..94eb537 100644 --- a/internal/key/key.go +++ b/internal/key/key.go @@ -25,6 +25,7 @@ type UpdateKey struct { CostLimitInUsdUnit *TimeUnit `json:"costLimitInUsdUnit"` RateLimitOverTime *int `json:"rateLimitOverTime"` RateLimitUnit *TimeUnit `json:"rateLimitUnit"` + RequestsLimit *int `json:"requestsLimit"` AllowedPaths *[]PathConfig `json:"allowedPaths,omitempty"` ShouldLogRequest *bool `json:"shouldLogRequest"` ShouldLogResponse *bool `json:"shouldLogResponse"` @@ -51,6 +52,10 @@ func (uk *UpdateKey) Validate() error { invalid = append(invalid, "costLimitInUsd") } + if uk.RequestsLimit != nil && *uk.RequestsLimit < 0 { + invalid = append(invalid, "requestsLimit") + } + if uk.UpdatedAt <= 0 { invalid = append(invalid, "updatedAt") } @@ -173,6 +178,7 @@ type RequestKey struct { RotationEnabled bool `json:"rotationEnabled"` PolicyId string `json:"policyId"` IsKeyNotHashed bool `json:"isKeyNotHashed"` + RequestsLimit int `json:"requestsLimit"` } func (rk *RequestKey) Validate() error { @@ -237,6 +243,10 @@ func (rk *RequestKey) Validate() error { invalid = append(invalid, "rateLimitOverTime") } + if rk.RequestsLimit < 0 { + invalid = append(invalid, "requestsLimit") + } + if len(rk.Ttl) != 0 { _, err := time.ParseDuration(rk.Ttl) if err != nil { @@ -317,6 +327,7 @@ type ResponseKey struct { CostLimitInUsdUnit TimeUnit `json:"costLimitInUsdUnit"` RateLimitOverTime int `json:"rateLimitOverTime"` RateLimitUnit TimeUnit `json:"rateLimitUnit"` + RequestsLimit int `json:"requestsLimit"` Ttl string `json:"ttl"` KeyRing string `json:"keyRing"` SettingId string `json:"settingId"` diff --git a/internal/manager/key.go b/internal/manager/key.go index 3401db4..f4f5b2d 100644 --- a/internal/manager/key.go +++ b/internal/manager/key.go @@ -46,21 +46,27 @@ type keyCache interface { Get(keyId string) (*key.ResponseKey, error) } +type requestsLimitStorage interface { + DeleteCounter(keyId string) error +} + type Manager struct { - s Storage - clc costLimitCache - rlc rateLimitCache - ac accessCache - kc keyCache + s Storage + clc costLimitCache + rlc rateLimitCache + ac accessCache + kc keyCache + rqls requestsLimitStorage } -func NewManager(s Storage, clc costLimitCache, rlc rateLimitCache, ac accessCache, kc keyCache) *Manager { +func NewManager(s Storage, clc costLimitCache, rlc rateLimitCache, ac accessCache, kc keyCache, rqls requestsLimitStorage) *Manager { return &Manager{ - s: s, - clc: clc, - rlc: rlc, - ac: ac, - kc: kc, 
+ s: s, + clc: clc, + rlc: rlc, + ac: ac, + kc: kc, + rqls: rqls, } } @@ -175,6 +181,12 @@ func (m *Manager) UpdateKey(id string, uk *key.UpdateKey) (*key.ResponseKey, err return nil, err } } + if uk.RequestsLimit != nil { + err := m.rqls.DeleteCounter(id) + if err != nil { + return nil, err + } + } if uk.PolicyId != nil { if len(*uk.PolicyId) != 0 { diff --git a/internal/manager/provider_setting.go b/internal/manager/provider_setting.go index d02e699..c67fc0a 100644 --- a/internal/manager/provider_setting.go +++ b/internal/manager/provider_setting.go @@ -3,6 +3,8 @@ package manager import ( "encoding/json" "fmt" + "github.com/bricks-cloud/bricksllm/internal/provider/xcustom" + "slices" "strconv" "strings" "time" @@ -39,6 +41,8 @@ type ProviderSettingsManager struct { Encryptor Encryptor } +var nativelySupportedProviders = []string{"openai", "anthropic", "azure", "vllm", "deepinfra", "bedrock", "xCustom"} + func NewProviderSettingsManager(s ProviderSettingsStorage, cache ProviderSettingsCache, encryptor Encryptor) *ProviderSettingsManager { return &ProviderSettingsManager{ Storage: s, @@ -48,7 +52,7 @@ func NewProviderSettingsManager(s ProviderSettingsStorage, cache ProviderSetting } func isProviderNativelySupported(provider string) bool { - return provider == "openai" || provider == "anthropic" || provider == "azure" || provider == "vllm" || provider == "deepinfra" || provider == "bedrock" + return slices.Contains(nativelySupportedProviders, provider) } func findMissingAuthParams(providerName string, params map[string]string) string { @@ -99,6 +103,25 @@ func findMissingAuthParams(providerName string, params map[string]string) string } } + if providerName == "xCustom" { + val := params["apikey"] + if len(val) == 0 { + missingFields = append(missingFields, "apikey") + } + val = params["endpoint"] + if len(val) == 0 { + missingFields = append(missingFields, "endpoint") + } + val = params["authLocation"] + if len(val) == 0 { + missingFields = append(missingFields, "authLocation") + } + val = params["authTemplate"] + if !strings.Contains(val, "{{apikey}}") { + missingFields = append(missingFields, "authTemplate") + } + } + return strings.Join(missingFields, ",") } @@ -160,6 +183,18 @@ func (m *ProviderSettingsManager) CreateSetting(setting *provider.Setting) (*pro setting.CreatedAt = time.Now().Unix() setting.UpdatedAt = time.Now().Unix() + if setting.Provider == "xCustom" { + advancedSetting, err := xcustom.AdvancedXCustomSetting(setting.Setting) + if err != nil { + return nil, err + } + merged := setting.Setting + for k, v := range advancedSetting { + merged[k] = v + } + setting.Setting = merged + } + if m.Encryptor.Enabled() { params, err := m.EncryptParams(setting.UpdatedAt, setting.Provider, setting.Setting) if err != nil { @@ -183,15 +218,10 @@ func (m *ProviderSettingsManager) UpdateSetting(id string, setting *provider.Upd } if len(setting.Setting) != 0 { - if err := m.validateSettings(existing.Provider, setting.Setting); err != nil { + merged, err := m.getMergedSettings(existing, setting.Setting) + if err != nil { return nil, err } - - merged := existing.Setting - for k, v := range setting.Setting { - merged[k] = v - } - setting.Setting = merged } @@ -214,6 +244,31 @@ func (m *ProviderSettingsManager) UpdateSetting(id string, setting *provider.Upd return m.Storage.UpdateProviderSetting(id, setting) } +func (m *ProviderSettingsManager) getMergedSettings(existing *provider.Setting, setting map[string]string) (map[string]string, error) { + merged := existing.Setting + apikey, ok := 
setting["apikey"] + if ok && apikey == "revoked" { + merged["apikey"] = apikey + return merged, nil + } + for k, v := range setting { + merged[k] = v + } + if existing.Provider == "xCustom" { + advancedSetting, err := xcustom.AdvancedXCustomSetting(setting) + if err != nil { + return nil, err + } + for k, v := range advancedSetting { + merged[k] = v + } + } + if err := m.validateSettings(existing.Provider, merged); err != nil { + return nil, err + } + return merged, nil +} + func (m *ProviderSettingsManager) GetSettingViaCache(id string) (*provider.Setting, error) { setting, _ := m.Cache.Get(id) diff --git a/internal/manager/reporting.go b/internal/manager/reporting.go index 068fcfc..2560a37 100644 --- a/internal/manager/reporting.go +++ b/internal/manager/reporting.go @@ -14,7 +14,7 @@ type costStorage interface { type keyStorage interface { GetKey(keyId string) (*key.ResponseKey, error) - GetSpentKeyRings(tags []string, order string, limit, offset int, validator func(*key.ResponseKey) bool) ([]string, error) + GetSpentKeys(tags []string, order string, limit, offset int, validator func(*key.ResponseKey) bool) ([]event.SpentKey, error) } type keyValidator interface { @@ -31,7 +31,7 @@ type eventStorage interface { GetCustomIds(keyId string) ([]string, error) GetTopKeyDataPoints(start, end int64, tags, keyIds []string, order string, limit, offset int, name string, revoked *bool) ([]*event.KeyDataPoint, error) - GetTopKeyRingDataPoints(start, end int64, tags []string, order string, limit, offset int, revoked *bool) ([]*event.KeyRingDataPoint, error) + GetTopKeyRingDataPoints(start, end int64, tags []string, order string, limit, offset int, revoked *bool, topBy string) ([]*event.KeyRingDataPoint, error) GetUsageData(tags []string) (*event.UsageData, error) } @@ -130,7 +130,7 @@ func (rm *ReportingManager) GetTopKeyRingReporting(r *event.KeyRingReportingRequ return nil, internal_errors.NewValidationError("key reporting request order can only be desc or asc") } - dataPoints, err := rm.es.GetTopKeyRingDataPoints(r.Start, r.End, r.Tags, r.Order, r.Limit, r.Offset, r.Revoked) + dataPoints, err := rm.es.GetTopKeyRingDataPoints(r.Start, r.End, r.Tags, r.Order, r.Limit, r.Offset, r.Revoked, r.TopBy) if err != nil { return nil, err } @@ -160,12 +160,12 @@ func (rm *ReportingManager) GetSpentKeyReporting(r *event.SpentKeyReportingReque return true } - spentKeys, err := rm.ks.GetSpentKeyRings(r.Tags, r.Order, r.Limit, r.Offset, validator) + spentKeys, err := rm.ks.GetSpentKeys(r.Tags, r.Order, r.Limit, r.Offset, validator) if err != nil { return nil, err } return &event.SpentKeyReportingResponse{ - KeyRings: spentKeys, + Keys: spentKeys, }, nil } diff --git a/internal/message/consumer.go b/internal/message/consumer.go index 115f590..5d9dcd2 100644 --- a/internal/message/consumer.go +++ b/internal/message/consumer.go @@ -18,6 +18,7 @@ type recorder interface { RecordKeySpend(keyId string, micros int64, costLimitUnit key.TimeUnit) error RecordUserSpend(userId string, micros int64, costLimitUnit key.TimeUnit) error RecordEvent(e *event.Event) error + RecordKeyRequestSpent(keyId string) error } func NewConsumer(mc <-chan Message, log *zap.Logger, num int, handle func(Message) error) *Consumer { diff --git a/internal/message/handler.go b/internal/message/handler.go index 1e578c1..f03f3b8 100644 --- a/internal/message/handler.go +++ b/internal/message/handler.go @@ -331,6 +331,12 @@ func (h *Handler) HandleEventWithRequestAndResponse(m Message) error { var u *user.User + err = 
h.recorder.RecordKeyRequestSpent(e.Event.KeyId) + if err != nil { + telemetry.Incr("bricksllm.message.handler.handle_event_with_request_and_response.record_key_request_spend_error", nil, 1) + h.log.Debug("error when recording key request spend", zap.Error(err)) + } + if e.Event.CostInUsd != 0 { micros := int64(e.Event.CostInUsd * 1000000) err = h.recorder.RecordKeySpend(e.Event.KeyId, micros, e.Key.CostLimitInUsdUnit) diff --git a/internal/provider/xcustom/xcustom.go b/internal/provider/xcustom/xcustom.go new file mode 100644 index 0000000..746ce86 --- /dev/null +++ b/internal/provider/xcustom/xcustom.go @@ -0,0 +1,101 @@ +package xcustom + +import ( + "fmt" + "github.com/bricks-cloud/bricksllm/internal/provider" + "net/http" + "regexp" + + "strings" +) + +var XCustomSettingFields = struct { + ApiKey string + Endpoint string + AuthLocation string + AuthTemplate string + AuthTarget string + AuthMask string +}{ + ApiKey: "apikey", + Endpoint: "endpoint", + AuthLocation: "authLocation", + AuthTemplate: "authTemplate", + AuthTarget: "authTarget", + AuthMask: "authMask", +} + +type AuthLocation string + +var AuthLocations = struct { + Header AuthLocation + Query AuthLocation + Unknown AuthLocation +}{ + Header: AuthLocation("header"), + Query: AuthLocation("query"), + Unknown: AuthLocation("unknown"), +} + +const XProviderIdParam = "x_provider_id" + +func IsXCustomRequest(req *http.Request) bool { + return strings.HasPrefix(req.URL.RequestURI(), "/api/providers/xCustom/") +} + +func AdvancedXCustomSetting(src map[string]string) (map[string]string, error) { + rawLocation := src[XCustomSettingFields.AuthLocation] + location := GetAuthLocation(rawLocation) + var templateSeparator string + switch location { + case AuthLocations.Header: + templateSeparator = ":" + case AuthLocations.Query: + templateSeparator = "=" + default: + return nil, fmt.Errorf("unknown auth location: %s", location) + } + templateArr := strings.Split(src[XCustomSettingFields.AuthTemplate], templateSeparator) + if len(templateArr) != 2 { + return nil, fmt.Errorf("invalid auth template: %s", src[XCustomSettingFields.AuthTemplate]) + } + target := strings.TrimSpace(templateArr[0]) + mask := strings.TrimSpace(templateArr[1]) + return map[string]string{ + XCustomSettingFields.AuthTarget: target, + XCustomSettingFields.AuthMask: mask, + }, nil +} + +func ExtractApiKey(req *http.Request, pSetting *provider.Setting) (string, error) { + location := GetAuthLocation(pSetting.GetParam(XCustomSettingFields.AuthLocation)) + target := strings.TrimSpace(pSetting.GetParam(XCustomSettingFields.AuthTarget)) + var reqAuthStr string + switch location { + case AuthLocations.Header: + reqAuthStr = req.Header.Get(target) + case AuthLocations.Query: + reqAuthStr = req.URL.Query().Get(target) + default: + return "", fmt.Errorf("unknown auth location: %s", location) + } + mask := strings.TrimSpace(pSetting.GetParam(XCustomSettingFields.AuthMask)) + regexStr := strings.Replace(mask, "{{apikey}}", "(?P.*)", -1) + regex := regexp.MustCompile(regexStr) + matches := regex.FindStringSubmatch(reqAuthStr) + if len(matches) < 2 { + return "", fmt.Errorf("error extracting apikey: %s", pSetting.Id) + } + return strings.TrimSpace(matches[1]), nil +} + +func GetAuthLocation(raw string) AuthLocation { + switch raw { + case "header": + return AuthLocations.Header + case "query": + return AuthLocations.Query + default: + return AuthLocations.Unknown + } +} diff --git a/internal/recorder/recorder.go b/internal/recorder/recorder.go index a48f210..5c8b73f 100644 --- 
a/internal/recorder/recorder.go +++ b/internal/recorder/recorder.go @@ -6,12 +6,13 @@ import ( ) type Recorder struct { - s Store - c Cache - us Store - uc Cache - ce CostEstimator - es EventsStore + s Store + c Cache + us Store + uc Cache + ce CostEstimator + es EventsStore + reqLimitStore Store } type EventsStore interface { @@ -31,14 +32,15 @@ type CostEstimator interface { EstimateCompletionCost(model string, tks int) (float64, error) } -func NewRecorder(s, us Store, c, uc Cache, ce CostEstimator, es EventsStore) *Recorder { +func NewRecorder(s, us Store, c, uc Cache, ce CostEstimator, es EventsStore, reqLimitStore Store) *Recorder { return &Recorder{ - s: s, - c: c, - us: us, - uc: uc, - ce: ce, - es: es, + s: s, + c: c, + us: us, + uc: uc, + ce: ce, + es: es, + reqLimitStore: reqLimitStore, } } @@ -74,6 +76,10 @@ func (r *Recorder) RecordKeySpend(keyId string, micros int64, costLimitUnit key. return nil } +func (r *Recorder) RecordKeyRequestSpent(keyId string) error { + return r.reqLimitStore.IncrementCounter(keyId, 1) +} + func (r *Recorder) RecordEvent(e *event.Event) error { return r.es.InsertEvent(e) } diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index 6e2a75e..5a215f5 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/json" "fmt" + "github.com/bricks-cloud/bricksllm/internal/provider/xcustom" "io" "net/http" "strconv" @@ -67,7 +68,7 @@ type deepinfraEstimator interface { } type authenticator interface { - AuthenticateHttpRequest(req *http.Request) (*key.ResponseKey, []*provider.Setting, error) + AuthenticateHttpRequest(req *http.Request, xCustomProviderId string) (*key.ResponseKey, []*provider.Setting, error) } type validator interface { @@ -304,7 +305,7 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag return } - kc, settings, err := a.AuthenticateHttpRequest(c.Request) + kc, settings, err := a.AuthenticateHttpRequest(c.Request, c.Param(xcustom.XProviderIdParam)) enrichedEvent.Key = kc _, ok := err.(notAuthorizedError) if ok { diff --git a/internal/server/web/proxy/proxy.go b/internal/server/web/proxy/proxy.go index e6078f4..8bf70b9 100644 --- a/internal/server/web/proxy/proxy.go +++ b/internal/server/web/proxy/proxy.go @@ -220,6 +220,9 @@ func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyMan router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/cancel", getCancelVectorStoreFileBatchHandler(prod, client)) router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/files", getListVectorStoreFileBatchFilesHandler(prod, client)) + // codio xCustom + router.Any("/api/providers/xCustom/:x_provider_id/*wildcard", getXCustomHandler(prod)) + srv := &http.Server{ Addr: ":8002", Handler: router, diff --git a/internal/server/web/proxy/x_custom.go b/internal/server/web/proxy/x_custom.go new file mode 100644 index 0000000..c0132f6 --- /dev/null +++ b/internal/server/web/proxy/x_custom.go @@ -0,0 +1,74 @@ +package proxy + +import ( + "context" + "errors" + "fmt" + "github.com/bricks-cloud/bricksllm/internal/provider" + "github.com/bricks-cloud/bricksllm/internal/provider/xcustom" + "github.com/bricks-cloud/bricksllm/internal/telemetry" + "github.com/bricks-cloud/bricksllm/internal/util" + "github.com/gin-gonic/gin" + "net/http" + "net/http/httputil" + "net/url" + "strings" +) + +func getXCustomHandler(prod bool) 
gin.HandlerFunc { + return func(c *gin.Context) { + log := util.GetLogFromCtx(c) + telemetry.Incr("bricksllm.proxy.get_x_custom_handler.requests", nil, 1) + + if c == nil || c.Request == nil { + JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty") + return + } + + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) + defer cancel() + + providerId := c.Param(xcustom.XProviderIdParam) + rawProviderSettings, exists := c.Get("settings") + if !exists { + logError(log, "error provider setting", prod, errors.New("provider setting not found")) + c.JSON(http.StatusInternalServerError, "[BricksLLM] no settings found") + return + } + settings, ok := rawProviderSettings.([]*provider.Setting) + if !ok { + logError(log, "error provider setting", prod, errors.New("incorrect setting")) + c.JSON(http.StatusInternalServerError, "[BricksLLM] incorrect provider setting") + return + } + var providerSetting *provider.Setting + for _, setting := range settings { + if setting.Id == providerId { + providerSetting = setting + } + } + if providerSetting == nil { + logError(log, "error provider setting", prod, errors.New("provider setting not found")) + c.JSON(http.StatusInternalServerError, "[BricksLLM] no settings found") + return + } + wildcard := c.Param("wildcard") + endpoint := strings.TrimSuffix(providerSetting.GetParam("endpoint"), "/") + targetUrl := fmt.Sprintf("%s%s", endpoint, wildcard) + target, e := url.Parse(targetUrl) + if e != nil { + logError(log, "error parsing target url", prod, e) + c.JSON(http.StatusInternalServerError, "[BricksLLM] invalid endpoint") + return + } + + proxy := &httputil.ReverseProxy{ + Rewrite: func(r *httputil.ProxyRequest) { + r.SetURL(target) + r.Out.URL.Path, r.Out.URL.RawPath = target.Path, target.RawPath + r.Out.WithContext(ctx) + }, + } + proxy.ServeHTTP(c.Writer, c.Request) + } +} diff --git a/internal/storage/postgresql/event.go b/internal/storage/postgresql/event.go index 50d5a41..274d19b 100644 --- a/internal/storage/postgresql/event.go +++ b/internal/storage/postgresql/event.go @@ -6,6 +6,7 @@ import ( "encoding/json" "errors" "fmt" + "slices" "strings" "time" @@ -13,6 +14,8 @@ import ( "github.com/lib/pq" ) +var allowedTopBy = []string{"total_cost_in_usd", "total_requests"} + func (s *Store) CreateEventsByDayTable() error { createTableQuery := ` CREATE TABLE IF NOT EXISTS event_agg_by_day ( @@ -471,7 +474,7 @@ func (s *Store) GetTopKeyDataPoints(start, end int64, tags, keyIds []string, ord return data, nil } -func (s *Store) GetTopKeyRingDataPoints(start, end int64, tags []string, order string, limit, offset int, revoked *bool) ([]*event.KeyRingDataPoint, error) { +func (s *Store) GetTopKeyRingDataPoints(start, end int64, tags []string, order string, limit, offset int, revoked *bool, topBy string) ([]*event.KeyRingDataPoint, error) { args := []any{} condition := "" condition2 := "" @@ -494,21 +497,12 @@ func (s *Store) GetTopKeyRingDataPoints(start, end int64, tags []string, order s } if len(tags) > 0 { - condition2 += fmt.Sprintf("AND keys.tags @> $%d", index) + condition2 += fmt.Sprintf("AND events.tags @> $%d", index) args = append(args, pq.Array(tags)) index++ } - if revoked != nil { - bools := "False" - if *revoked { - bools = "True" - } - - condition2 += fmt.Sprintf(" AND keys.revoked = %s", bools) - } - query := fmt.Sprintf(` WITH keys_table AS ( @@ -517,7 +511,8 @@ func (s *Store) GetTopKeyRingDataPoints(start, end int64, tags []string, order s ( SELECT key_ring, - SUM(cost_in_usd) AS total_cost_in_usd 
+ SUM(cost_in_usd) AS total_cost_in_usd, + COUNT(*) AS total_requests FROM events LEFT JOIN keys ON keys.key_id = events.key_id @@ -531,9 +526,13 @@ func (s *Store) GetTopKeyRingDataPoints(start, end int64, tags []string, order s qorder = "ASC" } + qtopBy := "total_cost_in_usd" + if topBy != "" && slices.Contains(allowedTopBy, topBy) { + qtopBy = topBy + } query += fmt.Sprintf(` - ORDER BY total_cost_in_usd %s -`, qorder) + ORDER BY %s %s +`, qtopBy, qorder) if limit != 0 { query += fmt.Sprintf(` @@ -558,6 +557,7 @@ func (s *Store) GetTopKeyRingDataPoints(start, end int64, tags []string, order s additional := []any{ &keyRing, &e.CostInUsd, + &e.Requests, } if err := rows.Scan( @@ -597,10 +597,14 @@ func (s *Store) GetUsageData(tags []string) (*event.UsageData, error) { COALESCE(SUM(cost_in_usd), 0) AS total_cost_in_usd, COALESCE(SUM(CASE WHEN created_at > %d THEN cost_in_usd ELSE 0 END), 0) AS total_cost_in_usd_last_day, COALESCE(SUM(CASE WHEN created_at > %d THEN cost_in_usd ELSE 0 END), 0) AS total_cost_in_usd_last_week, - COALESCE(SUM(CASE WHEN created_at > %d THEN cost_in_usd ELSE 0 END), 0) AS total_cost_in_usd_last_month + COALESCE(SUM(CASE WHEN created_at > %d THEN cost_in_usd ELSE 0 END), 0) AS total_cost_in_usd_last_month, + COALESCE(SUM(1), 0) AS total_requests, + COALESCE(SUM(CASE WHEN created_at > %d THEN 1 ELSE 0 END), 0) AS total_requests_last_day, + COALESCE(SUM(CASE WHEN created_at > %d THEN 1 ELSE 0 END), 0) AS total_requests_last_week, + COALESCE(SUM(CASE WHEN created_at > %d THEN 1 ELSE 0 END), 0) AS total_requests_last_month FROM events WHERE %s - `, dayAgo, weekAgo, monthAgo, condition) + `, dayAgo, weekAgo, monthAgo, dayAgo, weekAgo, monthAgo, condition) ctx, cancel := context.WithTimeout(context.Background(), s.rt) defer cancel() @@ -611,6 +615,10 @@ func (s *Store) GetUsageData(tags []string) (*event.UsageData, error) { &data.LastDayUsage, &data.LastWeekUsage, &data.LastMonthUsage, + &data.TotalUsageRequests, + &data.LastDayUsageRequests, + &data.LastWeekUsageRequests, + &data.LastMonthUsageRequests, ); err != nil { if err == sql.ErrNoRows { return nil, nil diff --git a/internal/storage/postgresql/key.go b/internal/storage/postgresql/key.go index c129aa9..e92f114 100644 --- a/internal/storage/postgresql/key.go +++ b/internal/storage/postgresql/key.go @@ -5,6 +5,7 @@ import ( "database/sql" "encoding/json" "fmt" + "github.com/bricks-cloud/bricksllm/internal/event" "strings" internal_errors "github.com/bricks-cloud/bricksllm/internal/errors" @@ -56,7 +57,7 @@ func (s *Store) AlterKeysTable() error { END IF; END $$; - ALTER TABLE keys ADD COLUMN IF NOT EXISTS setting_id VARCHAR(255), ADD COLUMN IF NOT EXISTS allowed_paths JSONB, ADD COLUMN IF NOT EXISTS setting_ids VARCHAR(255)[] NOT NULL DEFAULT ARRAY[]::VARCHAR(255)[], ADD COLUMN IF NOT EXISTS should_log_request BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS should_log_response BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS rotation_enabled BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS policy_id VARCHAR(255) NOT NULL DEFAULT '', ADD COLUMN IF NOT EXISTS is_key_not_hashed BOOLEAN NOT NULL DEFAULT FALSE; + ALTER TABLE keys ADD COLUMN IF NOT EXISTS setting_id VARCHAR(255), ADD COLUMN IF NOT EXISTS allowed_paths JSONB, ADD COLUMN IF NOT EXISTS setting_ids VARCHAR(255)[] NOT NULL DEFAULT ARRAY[]::VARCHAR(255)[], ADD COLUMN IF NOT EXISTS should_log_request BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS should_log_response BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS 
rotation_enabled BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS policy_id VARCHAR(255) NOT NULL DEFAULT '', ADD COLUMN IF NOT EXISTS is_key_not_hashed BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS requests_limit INT NOT NULL DEFAULT 0; ` ctxTimeout, cancel := context.WithTimeout(context.Background(), s.wt) @@ -189,6 +190,7 @@ func (s *Store) GetKeys(tags, keyIds []string, provider string) ([]*key.Response &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } @@ -314,6 +316,7 @@ func (s *Store) GetKeysV2(tags, keyIds []string, revoked *bool, limit, offset in &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } @@ -393,6 +396,7 @@ func (s *Store) GetKeyByHash(hash string) (*key.ResponseKey, error) { &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ) if err != nil { @@ -457,6 +461,7 @@ func (s *Store) GetKey(keyId string) (*key.ResponseKey, error) { &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } @@ -483,7 +488,7 @@ func (s *Store) GetKey(keyId string) (*key.ResponseKey, error) { return keys[0], nil } -func (s *Store) GetSpentKeyRings(tags []string, order string, limit, offset int, validator func(*key.ResponseKey) bool) ([]string, error) { +func (s *Store) GetSpentKeys(tags []string, order string, limit, offset int, validator func(*key.ResponseKey) bool) ([]event.SpentKey, error) { args := []any{} condition := "" @@ -524,7 +529,7 @@ func (s *Store) GetSpentKeyRings(tags []string, order string, limit, offset int, } defer rows.Close() - invalidKeyRings := []string{} + invalidKeyRings := []event.SpentKey{} for rows.Next() { var k key.ResponseKey var settingId sql.NullString @@ -553,6 +558,7 @@ func (s *Store) GetSpentKeyRings(tags []string, order string, limit, offset int, &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } @@ -569,7 +575,10 @@ func (s *Store) GetSpentKeyRings(tags []string, order string, limit, offset int, } if !validator(pk) { - invalidKeyRings = append(invalidKeyRings, pk.KeyRing) + invalidKeyRings = append(invalidKeyRings, event.SpentKey{ + KeyRing: pk.KeyRing, + LinkedKeyId: pk.KeyId, + }) } } @@ -615,6 +624,7 @@ func (s *Store) GetAllKeys() ([]*key.ResponseKey, error) { &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } @@ -675,6 +685,7 @@ func (s *Store) GetUpdatedKeys(updatedAt int64) ([]*key.ResponseKey, error) { &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } @@ -853,6 +864,7 @@ func (s *Store) UpdateKey(id string, uk *key.UpdateKey) (*key.ResponseKey, error &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { if err == sql.ErrNoRows { return nil, internal_errors.NewNotFoundError(fmt.Sprintf("key not found for id: %s", id)) @@ -877,8 +889,8 @@ func (s *Store) UpdateKey(id string, uk *key.UpdateKey) (*key.ResponseKey, error func (s *Store) CreateKey(rk *key.RequestKey) (*key.ResponseKey, error) { query := ` - INSERT INTO keys (name, created_at, updated_at, tags, revoked, key_id, key, revoked_reason, cost_limit_in_usd, cost_limit_in_usd_over_time, cost_limit_in_usd_unit, rate_limit_over_time, rate_limit_unit, ttl, key_ring, setting_id, allowed_paths, setting_ids, should_log_request, should_log_response, rotation_enabled, policy_id, is_key_not_hashed) - VALUES ($1, $2, 
$3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23) + INSERT INTO keys (name, created_at, updated_at, tags, revoked, key_id, key, revoked_reason, cost_limit_in_usd, cost_limit_in_usd_over_time, cost_limit_in_usd_unit, rate_limit_over_time, rate_limit_unit, ttl, key_ring, setting_id, allowed_paths, setting_ids, should_log_request, should_log_response, rotation_enabled, policy_id, is_key_not_hashed, requests_limit) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24) RETURNING *; ` @@ -911,6 +923,7 @@ func (s *Store) CreateKey(rk *key.RequestKey) (*key.ResponseKey, error) { rk.RotationEnabled, rk.PolicyId, rk.IsKeyNotHashed, + rk.RequestsLimit, } ctxTimeout, cancel := context.WithTimeout(context.Background(), s.wt) @@ -944,6 +957,7 @@ func (s *Store) CreateKey(rk *key.RequestKey) (*key.ResponseKey, error) { &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } diff --git a/internal/validator/validator.go b/internal/validator/validator.go index 7efdfb9..c3df819 100644 --- a/internal/validator/validator.go +++ b/internal/validator/validator.go @@ -21,21 +21,28 @@ type costLimitStorage interface { GetCounter(keyId string) (int64, error) } +type requestsLimitStorage interface { + GetCounter(keyId string) (int64, error) +} + type Validator struct { - clc costLimitCache - rlc rateLimitCache - cls costLimitStorage + clc costLimitCache + rlc rateLimitCache + cls costLimitStorage + rqls requestsLimitStorage } func NewValidator( clc costLimitCache, rlc rateLimitCache, cls costLimitStorage, + rqls requestsLimitStorage, ) *Validator { return &Validator{ - clc: clc, - rlc: rlc, - cls: cls, + clc: clc, + rlc: rlc, + cls: cls, + rqls: rqls, } } @@ -53,7 +60,12 @@ func (v *Validator) Validate(k *key.ResponseKey, promptCost float64) error { return internal_errors.NewExpirationError("api key expired", internal_errors.TtlExpiration) } - err := v.validateRateLimitOverTime(k.KeyId, k.RateLimitOverTime, k.RateLimitUnit) + err := v.validateRequestsLimit(k.KeyId, k.RequestsLimit) + if err != nil { + return err + } + + err = v.validateRateLimitOverTime(k.KeyId, k.RateLimitOverTime, k.RateLimitUnit) if err != nil { return err } @@ -136,3 +148,17 @@ func (v *Validator) validateCostLimit(keyId string, costLimit float64) error { return nil } + +func (v *Validator) validateRequestsLimit(keyId string, requestsLimit int) error { + if requestsLimit == 0 { + return nil + } + existingTotalRequests, err := v.rqls.GetCounter(keyId) + if err != nil { + return errors.New("failed to get total requests") + } + if existingTotalRequests >= int64(requestsLimit) { + return internal_errors.NewExpirationError(fmt.Sprintf("total requests limit: %d, has been reached", requestsLimit), internal_errors.RequestsLimitExpiration) + } + return nil +}
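
Note on the xCustom auth settings introduced in the last patch: a provider setting supplies apikey, endpoint, authLocation ("header" or "query") and an authTemplate containing the literal {{apikey}}. The template is split on ":" for headers or "=" for query parameters, yielding authTarget (the header or query parameter name) and authMask (the value pattern). Inbound requests to /api/providers/xCustom/:x_provider_id/*wildcard have the caller's key extracted by matching the mask against that header or query value, and the outbound request gets the mask re-filled with the stored provider key. The standalone sketch below mirrors that derivation and extraction for illustration only: the function names, the sample template and the sample key are invented here, and unlike the in-tree code it quotes the mask with regexp.QuoteMeta and uses a plain capture group rather than a named one.

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// deriveAuthFields mirrors the authTemplate handling: split the template into
// the header/query name (target) and the value pattern (mask) that carries
// the {{apikey}} placeholder.
func deriveAuthFields(location, template string) (target, mask string, err error) {
	sep := ":"
	if location == "query" {
		sep = "="
	}
	parts := strings.SplitN(template, sep, 2)
	if len(parts) != 2 || !strings.Contains(parts[1], "{{apikey}}") {
		return "", "", fmt.Errorf("invalid auth template: %s", template)
	}
	return strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]), nil
}

// extractAPIKey mirrors the mask-based extraction: turn the mask into a
// regexp with a capture group where {{apikey}} sits and pull the key out of
// the raw header or query value.
func extractAPIKey(mask, raw string) (string, error) {
	pattern := strings.Replace(regexp.QuoteMeta(mask), regexp.QuoteMeta("{{apikey}}"), "(.+)", 1)
	m := regexp.MustCompile(pattern).FindStringSubmatch(raw)
	if len(m) < 2 {
		return "", fmt.Errorf("api key not found in %q", raw)
	}
	return strings.TrimSpace(m[1]), nil
}

func main() {
	// Sample values for illustration; any real setting would come from the
	// xCustom provider setting stored by the admin API.
	target, mask, _ := deriveAuthFields("header", "Authorization: Bearer {{apikey}}")
	fmt.Println(target, "|", mask) // Authorization | Bearer {{apikey}}

	key, _ := extractAPIKey(mask, "Bearer sk-bricksllm-test-123")
	fmt.Println(key) // sk-bricksllm-test-123
}

Run against authTemplate "Authorization: Bearer {{apikey}}" this prints the target Authorization, the mask Bearer {{apikey}}, and the extracted key, which is the round trip the authenticator and the xcustom package perform on each proxied request.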