From 58e8b203faf87bc72ec30c6e72e6db45f7110a74 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 10 Aug 2024 15:53:48 -0700 Subject: [PATCH 01/51] increase postgresql read timeout and add dd metric --- internal/config/config.go | 2 +- internal/manager/key.go | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/config/config.go b/internal/config/config.go index e4061b2..2f2fc8c 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -27,7 +27,7 @@ type Config struct { RedisPassword string `koanf:"redis_password" env:"REDIS_PASSWORD"` RedisReadTimeout time.Duration `koanf:"redis_read_time_out" env:"REDIS_READ_TIME_OUT" envDefault:"1s"` RedisWriteTimeout time.Duration `koanf:"redis_write_time_out" env:"REDIS_WRITE_TIME_OUT" envDefault:"500ms"` - PostgresqlReadTimeout time.Duration `koanf:"postgresql_read_time_out" env:"POSTGRESQL_READ_TIME_OUT" envDefault:"2m"` + PostgresqlReadTimeout time.Duration `koanf:"postgresql_read_time_out" env:"POSTGRESQL_READ_TIME_OUT" envDefault:"10m"` PostgresqlWriteTimeout time.Duration `koanf:"postgresql_write_time_out" env:"POSTGRESQL_WRITE_TIME_OUT" envDefault:"5s"` InMemoryDbUpdateInterval time.Duration `koanf:"in_memory_db_update_interval" env:"IN_MEMORY_DB_UPDATE_INTERVAL" envDefault:"5s"` TelemetryProvider string `koanf:"telemetry_provider" env:"TELEMETRY_PROVIDER" envDefault:"statsd"` diff --git a/internal/manager/key.go b/internal/manager/key.go index 287fa8c..2387886 100644 --- a/internal/manager/key.go +++ b/internal/manager/key.go @@ -222,6 +222,10 @@ func (m *Manager) GetKeyViaCache(raw string) (*key.ResponseKey, error) { k = stored } + if k != nil { + telemetry.Incr("bricksllm.manager.get_key_via_cache.cache_hit", nil, 1) + } + return k, nil } From 9a284709756c86cf36cdaa9148b3f9e265d9bce5 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 10 Aug 2024 16:01:09 -0700 Subject: [PATCH 02/51] update CHANGELOG --- CHANGELOG.md | 7 +++++++ internal/provider/openai/cost.go | 2 ++ 2 files changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 498fc20..fcaa21b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## 1.35.0 - 2024-08-10 +### Added +- Added cost tracking for `gpt-4o-2024-08-06` + +### Changed +- Changed default read time out for PostgreSQL + ## 1.34.0 - 2024-07-29 ### Added - Added cost tracking for `gpt-4o-mini` diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index 391b916..adcdd73 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -38,6 +38,7 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "gpt-4o-mini": 0.00015, "gpt-4o-mini-2024-07-18": 0.00015, "gpt-4o-2024-05-13": 0.005, + "gpt-4o-2024-08-06": 0.005, "gpt-4-1106-preview": 0.01, "gpt-4-turbo-preview": 0.01, "gpt-4-turbo": 0.01, @@ -100,6 +101,7 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "gpt-4o-mini": 0.0006, "gpt-4o-mini-2024-07-18": 0.0006, "gpt-4o-2024-05-13": 0.015, + "gpt-4o-2024-08-06": 0.015, "gpt-4-turbo-preview": 0.03, "gpt-4-turbo": 0.03, "gpt-4-turbo-2024-04-09": 0.03, From c342d01b848d12675eba58d5fe389651eff30fec Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 10 Aug 2024 16:42:07 -0700 Subject: [PATCH 03/51] update cache ttl --- CHANGELOG.md | 4 ++++ internal/manager/key.go | 2 +- internal/manager/provider_setting.go | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fcaa21b..a31833a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ 
+## 1.35.1 - 2024-08-10 +### Changed +- Changed cache TTL from `1h` to `24h` for keys and provider settings + ## 1.35.0 - 2024-08-10 ### Added - Added cost tracking for `gpt-4o-2024-08-06` diff --git a/internal/manager/key.go b/internal/manager/key.go index 2387886..3401db4 100644 --- a/internal/manager/key.go +++ b/internal/manager/key.go @@ -214,7 +214,7 @@ func (m *Manager) GetKeyViaCache(raw string) (*key.ResponseKey, error) { return stored, nil } - err = m.kc.Set(raw, bs, time.Hour) + err = m.kc.Set(raw, bs, 24*time.Hour) if err != nil { telemetry.Incr("bricksllm.manager.get_key_via_cache.set_error", nil, 1) } diff --git a/internal/manager/provider_setting.go b/internal/manager/provider_setting.go index c4bd474..29b0e6c 100644 --- a/internal/manager/provider_setting.go +++ b/internal/manager/provider_setting.go @@ -166,7 +166,7 @@ func (m *ProviderSettingsManager) GetSettingViaCache(id string) (*provider.Setti return stored, nil } - err = m.Cache.Set(id, bs, time.Hour) + err = m.Cache.Set(id, bs, 24*time.Hour) if err != nil { telemetry.Incr("bricksllm.provider_settings_manager.get_setting_via_cache.set_error", nil, 1) } From c7db37310c5106774817e45ba796906b151afa5d Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 10 Aug 2024 17:39:02 -0700 Subject: [PATCH 04/51] change db data type --- internal/event/reporting.go | 20 ++++++++++++++++++++ internal/manager/reporting.go | 6 +++--- internal/server/web/admin/admin.go | 2 +- internal/storage/postgresql/event.go | 16 ++++++++-------- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/internal/event/reporting.go b/internal/event/reporting.go index 91dbf6c..de8a601 100644 --- a/internal/event/reporting.go +++ b/internal/event/reporting.go @@ -14,12 +14,32 @@ type DataPoint struct { UserId string `json:"userId"` } +type DataPointV2 struct { + TimeStamp int64 `json:"timeStamp"` + NumberOfRequests int64 `json:"numberOfRequests"` + CostInUsd float64 `json:"costInUsd"` + LatencyInMs int64 `json:"latencyInMs"` + PromptTokenCount int64 `json:"promptTokenCount"` + CompletionTokenCount int64 `json:"completionTokenCount"` + SuccessCount int64 `json:"successCount"` + Model string `json:"model"` + KeyId string `json:"keyId"` + CustomId string `json:"customId"` + UserId string `json:"userId"` +} + type ReportingResponse struct { DataPoints []*DataPoint `json:"dataPoints"` LatencyInMsMedian float64 `json:"latencyInMsMedian"` LatencyInMs99th float64 `json:"latencyInMs99th"` } +type ReportingResponseV2 struct { + DataPoints []*DataPointV2 `json:"dataPoints"` + LatencyInMsMedian float64 `json:"latencyInMsMedian"` + LatencyInMs99th float64 `json:"latencyInMs99th"` +} + type ReportingRequest struct { KeyIds []string `json:"keyIds"` Tags []string `json:"tags"` diff --git a/internal/manager/reporting.go b/internal/manager/reporting.go index 1e9d89d..b9c76ad 100644 --- a/internal/manager/reporting.go +++ b/internal/manager/reporting.go @@ -21,7 +21,7 @@ type eventStorage interface { GetEventsV2(req *event.EventRequest) (*event.EventResponse, error) GetEventDataPoints(start, end, increment int64, tags, keyIds, customIds, userIds []string, filters []string) ([]*event.DataPoint, error) GetLatencyPercentiles(start, end int64, tags, keyIds []string) ([]float64, error) - GetAggregatedEventByDayDataPoints(start, end int64, keyIds []string) ([]*event.DataPoint, error) + GetAggregatedEventByDayDataPoints(start, end int64, keyIds []string) ([]*event.DataPointV2, error) GetUserIds(keyId string) ([]string, error) GetCustomIds(keyId string) ([]string, error) 
GetTopKeyDataPoints(start, end int64, tags, keyIds []string, order string, limit, offset int, name string, revoked *bool) ([]*event.KeyDataPoint, error) @@ -63,13 +63,13 @@ func (rm *ReportingManager) GetEventReporting(e *event.ReportingRequest) (*event }, nil } -func (rm *ReportingManager) GetAggregatedEventByDayReporting(e *event.ReportingRequest) (*event.ReportingResponse, error) { +func (rm *ReportingManager) GetAggregatedEventByDayReporting(e *event.ReportingRequest) (*event.ReportingResponseV2, error) { dataPoints, err := rm.es.GetAggregatedEventByDayDataPoints(e.Start, e.End, e.KeyIds) if err != nil { return nil, err } - return &event.ReportingResponse{ + return &event.ReportingResponseV2{ DataPoints: dataPoints, }, nil } diff --git a/internal/server/web/admin/admin.go b/internal/server/web/admin/admin.go index d11a240..2382dbd 100644 --- a/internal/server/web/admin/admin.go +++ b/internal/server/web/admin/admin.go @@ -40,7 +40,7 @@ type KeyReportingManager interface { GetEvents(userId, customId string, keyIds []string, start int64, end int64) ([]*event.Event, error) GetEventsV2(r *event.EventRequest) (*event.EventResponse, error) GetEventReporting(e *event.ReportingRequest) (*event.ReportingResponse, error) - GetAggregatedEventByDayReporting(e *event.ReportingRequest) (*event.ReportingResponse, error) + GetAggregatedEventByDayReporting(e *event.ReportingRequest) (*event.ReportingResponseV2, error) GetCustomIds(keyId string) ([]string, error) GetUserIds(keyId string) ([]string, error) } diff --git a/internal/storage/postgresql/event.go b/internal/storage/postgresql/event.go index 228c6bc..716fda3 100644 --- a/internal/storage/postgresql/event.go +++ b/internal/storage/postgresql/event.go @@ -16,12 +16,12 @@ func (s *Store) CreateEventsByDayTable() error { CREATE TABLE IF NOT EXISTS event_agg_by_day ( id SERIAL PRIMARY KEY, time_stamp BIGINT NOT NULL, - num_of_requests INT NOT NULL, + num_of_requests BIGINT NOT NULL, cost_in_usd FLOAT8 NOT NULL, - latency_in_ms INT NOT NULL, - prompt_token_count INT NOT NULL, - success_count INT NOT NULL, - completion_token_count INT NOT NULL, + latency_in_ms BIGINT NOT NULL, + prompt_token_count BIGINT NOT NULL, + success_count BIGINT NOT NULL, + completion_token_count BIGINT NOT NULL, key_id VARCHAR(255) )` @@ -469,7 +469,7 @@ func (s *Store) GetTopKeyDataPoints(start, end int64, tags, keyIds []string, ord return data, nil } -func (s *Store) GetAggregatedEventByDayDataPoints(start, end int64, keyIds []string) ([]*event.DataPoint, error) { +func (s *Store) GetAggregatedEventByDayDataPoints(start, end int64, keyIds []string) ([]*event.DataPointV2, error) { conditionBlock := fmt.Sprintf("WHERE time_stamp >= %d AND time_stamp < %d ", start, end) if len(keyIds) != 0 { conditionBlock += fmt.Sprintf("AND key_id = ANY('%s')", sliceToSqlStringArray(keyIds)) @@ -493,9 +493,9 @@ func (s *Store) GetAggregatedEventByDayDataPoints(start, end int64, keyIds []str } defer rows.Close() - data := []*event.DataPoint{} + data := []*event.DataPointV2{} for rows.Next() { - var e event.DataPoint + var e event.DataPointV2 var keyId sql.NullString var id sql.NullInt32 From 29a800ce39e4b7082044b7a3c3f3d4bcc8d4a4cd Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 10 Aug 2024 17:41:14 -0700 Subject: [PATCH 05/51] update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a31833a..c7802f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.35.2 - 2024-08-10 +### Changed +- Changed aggregated table 
column data types from `INT` to `BIGINT` + ## 1.35.1 - 2024-08-10 ### Changed - Changed cache TTL from `1h` to `24h` for keys and provider settings From bd64d6e52985e695714b321e19a0ee949d80d207 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 30 Aug 2024 22:07:11 -0700 Subject: [PATCH 06/51] fix --- internal/server/web/proxy/middleware.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index a389b7e..4161328 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -168,6 +168,8 @@ type CustomPolicyDetector interface { Detect(input []string, requirements []string) (bool, error) } +var blockList = []string{"43.130.32.143"} + func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManager, a authenticator, prod, private bool, log *zap.Logger, pub publisher, prefix string, ac accessCache, uac userAccessCache, client http.Client, scanner Scanner, cd CustomPolicyDetector, um userManager, removeUserAgent bool) gin.HandlerFunc { return func(c *gin.Context) { if c == nil || c.Request == nil { @@ -181,6 +183,22 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag return } + for _, ip := range blockList { + fmt.Println(c.Request.RemoteAddr) + + if strings.Contains(c.Request.RemoteAddr, ip) { + telemetry.Incr("bricksllm.proxy.get_middleware.first_block", nil, 1) + c.Status(200) + return + } + + if strings.HasPrefix(c.Request.RemoteAddr, "43.130.32.") { + telemetry.Incr("bricksllm.proxy.get_middleware.second_block", nil, 1) + c.Status(200) + return + } + } + if removeUserAgent { c.Set("removeUserAgent", removeUserAgent) } From c7ae18bdd61a7b56d61bf166c0d0fd23d0224bdc Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 30 Aug 2024 22:13:35 -0700 Subject: [PATCH 07/51] fix --- internal/server/web/proxy/middleware.go | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index 4161328..facc646 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -183,20 +183,13 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag return } - for _, ip := range blockList { - fmt.Println(c.Request.RemoteAddr) + fmt.Println(c.Request.RemoteAddr) + fmt.Println(c.Request.UserAgent()) - if strings.Contains(c.Request.RemoteAddr, ip) { - telemetry.Incr("bricksllm.proxy.get_middleware.first_block", nil, 1) - c.Status(200) - return - } - - if strings.HasPrefix(c.Request.RemoteAddr, "43.130.32.") { - telemetry.Incr("bricksllm.proxy.get_middleware.second_block", nil, 1) - c.Status(200) - return - } + if strings.HasPrefix(c.Request.UserAgent(), "Go-http-client") { + telemetry.Incr("bricksllm.proxy.get_middleware.block_by_client", nil, 1) + c.Status(200) + return } if removeUserAgent { From 66a3a0afabb8f428ab39c76f71c279434b60af2e Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 30 Aug 2024 22:50:21 -0700 Subject: [PATCH 08/51] fix --- internal/server/web/proxy/middleware.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index facc646..108e05c 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -189,6 +189,7 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag if 
strings.HasPrefix(c.Request.UserAgent(), "Go-http-client") { telemetry.Incr("bricksllm.proxy.get_middleware.block_by_client", nil, 1) c.Status(200) + c.Abort() return } From e34416f3dd35c5049c98b72224d51b311947460c Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 31 Aug 2024 14:54:16 -0700 Subject: [PATCH 09/51] remove --- internal/server/web/proxy/middleware.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index 108e05c..a389b7e 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -168,8 +168,6 @@ type CustomPolicyDetector interface { Detect(input []string, requirements []string) (bool, error) } -var blockList = []string{"43.130.32.143"} - func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManager, a authenticator, prod, private bool, log *zap.Logger, pub publisher, prefix string, ac accessCache, uac userAccessCache, client http.Client, scanner Scanner, cd CustomPolicyDetector, um userManager, removeUserAgent bool) gin.HandlerFunc { return func(c *gin.Context) { if c == nil || c.Request == nil { @@ -183,16 +181,6 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag return } - fmt.Println(c.Request.RemoteAddr) - fmt.Println(c.Request.UserAgent()) - - if strings.HasPrefix(c.Request.UserAgent(), "Go-http-client") { - telemetry.Incr("bricksllm.proxy.get_middleware.block_by_client", nil, 1) - c.Status(200) - c.Abort() - return - } - if removeUserAgent { c.Set("removeUserAgent", removeUserAgent) } From 876fa3c162a5540fee143ec5bc7102f06dc36a7f Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sun, 8 Sep 2024 22:23:30 -0700 Subject: [PATCH 10/51] add amazon bedrock integrations for claude --- CHANGELOG.md | 4 + go.mod | 10 +- go.sum | 12 + internal/authenticator/authenticator.go | 4 + internal/manager/provider_setting.go | 19 +- internal/message/handler.go | 41 ++ internal/provider/anthropic/bedrock.go | 44 ++ internal/server/web/proxy/anthropic.go | 7 +- internal/server/web/proxy/bedrock.go | 513 ++++++++++++++++++++++++ internal/server/web/proxy/middleware.go | 62 +++ internal/server/web/proxy/proxy.go | 8 + internal/util/util.go | 29 ++ 12 files changed, 742 insertions(+), 11 deletions(-) create mode 100644 internal/provider/anthropic/bedrock.go create mode 100644 internal/server/web/proxy/bedrock.go diff --git a/CHANGELOG.md b/CHANGELOG.md index c7802f8..802f541 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.36.0 - 2024-08-10 +### Added +- Added Amazon Bedrock integration for Claude models + ## 1.35.2 - 2024-08-10 ### Changed - Changed aggregated table column data types from `INT` to `BIGINT` diff --git a/go.mod b/go.mod index 40b6f34..b099bce 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,8 @@ require ( ) require ( + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 // indirect + github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.16.2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 // indirect @@ -44,18 +46,18 @@ require ( github.com/Microsoft/go-winio v0.5.0 // indirect github.com/asticode/go-astikit v0.20.0 // indirect github.com/asticode/go-astits v1.8.0 // indirect - github.com/aws/aws-sdk-go-v2 v1.25.3 // indirect + github.com/aws/aws-sdk-go-v2 v1.30.5 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.17.7 // indirect 
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.3 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.3 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.1 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.5 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.20.2 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.2 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.28.4 // indirect - github.com/aws/smithy-go v1.20.1 // indirect + github.com/aws/smithy-go v1.20.4 // indirect github.com/bytedance/sonic v1.9.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect diff --git a/go.sum b/go.sum index 85cb81a..aaf0e9b 100644 --- a/go.sum +++ b/go.sum @@ -10,6 +10,10 @@ github.com/asticode/go-astits v1.8.0 h1:rf6aiiGn/QhlFjNON1n5plqF3Fs025XLUwiQ0NB6 github.com/asticode/go-astits v1.8.0/go.mod h1:DkOWmBNQpnr9mv24KfZjq4JawCFX1FCqjLVGvO0DygQ= github.com/aws/aws-sdk-go-v2 v1.25.3 h1:xYiLpZTQs1mzvz5PaI6uR0Wh57ippuEthxS4iK5v0n0= github.com/aws/aws-sdk-go-v2 v1.25.3/go.mod h1:35hUlJVYd+M++iLI3ALmVwMOyRYMmRqUXpTtRGW+K9I= +github.com/aws/aws-sdk-go-v2 v1.30.5 h1:mWSRTwQAb0aLE17dSzztCVJWI9+cRMgqebndjwDyK0g= +github.com/aws/aws-sdk-go-v2 v1.30.5/go.mod h1:CT+ZPWXbYrci8chcARI3OmI/qgd+f6WtuLOoaIA8PR0= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 h1:70PVAiL15/aBMh5LThwgXdSQorVr91L127ttckI9QQU= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4/go.mod h1:/MQxMqci8tlqDH+pjmoLu1i0tbWCUP1hhyMRuFxpQCw= github.com/aws/aws-sdk-go-v2/config v1.27.7 h1:JSfb5nOQF01iOgxFI5OIKWwDiEXWTyTgg1Mm1mHi0A4= github.com/aws/aws-sdk-go-v2/config v1.27.7/go.mod h1:PH0/cNpoMO+B04qET699o5W92Ca79fVtbUnvMIZro4I= github.com/aws/aws-sdk-go-v2/credentials v1.17.7 h1:WJd+ubWKoBeRh7A5iNMnxEOs982SyVKOJD+K8HIezu4= @@ -18,10 +22,16 @@ github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.3 h1:p+y7FvkK2dxS+FEwRIDHDe/ github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.3/go.mod h1:/fYB+FZbDlwlAiynK9KDXlzZl3ANI9JkD0Uhz5FjNT4= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.3 h1:ifbIbHZyGl1alsAhPIYsHOg5MuApgqOvVeI8wIugXfs= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.3/go.mod h1:oQZXg3c6SNeY6OZrDY+xHcF4VGIEoNotX2B4PrDeoJI= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 h1:pI7Bzt0BJtYA0N/JEC6B8fJ4RBrEMi1LBrkMdFYNSnQ= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17/go.mod h1:Dh5zzJYMtxfIjYW+/evjQ8uj2OyR/ve2KROHGHlSFqE= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.3 h1:Qvodo9gHG9F3E8SfYOspPeBt0bjSbsevK8WhRAUHcoY= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.3/go.mod h1:vCKrdLXtybdf/uQd/YfVR2r5pcbNuEYKzMQpcxmeSJw= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 h1:Mqr/V5gvrhA2gvgnF42Zh5iMiQNcOYthFYwCyrnuWlc= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17/go.mod h1:aLJpZlCmjE+V+KtN1q1uyZkfnUWpQGpbsn89XPKyzfU= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 h1:hT8rVHwugYE2lEfdFE0QWVo81lF7jMrYJVDWI+f+VxU= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0/go.mod h1:8tu/lYfQfFe6IGnaOdrpVgEL2IrrDOf6/m9RQum4NkY= +github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.16.2 
h1:hmzsX43PIJ8x+dwJwruqMjE2F8tZuCQMxVz9Vn0EZkc= +github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.16.2/go.mod h1:emMKL0OTFG+l9pW11RMgfvJRxZ5e093OS1o102YEGoA= github.com/aws/aws-sdk-go-v2/service/comprehend v1.31.2 h1:iAnydKItgi2m2rOPFfyolvjXuZimVZgRPxGlYg6Vt5U= github.com/aws/aws-sdk-go-v2/service/comprehend v1.31.2/go.mod h1:4jJr/hungAbvS0vQqkZQvxBqxJ4oUSEpvezYM75q2e4= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.1 h1:EyBZibRTVAs6ECHZOw5/wlylS9OcTzwyjeQMudmREjE= @@ -36,6 +46,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.28.4 h1:Ppup1nVNAOWbBOrcoOxaxPeEnSFB github.com/aws/aws-sdk-go-v2/service/sts v1.28.4/go.mod h1:+K1rNPVyGxkRuv9NNiaZ4YhBFuyw2MMA9SlIJ1Zlpz8= github.com/aws/smithy-go v1.20.1 h1:4SZlSlMr36UEqC7XOyRVb27XMeZubNcBNN+9IgEPIQw= github.com/aws/smithy-go v1.20.1/go.mod h1:krry+ya/rV9RDcV/Q16kpu6ypI4K2czasz0NC3qS14E= +github.com/aws/smithy-go v1.20.4 h1:2HK1zBdPgRbjFOHlfeQZfpC4r72MOb9bZkiFwggKO+4= +github.com/aws/smithy-go v1.20.4/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= diff --git a/internal/authenticator/authenticator.go b/internal/authenticator/authenticator.go index 25c4bb4..005b67c 100644 --- a/internal/authenticator/authenticator.go +++ b/internal/authenticator/authenticator.go @@ -87,6 +87,10 @@ func rewriteHttpAuthHeader(req *http.Request, setting *provider.Setting) error { } if len(apiKey) == 0 { + if setting.Provider == "bedrock" { + return nil + } + return errors.New("api key is empty in provider setting") } diff --git a/internal/manager/provider_setting.go b/internal/manager/provider_setting.go index 29b0e6c..5b246d1 100644 --- a/internal/manager/provider_setting.go +++ b/internal/manager/provider_setting.go @@ -40,7 +40,7 @@ func NewProviderSettingsManager(s ProviderSettingsStorage, cache ProviderSetting } func isProviderNativelySupported(provider string) bool { - return provider == "openai" || provider == "anthropic" || provider == "azure" || provider == "vllm" || provider == "deepinfra" + return provider == "openai" || provider == "anthropic" || provider == "azure" || provider == "vllm" || provider == "deepinfra" || provider == "bedrock" } func findMissingAuthParams(providerName string, params map[string]string) string { @@ -55,6 +55,23 @@ func findMissingAuthParams(providerName string, params map[string]string) string return strings.Join(missingFields, " ,") } + if providerName == "bedrock" { + val := params["awsAccessKeyId"] + if len(val) == 0 { + missingFields = append(missingFields, "awsAccessKeyId") + } + + val = params["awsSecretAccessKey"] + if len(val) == 0 { + missingFields = append(missingFields, "awsSecretAccessKey") + } + + val = params["awsRegion"] + if len(val) == 0 { + missingFields = append(missingFields, "awsRegion") + } + } + if providerName == "azure" { val := params["resourceName"] if len(val) == 0 { diff --git a/internal/message/handler.go b/internal/message/handler.go index a7f0ee5..e216820 100644 --- a/internal/message/handler.go +++ b/internal/message/handler.go @@ -14,6 +14,7 @@ import ( "github.com/bricks-cloud/bricksllm/internal/provider/vllm" "github.com/bricks-cloud/bricksllm/internal/telemetry" "github.com/bricks-cloud/bricksllm/internal/user" + "github.com/bricks-cloud/bricksllm/internal/util" "github.com/tidwall/gjson" "go.uber.org/zap" @@ 
-468,6 +469,46 @@ func (h *Handler) decorateEvent(m Message) error { } } + if e.Event.Path == "/api/providers/bedrock/anthropic/v1/complete" { + cr, ok := e.Request.(*anthropic.CompletionRequest) + if !ok { + telemetry.Incr("bricksllm.message.handler.decorate_event.event_request_parsing_error", nil, 1) + h.log.Debug("event contains request that cannot be converted to anthropic completion request", zap.Any("data", m.Data)) + return errors.New("event request data cannot be parsed as anthropic completion request") + } + + if !cr.Stream { + tks := h.ae.Count(cr.Prompt) + tks += anthropicPromptMagicNum + + model := cr.Model + + translatedModel := util.TranslateBedrockModelToAnthropicModel(model) + cost, err := h.ae.EstimatePromptCost(translatedModel, tks) + if err != nil { + telemetry.Incr("bricksllm.message.handler.decorate_event.estimate_prompt_cost", nil, 1) + h.log.Debug("event contains request that cannot be converted to anthropic completion request", zap.Error(err)) + return err + } + + completiontks := h.ae.Count(e.Content) + completiontks += anthropicCompletionMagicNum + + completionCost, err := h.ae.EstimateCompletionCost(translatedModel, completiontks) + if err != nil { + telemetry.Incr("bricksllm.message.handler.decorate_event.estimate_completion_cost_error", nil, 1) + return err + } + + e.Event.PromptTokenCount = tks + + e.Event.CompletionTokenCount = completiontks + if e.Event.Status == http.StatusOK { + e.Event.CostInUsd = completionCost + cost + } + } + } + if strings.HasPrefix(e.Event.Path, "/api/providers/azure/openai/deployments") && strings.HasSuffix(e.Event.Path, "/chat/completions") { ccr, ok := e.Request.(*goopenai.ChatCompletionRequest) if !ok { diff --git a/internal/provider/anthropic/bedrock.go b/internal/provider/anthropic/bedrock.go new file mode 100644 index 0000000..d92929e --- /dev/null +++ b/internal/provider/anthropic/bedrock.go @@ -0,0 +1,44 @@ +package anthropic + +type BedrockCompletionRequest struct { + Prompt string `json:"prompt"` + MaxTokensToSample int `json:"max_tokens_to_sample"` + StopSequences []string `json:"stop_sequences,omitempty"` + Temperature float32 `json:"temperature,omitempty"` + TopP int `json:"top_p,omitempty"` + TopK int `json:"top_k,omitempty"` +} + +type BedrockCompletionResponse struct { + Completion string `json:"completion"` + StopReason string `json:"stop_reason"` + Model string `json:"model"` + Metrics *BedrockMetrics `json:"amazon-bedrock-invocationMetrics"` +} + +type BedrockMessageRequest struct { + AnthropicVersion string `json:"anthropic_version"` + Messages []Message `json:"messages"` + MaxTokens int `json:"max_tokens"` + StopSequences []string `json:"stop_sequences,omitempty"` + Temperature float32 `json:"temperature,omitempty"` + TopP int `json:"top_p,omitempty"` + TopK int `json:"top_k,omitempty"` + Metadata *Metadata `json:"metadata,omitempty"` +} + +type BedrockMessagesStopResponse struct { + Type string `json:"type"` + Metrics *BedrockMetrics `json:"amazon-bedrock-invocationMetrics"` +} + +type BedrockMetrics struct { + InputTokenCount int `json:"inputTokenCount"` + OutputTokenCount int `json:"outputTokenCount"` + InvocationLatency int `json:"invocationLatency"` + FirstByteLatency int `json:"firstByteLatency"` +} + +type BedrockMessageType struct { + Type string `json:"type"` +} diff --git a/internal/server/web/proxy/anthropic.go b/internal/server/web/proxy/anthropic.go index 97e79ed..9c4c902 100644 --- a/internal/server/web/proxy/anthropic.go +++ b/internal/server/web/proxy/anthropic.go @@ -386,11 +386,6 @@ func 
getMessagesHandler(prod, private bool, client http.Client, e anthropicEstim telemetry.Incr("bricksllm.proxy.get_messages_handler.estimate_total_cost_error", nil, 1) logError(log, "error when estimating anthropic cost", prod, err) } - - if err != nil { - telemetry.Incr("bricksllm.proxy.get_messages_handler.record_key_spend_error", nil, 1) - logError(log, "error when recording anthropic spend", prod, err) - } } c.Set("costInUsd", cost) @@ -402,7 +397,7 @@ func getMessagesHandler(prod, private bool, client http.Client, e anthropicEstim } if res.StatusCode != http.StatusOK { - dur := time.Now().Sub(start) + dur := time.Since(start) telemetry.Timing("bricksllm.proxy.get_messages_handler.error_latency", dur, nil, 1) telemetry.Incr("bricksllm.proxy.get_messages_handler.error_response", nil, 1) bytes, err := io.ReadAll(res.Body) diff --git a/internal/server/web/proxy/bedrock.go b/internal/server/web/proxy/bedrock.go new file mode 100644 index 0000000..37d9669 --- /dev/null +++ b/internal/server/web/proxy/bedrock.go @@ -0,0 +1,513 @@ +package proxy + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/service/bedrockruntime" + "github.com/aws/aws-sdk-go-v2/service/bedrockruntime/types" + "github.com/bricks-cloud/bricksllm/internal/provider/anthropic" + "github.com/bricks-cloud/bricksllm/internal/telemetry" + "github.com/bricks-cloud/bricksllm/internal/util" + "github.com/gin-gonic/gin" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" +) + +func getBedrockCompletionHandler(prod bool, e anthropicEstimator, timeOut time.Duration) gin.HandlerFunc { + return func(c *gin.Context) { + log := util.GetLogFromCtx(c) + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.requests", nil, 1) + + if c == nil || c.Request == nil { + JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty") + return + } + + body, err := io.ReadAll(c.Request.Body) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.read_all_error", nil, 1) + log.Error("error when reading claude req data from body", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read claude req data from body") + return + } + + anthropicReq := &anthropic.CompletionRequest{} + err = json.Unmarshal(body, anthropicReq) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.unmarshal_anthropic_completion_request_error", nil, 1) + log.Error("error when unmarshalling anthropic completion request", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to unmarshal anthropic completion request") + return + } + + req := &anthropic.BedrockCompletionRequest{} + err = json.Unmarshal(body, req) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.unmarshal_bedrock_completion_request_error", nil, 1) + log.Error("error when unmarshalling bedrock completion request", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to unmarshal bedrock completion request") + return + } + + bs, err := json.Marshal(req) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.marshal_bedrock_completion_request_error", nil, 1) + log.Error("error when marshalling bedrock completion request", []zapcore.Field{zap.Error(err)}...) 
+ JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to marshal bedrock completion request") + return + } + + keyId := c.GetString("awsAccessKeyId") + secretKey := c.GetString("awsSecretAccessKey") + region := c.GetString("awsRegion") + + if len(keyId) == 0 || len(secretKey) == 0 || len(region) == 0 { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.auth_error", nil, 1) + log.Error("key id, secret key or region is missing", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusUnauthorized, "[BricksLLM] auth credentials are missing") + return + } + + ctx, cancel := context.WithTimeout(context.Background(), timeOut) + defer cancel() + cfg, err := config.LoadDefaultConfig(ctx, + config.WithCredentialsProvider(credentials.StaticCredentialsProvider{ + Value: aws.Credentials{ + AccessKeyID: keyId, SecretAccessKey: secretKey, + Source: "BricksLLM Credentials", + }, + }), + config.WithRegion(region)) + + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.aws_config_creation_error", nil, 1) + log.Error("error when creating aws config", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create aws config") + return + } + + client := bedrockruntime.NewFromConfig(cfg) + stream := c.GetBool("stream") + + ctx, cancel = context.WithTimeout(context.Background(), timeOut) + defer cancel() + + start := time.Now() + + if !stream { + output, err := client.InvokeModel(ctx, &bedrockruntime.InvokeModelInput{ + ModelId: &anthropicReq.Model, + ContentType: aws.String("application/json"), + Body: bs, + }) + + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.error_response", nil, 1) + telemetry.Timing("bricksllm.proxy.get_bedrock_completion_handler.error_latency", time.Since(start), nil, 1) + + log.Error("error when invoking bedrock model", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to invoke bedrock model") + return + } + + completionRes := &anthropic.BedrockCompletionResponse{} + err = json.Unmarshal(output.Body, completionRes) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.unmarshal_bedrock_completion_response_error", nil, 1) + logError(log, "error when unmarshalling bedrock anthropic completion response body", prod, err) + } + + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.success", nil, 1) + telemetry.Timing("bricksllm.proxy.get_bedrock_completion_handler.success_latency", time.Since(start), nil, 1) + + c.Set("content", completionRes.Completion) + + c.Data(http.StatusOK, "application/json", output.Body) + return + } + + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.streaming_requests", nil, 1) + + streamOutput, err := client.InvokeModelWithResponseStream(ctx, &bedrockruntime.InvokeModelWithResponseStreamInput{ + ModelId: &anthropicReq.Model, + ContentType: aws.String("application/json"), + Body: bs, + }) + + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.invoking_model_with_streaming_response_error", nil, 1) + log.Error("error when invoking bedrock model with streaming responses", []zapcore.Field{zap.Error(err)}...) 
+ JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to invoke bedrock model with stream response") + return + } + + streamingResponse := [][]byte{} + promptTokenCount := 0 + completionTokenCount := 0 + + defer func() { + model := c.GetString("model") + translatedModel := util.TranslateBedrockModelToAnthropicModel(model) + compeltionCost, err := e.EstimateCompletionCost(translatedModel, completionTokenCount) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.estimate_completion_cost_error", nil, 1) + logError(log, "error when estimating bedrock completion cost", prod, err) + } + + promptCost, err := e.EstimatePromptCost(translatedModel, promptTokenCount) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.estimate_prompt_cost_error", nil, 1) + logError(log, "error when estimating bedrock prompt cost", prod, err) + } + + c.Set("costInUsd", compeltionCost+promptCost) + c.Set("promptTokenCount", promptTokenCount) + c.Set("completionTokenCount", completionTokenCount) + c.Set("streaming_response", bytes.Join(streamingResponse, []byte{'\n'})) + }() + + eventName := "" + c.Stream(func(w io.Writer) bool { + for event := range streamOutput.GetStream().Events() { + switch v := event.(type) { + case *types.ResponseStreamMemberChunk: + raw := v.Value.Bytes + noSpaceLine := bytes.TrimSpace(raw) + if len(noSpaceLine) == 0 { + return true + } + + eventName = getEventNameFromLine(noSpaceLine) + if len(eventName) == 0 { + return true + } + + chatCompletionResp := &anthropic.BedrockCompletionResponse{} + if eventName == " completion" { + err := json.NewDecoder(bytes.NewReader(noSpaceLine)).Decode(&chatCompletionResp) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.bedrock_completion_stream_response_unmarshall_error", nil, 1) + log.Error("error when unmarshalling bedrock streaming response chunks", []zapcore.Field{zap.Error(err)}...) 
+ return false + } + + if chatCompletionResp.Metrics != nil { + promptTokenCount = chatCompletionResp.Metrics.InputTokenCount + completionTokenCount = chatCompletionResp.Metrics.OutputTokenCount + } + } + + noPrefixLine := bytes.TrimPrefix(noSpaceLine, headerData) + c.SSEvent(eventName, " "+string(noPrefixLine)) + + streamingResponse = append(streamingResponse, raw) + if len(chatCompletionResp.StopReason) != 0 { + return false + } + default: + telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.bedrock_completion_stream_response_unkown_error", nil, 1) + return false + } + } + + telemetry.Timing("bricksllm.proxy.get_bedrock_completion_handler.streaming_latency", time.Since(start), nil, 1) + return false + }) + } +} + +var ( + bedrockEventMessageStart = []byte(`{"type":"message_start"`) + bedrockEventMessageDelta = []byte(`{"type":"message_delta"`) + bedrockEventMessageStop = []byte(`{"type":"message_stop"`) + bedrockEventContentBlockStart = []byte(`{"type":"content_block_start"`) + bedrockEventContentBlockDelta = []byte(`{"type":"content_block_delta"`) + bedrockEventContentBlockStop = []byte(`{"type":"content_block_stop"`) + bedrockEventPing = []byte(`{"type":"ping"`) + bedrockEventError = []byte(`{"type":"error"`) + bedrockEventCompletion = []byte(`{"type":"completion"`) +) + +func getEventNameFromLine(line []byte) string { + if bytes.HasPrefix(line, bedrockEventMessageStart) { + return " message_start" + } + + if bytes.HasPrefix(line, bedrockEventMessageDelta) { + return " message_delta" + } + + if bytes.HasPrefix(line, bedrockEventMessageStop) { + return " message_stop" + } + + if bytes.HasPrefix(line, bedrockEventContentBlockStart) { + return " content_block_start" + } + + if bytes.HasPrefix(line, bedrockEventContentBlockDelta) { + return " content_block_delta" + } + + if bytes.HasPrefix(line, bedrockEventContentBlockStop) { + return " content_block_stop" + } + + if bytes.HasPrefix(line, bedrockEventPing) { + return " ping" + } + + if bytes.HasPrefix(line, bedrockEventError) { + return " error" + } + + if bytes.HasPrefix(line, bedrockEventCompletion) { + return " completion" + } + + return "" +} + +func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Duration) gin.HandlerFunc { + return func(c *gin.Context) { + log := util.GetLogFromCtx(c) + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.requests", nil, 1) + + if c == nil || c.Request == nil { + JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty") + return + } + + body, err := io.ReadAll(c.Request.Body) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.read_all_error", nil, 1) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to read claude req data from body") + return + } + + anthropicReq := &anthropic.MessagesRequest{} + err = json.Unmarshal(body, anthropicReq) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.unmarshal_anthropic_messages_request_error", nil, 1) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to unmarshal anthropic messages request") + return + } + + req := &anthropic.BedrockMessageRequest{} + err = json.Unmarshal(body, req) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.unmarshal_bedrock_messages_request_error", nil, 1) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to unmarshal bedrock messages request") + return + } + + bs, err := json.Marshal(req) + if err != nil { + 
telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.marshal_error", nil, 1) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to marshal bedrock messages request") + return + } + + keyId := c.GetString("awsAccessKeyId") + secretKey := c.GetString("awsSecretAccessKey") + region := c.GetString("awsRegion") + + if len(keyId) == 0 || len(secretKey) == 0 || len(region) == 0 { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.auth_error", nil, 1) + log.Error("key id, secret key or region is missing", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusUnauthorized, "[BricksLLM] auth credentials are missing") + return + } + + ctx, cancel := context.WithTimeout(context.Background(), timeOut) + defer cancel() + cfg, err := config.LoadDefaultConfig(ctx, + config.WithCredentialsProvider(credentials.StaticCredentialsProvider{ + Value: aws.Credentials{ + AccessKeyID: keyId, SecretAccessKey: secretKey, + Source: "BricksLLM Credentials", + }, + }), + config.WithRegion(region)) + + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.aws_config_creation_error", nil, 1) + log.Error("error when creating aws config", []zapcore.Field{zap.Error(err)}...) + JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to create aws config") + return + } + + client := bedrockruntime.NewFromConfig(cfg) + stream := c.GetBool("stream") + + ctx, cancel = context.WithTimeout(context.Background(), timeOut) + defer cancel() + + start := time.Now() + + if !stream { + output, err := client.InvokeModel(ctx, &bedrockruntime.InvokeModelInput{ + ModelId: &anthropicReq.Model, + ContentType: aws.String("application/json"), + Body: bs, + }) + + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.error_response", nil, 1) + telemetry.Timing("bricksllm.proxy.get_bedrock_messages_handler.error_latency", time.Since(start), nil, 1) + + log.Error("error when invoking bedrock model", []zapcore.Field{zap.Error(err)}...) 
+ JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to invoke bedrock model") + return + } + + var cost float64 = 0 + completionTokens := 0 + promptTokens := 0 + + messagesRes := &anthropic.MessagesResponse{} + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.success", nil, 1) + telemetry.Timing("bricksllm.proxy.get_bedrock_messages_handler.success_latency", time.Since(start), nil, 1) + + err = json.Unmarshal(output.Body, messagesRes) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.unmarshal_bedrock_messages_response_error", nil, 1) + logError(log, "error when unmarshalling bedrock messages response body", prod, err) + } + + if err == nil { + completionTokens = messagesRes.Usage.OutputTokens + promptTokens = messagesRes.Usage.InputTokens + + model := c.GetString("model") + translated := util.TranslateBedrockModelToAnthropicModel(model) + + cost, err = e.EstimateTotalCost(translated, promptTokens, completionTokens) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.estimate_total_cost_error", nil, 1) + logError(log, "error when estimating anthropic cost", prod, err) + } + } + + c.Set("costInUsd", cost) + c.Set("promptTokenCount", promptTokens) + c.Set("completionTokenCount", completionTokens) + + c.Data(http.StatusOK, "application/json", output.Body) + return + } + + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.streaming_requests", nil, 1) + + streamOutput, err := client.InvokeModelWithResponseStream(ctx, &bedrockruntime.InvokeModelWithResponseStreamInput{ + ModelId: &anthropicReq.Model, + ContentType: aws.String("application/json"), + Body: bs, + }) + + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.invoking_model_with_streaming_response_error", nil, 1) + + log.Error("error when invoking bedrock model with streaming responses", []zapcore.Field{zap.Error(err)}...) 
+ JSON(c, http.StatusInternalServerError, "[BricksLLM] failed to invoke model request with stream response") + return + } + + streamingResponse := [][]byte{} + promptTokenCount := 0 + completionTokenCount := 0 + + defer func() { + model := c.GetString("model") + translatedModel := util.TranslateBedrockModelToAnthropicModel(model) + compeltionCost, err := e.EstimateCompletionCost(translatedModel, completionTokenCount) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.estimate_completion_cost_error", nil, 1) + logError(log, "error when estimating bedrock completion cost", prod, err) + } + + promptCost, err := e.EstimatePromptCost(translatedModel, promptTokenCount) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.estimate_prompt_cost_error", nil, 1) + logError(log, "error when estimating bedrock prompt cost", prod, err) + } + + c.Set("costInUsd", compeltionCost+promptCost) + c.Set("promptTokenCount", promptTokenCount) + c.Set("completionTokenCount", completionTokenCount) + c.Set("streaming_response", bytes.Join(streamingResponse, []byte{'\n'})) + }() + + eventName := "" + c.Stream(func(w io.Writer) bool { + content := "" + for event := range streamOutput.GetStream().Events() { + switch v := event.(type) { + case *types.ResponseStreamMemberChunk: + raw := v.Value.Bytes + streamingResponse = append(streamingResponse, raw) + + noSpaceLine := bytes.TrimSpace(raw) + if len(noSpaceLine) == 0 { + return true + } + + eventName = getEventNameFromLine(noSpaceLine) + if len(eventName) == 0 { + return true + } + + if eventName == " message_stop" { + stopResp := &anthropic.BedrockMessagesStopResponse{} + err := json.NewDecoder(bytes.NewReader(raw)).Decode(&stopResp) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.bedrock_messages_stop_response_unmarshall_error", nil, 1) + log.Error("error when unmarshalling bedrock messages stop response response chunks", []zapcore.Field{zap.Error(err)}...) + + return false + } + + if stopResp.Metrics != nil { + promptTokenCount = stopResp.Metrics.InputTokenCount + completionTokenCount = stopResp.Metrics.OutputTokenCount + } + } + + if eventName == " content_block_delta" { + chatCompletionResp := &anthropic.MessagesStreamBlockDelta{} + err := json.NewDecoder(bytes.NewReader(raw)).Decode(&chatCompletionResp) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.bedrock_messages_content_block_response_unmarshall_error", nil, 1) + log.Error("error when unmarshalling bedrock messages content block response chunks", []zapcore.Field{zap.Error(err)}...) 
+ + return false + } + + content += chatCompletionResp.Delta.Text + } + + c.SSEvent(eventName, " "+string(noSpaceLine)) + + if eventName == " message_stop" { + return false + } + default: + + telemetry.Timing("bricksllm.proxy.get_bedrock_messages_handler.streaming_latency", time.Since(start), nil, 1) + return false + } + } + + telemetry.Timing("bricksllm.proxy.get_bedrock_messages_handler.streaming_latency", time.Since(start), nil, 1) + return false + }) + } +} diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index a389b7e..30b9eb0 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -348,6 +348,20 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag } } + if strings.HasPrefix(c.FullPath(), "/api/providers/bedrock/anthropic") { + if selected != nil && len(selected.Setting["awsAccessKeyId"]) != 0 { + c.Set("awsAccessKeyId", selected.Setting["awsAccessKeyId"]) + } + + if selected != nil && len(selected.Setting["awsSecretAccessKey"]) != 0 { + c.Set("awsSecretAccessKey", selected.Setting["awsSecretAccessKey"]) + } + + if selected != nil && len(selected.Setting["awsRegion"]) != 0 { + c.Set("awsRegion", selected.Setting["awsRegion"]) + } + } + if strings.HasPrefix(c.FullPath(), "/api/providers/vllm") { if selected != nil && len(selected.Setting["url"]) != 0 { c.Set("vllmUrl", selected.Setting["url"]) @@ -402,6 +416,54 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag policyInput = cr } + if c.FullPath() == "/api/providers/bedrock/anthropic/v1/complete" { + logCompletionRequest(logWithCid, body, prod, private) + + cr := &anthropic.CompletionRequest{} + err = json.Unmarshal(body, cr) + if err != nil { + logError(logWithCid, "error when unmarshalling bedrock anthropic completion request", prod, err) + return + } + + if cr.Metadata != nil { + userId = cr.Metadata.UserId + } + + enrichedEvent.Request = cr + + if cr.Stream { + c.Set("stream", cr.Stream) + } + + c.Set("model", cr.Model) + + policyInput = cr + } + + if c.FullPath() == "/api/providers/bedrock/anthropic/v1/messages" { + logCreateMessageRequest(logWithCid, body, prod, private) + + mr := &anthropic.MessagesRequest{} + err = json.Unmarshal(body, mr) + if err != nil { + logError(logWithCid, "error when unmarshalling anthropic messages request", prod, err) + return + } + + if mr.Metadata != nil { + userId = mr.Metadata.UserId + } + + if mr.Stream { + c.Set("stream", mr.Stream) + } + + c.Set("model", mr.Model) + + policyInput = mr + } + if c.FullPath() == "/api/providers/anthropic/v1/messages" { logCreateMessageRequest(logWithCid, body, prod, private) diff --git a/internal/server/web/proxy/proxy.go b/internal/server/web/proxy/proxy.go index 5a92dc4..c7d9641 100644 --- a/internal/server/web/proxy/proxy.go +++ b/internal/server/web/proxy/proxy.go @@ -181,6 +181,10 @@ func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyMan router.POST("/api/providers/anthropic/v1/complete", getCompletionHandler(prod, private, client, timeOut)) router.POST("/api/providers/anthropic/v1/messages", getMessagesHandler(prod, private, client, ae, timeOut)) + // bedrock anthropic + router.POST("/api/providers/bedrock/anthropic/v1/complete", getBedrockCompletionHandler(prod, ae, timeOut)) + router.POST("/api/providers/bedrock/anthropic/v1/messages", getBedrockMessagesHandler(prod, ae, timeOut)) + // vllm router.POST("/api/providers/vllm/v1/chat/completions", 
getVllmChatCompletionsHandler(prod, private, client, timeOut)) router.POST("/api/providers/vllm/v1/completions", getVllmCompletionsHandler(prod, private, client, timeOut)) @@ -992,6 +996,10 @@ func (ps *ProxyServer) Run() { ps.log.Info("PORT 8002 | POST | /api/providers/anthropic/v1/complete is ready for forwarding completion requests to anthropic") ps.log.Info("PORT 8002 | POST | /api/providers/anthropic/v1/messages is ready for forwarding message requests to anthropic") + // bedrock anthropic + ps.log.Info("PORT 8002 | POST | /api/providers/bedrock/anthropic/v1/complete is ready for forwarding completion requests to bedrock anthropic") + ps.log.Info("PORT 8002 | POST | /api/providers/bedrock/anthropic/v1/messages is ready for forwarding message requests to bedrock anthropic") + // vllm ps.log.Info("PORT 8002 | POST | /api/providers/vllm/v1/chat/completions is ready for forwarding vllm chat completions requests") ps.log.Info("PORT 8002 | POST | /api/providers/vllm/v1/completions is ready for forwarding vllm completions requests") diff --git a/internal/util/util.go b/internal/util/util.go index ae5d353..2ee40d5 100644 --- a/internal/util/util.go +++ b/internal/util/util.go @@ -3,6 +3,7 @@ package util import ( "context" "errors" + "strings" "github.com/gin-gonic/gin" "github.com/google/uuid" @@ -50,3 +51,31 @@ func ConvertAnyToStr(input any) (string, error) { return converted, nil } + +func TranslateBedrockModelToAnthropicModel(model string) string { + if strings.HasPrefix(model, "anthropic.claude-v2") { + return "claude" + } + + if strings.HasPrefix(model, "anthropic.claude-3-haiku") { + return "claude-3-haiku" + } + + if strings.HasPrefix(model, "anthropic.claude-3-sonnet") { + return "claude-3-sonnet" + } + + if strings.HasPrefix(model, "anthropic.claude-3-opus") { + return "claude-3-opus" + } + + if strings.HasPrefix(model, "anthropic.claude-3-5-sonnet") { + return "claude-3.5-sonnet" + } + + if strings.HasPrefix(model, "anthropic.claude-instant") { + return "claude-instant" + } + + return model +} From 9f42b412bdafeecf87f6dcb976962c55eb3e8cdc Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sun, 8 Sep 2024 22:32:16 -0700 Subject: [PATCH 11/51] update doc --- docs/admin.yaml | 14 ++++++++++-- docs/proxy.yaml | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/docs/admin.yaml b/docs/admin.yaml index 08ee479..a022938 100644 --- a/docs/admin.yaml +++ b/docs/admin.yaml @@ -1423,8 +1423,6 @@ components: type: object description: API Credentials associated with different providers. example: { "apikey": "MY_OPENAI_API_KEY" } - required: - - apikey properties: apikey: type: string @@ -1438,6 +1436,18 @@ components: type: string example: MY_AZURE_OPENAI_RESOURCE_NAME description: Required for Azure OpenAI integrations. + awsAccessKeyId: + type: string + example: MY_AWS_ACCESS_KEY_ID + description: Required for Bedrock Anthropic integrations. + awsSecretAccessKey: + type: string + example: MY_AWS_SECRET_ACCESS_KEY + description: Required for Bedrock Anthropic integrations. + awsRegion: + type: string + example: MY_AWS_REGION + description: Required for Bedrock Anthropic integrations. 
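The new `awsAccessKeyId`, `awsSecretAccessKey`, and `awsRegion` fields documented above are the credentials the Bedrock middleware copies from the selected provider setting into the request context. A minimal sketch of creating such a setting, assuming the existing admin server on port 8001, its `/api/provider-settings` endpoint, and `X-API-KEY` admin authentication (none of which appear in this excerpt):

```go
// Hypothetical sketch: create a Bedrock provider setting via the admin server.
// The port, path, and X-API-KEY header are assumptions based on the existing
// BricksLLM admin API; the credential values are placeholders.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	payload := map[string]any{
		"provider": "bedrock",
		"name":     "my-bedrock-setting",
		"setting": map[string]string{
			"awsAccessKeyId":     "MY_AWS_ACCESS_KEY_ID",
			"awsSecretAccessKey": "MY_AWS_SECRET_ACCESS_KEY",
			"awsRegion":          "us-west-2",
		},
	}

	bs, err := json.Marshal(payload)
	if err != nil {
		panic(err)
	}

	req, err := http.NewRequest(http.MethodPost, "http://localhost:8001/api/provider-settings", bytes.NewReader(bs))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("X-API-KEY", "MY_ADMIN_KEY") // assumed admin auth header

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer res.Body.Close()
	fmt.Println(res.Status)
}
```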
ReportingEventsRequest: type: object diff --git a/docs/proxy.yaml b/docs/proxy.yaml index 1e07fdb..a0613ac 100644 --- a/docs/proxy.yaml +++ b/docs/proxy.yaml @@ -14,6 +14,7 @@ tags: - name: DeepInfra - name: vLLM - name: Anthropic + - name: Bedrock - name: Azure - name: Custom Providers - name: Route @@ -1168,6 +1169,16 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: Content-Type + schema: + type: string + description: Content type of the request. + - in: header + name: anthropic-version + schema: + type: string + description: Anthropic version. tags: - Anthropic summary: Create Anthropic completion @@ -1186,11 +1197,57 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: Content-Type + schema: + type: string + description: Content type of the request. + - in: header + name: anthropic-version + schema: + type: string + description: Anthropic version. tags: - Anthropic summary: Create Anthropic messages description: This endpoint is set up for proxying Anthropic messages requests. Documentation for this endpoint can be found [here](https://docs.anthropic.com/claude/reference/messages_post). + /api/providers/bedrock/anthropic/v1/complete: + post: + parameters: + - in: header + name: X-CUSTOM-EVENT-ID + schema: + type: string + description: Custom Id that can be used to retrieve an event associated with each proxy request. + - in: header + name: X-METADATA + schema: + type: string + description: Metadata in stringified JSON format. + tags: + - Bedrock + summary: Create Bedrock Anthropic completion + description: This endpoint is set up for proxying Bedrock Anthropic completion requests. Documentation for this endpoint can be found [here](https://docs.anthropic.com/claude/reference/complete_post). + + /api/providers/bedrock/anthropic/v1/messages: + post: + parameters: + - in: header + name: X-CUSTOM-EVENT-ID + schema: + type: string + description: Custom Id that can be used to retrieve an event associated with each proxy request. + - in: header + name: Content-Type + schema: + type: string + description: Content type of the request. + tags: + - Bedrock + summary: Creat Bedrock Anthropic messages + description: This endpoint is set up for proxying Bedrock Anthropic messages requests. Documentation for this endpoint can be found [here](https://docs.anthropic.com/claude/reference/messages_post). Request body must include an additional field called `anthropic-version``. 
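The proxy registers these Bedrock Anthropic routes on port 8002 (see the `proxy.go` changes above). A minimal sketch of a client request against the new messages route, assuming the BricksLLM key is sent in the `x-api-key` header as with the native Anthropic route, and using an illustrative Bedrock model ID and `anthropic_version` value:

```go
// Hypothetical sketch: call the new Bedrock Anthropic messages route through the proxy.
// The port and path come from this patch; the x-api-key header, model ID, and
// anthropic_version value are illustrative assumptions.
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	body := []byte(`{
		"model": "anthropic.claude-3-sonnet-20240229-v1:0",
		"anthropic_version": "bedrock-2023-05-31",
		"max_tokens": 256,
		"messages": [{"role": "user", "content": "Hello, Claude"}]
	}`)

	req, err := http.NewRequest(http.MethodPost, "http://localhost:8002/api/providers/bedrock/anthropic/v1/messages", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("x-api-key", "MY_BRICKSLLM_KEY") // assumed BricksLLM key header

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer res.Body.Close()

	out, _ := io.ReadAll(res.Body)
	fmt.Println(res.Status, string(out))
}
```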
+ /api/providers/vllm/v1/chat/completions: post: parameters: From 5dc1e681d75a7691f3b1522f9b023af1f5dfbc0d Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 11 Sep 2024 18:21:23 -0700 Subject: [PATCH 12/51] fix provider selection issue --- internal/authenticator/authenticator.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/authenticator/authenticator.go b/internal/authenticator/authenticator.go index 005b67c..38c53b8 100644 --- a/internal/authenticator/authenticator.go +++ b/internal/authenticator/authenticator.go @@ -156,6 +156,10 @@ func (a *Authenticator) getProviderSettingsThatCanAccessCustomRoute(path string, } func canAccessPath(provider string, path string) bool { + if provider == "bedrock" && !strings.HasPrefix(path, "/api/providers/bedrock") { + return false + } + if provider == "openai" && !strings.HasPrefix(path, "/api/providers/openai") { return false } From 8682f5e50a1712c5cce727f50feafa6f516bac1d Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 11 Sep 2024 18:25:26 -0700 Subject: [PATCH 13/51] update CHANGELOG --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 802f541..ab2a2ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ -## 1.36.0 - 2024-08-10 +## 1.36.1 - 2024-09-10 +### Fixed +- Fixed provider selection issue when a key is associated with multiple providers + +## 1.36.0 - 2024-09-09 ### Added - Added Amazon Bedrock integration for Claude models From 0750ef86da6b7f75aa08234a81092f35ffb8eef3 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 13 Sep 2024 08:30:13 -0700 Subject: [PATCH 14/51] fixed compatibility issues --- CHANGELOG.md | 4 ++++ internal/server/web/proxy/bedrock.go | 10 +++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab2a2ee..f5cf5df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.36.2 - 2024-09-13 +### Fixed +- Fixed compatibility issues between Anthropic SDK and AWS Bedrock + ## 1.36.1 - 2024-09-10 ### Fixed - Fixed provider selection issue when a key is associated with multiple providers diff --git a/internal/server/web/proxy/bedrock.go b/internal/server/web/proxy/bedrock.go index 37d9669..83ff891 100644 --- a/internal/server/web/proxy/bedrock.go +++ b/internal/server/web/proxy/bedrock.go @@ -21,6 +21,12 @@ import ( "go.uber.org/zap/zapcore" ) +func setAnthropicVersionIfExists(version string, req *anthropic.BedrockMessageRequest) { + if req != nil && len(version) > 0 { + req.AnthropicVersion = version + } +} + func getBedrockCompletionHandler(prod bool, e anthropicEstimator, timeOut time.Duration) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) @@ -309,6 +315,8 @@ func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Dur return } + setAnthropicVersionIfExists(c.GetHeader("anthropic-version"), req) + bs, err := json.Marshal(req) if err != nil { telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.marshal_error", nil, 1) @@ -410,6 +418,7 @@ func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Dur streamOutput, err := client.InvokeModelWithResponseStream(ctx, &bedrockruntime.InvokeModelWithResponseStreamInput{ ModelId: &anthropicReq.Model, ContentType: aws.String("application/json"), + Accept: aws.String("application/json"), Body: bs, }) @@ -500,7 +509,6 @@ func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Dur return false } default: - 
telemetry.Timing("bricksllm.proxy.get_bedrock_messages_handler.streaming_latency", time.Since(start), nil, 1) return false } From aa6321c106f392b0810199da8b9134829ec58839 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Mon, 16 Sep 2024 15:58:23 -0700 Subject: [PATCH 15/51] add support for openai o1 --- internal/provider/openai/cost.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index adcdd73..152d310 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -34,6 +34,8 @@ func parseFinetuneModel(model string) string { var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "prompt": { + "o1-preview": 0.015, + "o1-preview-2024-09-12": 0.015, "gpt-4o": 0.005, "gpt-4o-mini": 0.00015, "gpt-4o-mini-2024-07-18": 0.00015, @@ -96,6 +98,8 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "tts-1-hd": 0.03, }, "completion": { + "o1-preview": 0.06, + "o1-preview-2024-09-12": 0.06, "gpt-3.5-turbo-1106": 0.002, "gpt-4o": 0.015, "gpt-4o-mini": 0.0006, From 805ef731a3e63130cb9d704919c652ce7f0caa92 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Mon, 16 Sep 2024 16:00:33 -0700 Subject: [PATCH 16/51] update CHANGElOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5cf5df..7b21719 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.36.3 - 2024-09-16 +### Added +- Added support for OpenAI o1 models + ## 1.36.2 - 2024-09-13 ### Fixed - Fixed compatibility issues between Anthropic SDK and AWS Bedrock From 0976748f354874c48ac6f126862be705090072b7 Mon Sep 17 00:00:00 2001 From: Phuong Le Date: Thu, 10 Oct 2024 18:09:22 +0100 Subject: [PATCH 17/51] remove redundant gpt-4o entries from supported models --- internal/manager/route.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/internal/manager/route.go b/internal/manager/route.go index e499a5d..cb5739e 100644 --- a/internal/manager/route.go +++ b/internal/manager/route.go @@ -115,8 +115,6 @@ var ( "gpt-4-32k-0613", "gpt-4-32k-0314", "gpt-4-turbo", - "gpt-4o-2024-05-13", - "gpt-4o", "gpt-35-turbo", "gpt-35-turbo-0125", "gpt-35-turbo-1106", @@ -146,8 +144,6 @@ var ( "gpt-4-turbo-2024-04-09", "gpt-4-1106-preview", "gpt-4-turbo-preview", - "gpt-4o-2024-05-13", - "gpt-4o", "gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-3.5-turbo-1106", @@ -177,8 +173,6 @@ var ( "gpt-4-turbo-2024-04-09", "gpt-4-1106-preview", "gpt-4-turbo-preview", - "gpt-4o-2024-05-13", - "gpt-4o", "gpt-35-turbo", "gpt-35-turbo-0125", "gpt-35-turbo-1106", @@ -232,8 +226,6 @@ var ( "gpt-4-turbo-2024-04-09", "gpt-4-1106-preview", "gpt-4-turbo-preview", - "gpt-4o-2024-05-13", - "gpt-4o", "gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-3.5-turbo-1106", From 7aedf1c6add6e56af532af56efdee446dca7bc21 Mon Sep 17 00:00:00 2001 From: Phuong Le Date: Thu, 10 Oct 2024 18:31:17 +0100 Subject: [PATCH 18/51] add `gpt-4o-mini` to supported models --- internal/manager/route.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/manager/route.go b/internal/manager/route.go index cb5739e..6a2d07b 100644 --- a/internal/manager/route.go +++ b/internal/manager/route.go @@ -103,6 +103,7 @@ var ( azureSupportedModels = []string{ "gpt-4o-2024-05-13", "gpt-4o", + "gpt-4o-mini", "gpt-4-turbo-2024-04-09", "gpt-4-0125-preview", "gpt-4-vision-preview", @@ -129,6 +130,7 @@ var ( openaiSupportedModels = []string{ "gpt-4o-2024-05-13", "gpt-4o", + "gpt-4o-mini", "gpt-4-turbo-2024-04-09", "gpt-4-0125-preview", 
"gpt-4-vision-preview", @@ -158,6 +160,7 @@ var ( supportedModels = []string{ "gpt-4o-2024-05-13", "gpt-4o", + "gpt-4o-mini", "gpt-4-turbo-2024-04-09", "gpt-4-0125-preview", "gpt-4-vision-preview", @@ -211,6 +214,7 @@ var ( "gpt-35-turbo-16k-0613", "gpt-4o-2024-05-13", "gpt-4o", + "gpt-4o-mini", "gpt-4-turbo-2024-04-09", "gpt-4-0125-preview", "gpt-4-vision-preview", From b0be28eaadec7eba86f2f4c3b667b6daee0b06ea Mon Sep 17 00:00:00 2001 From: Phuong Le Date: Thu, 10 Oct 2024 18:31:48 +0100 Subject: [PATCH 19/51] remove redundant attribute --- docker-compose.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 07f9717..a044fa1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,3 @@ -version: '3.8' services: redis: image: redis:6.2-alpine @@ -38,4 +37,4 @@ volumes: redis: driver: local postgresql: - driver: local \ No newline at end of file + driver: local From 193b3625b8ea2a7f055dca434d57998d81dcf414 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Tue, 15 Oct 2024 08:39:43 -0700 Subject: [PATCH 20/51] update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b21719..4e3a3c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.36.4 - 2024-09-16 +### Added +- Added support for `gpt-4o-mini` in routes + ## 1.36.3 - 2024-09-16 ### Added - Added support for OpenAI o1 models From 677331419a67f3dea68512f4cae7deb93d68e7ab Mon Sep 17 00:00:00 2001 From: Amirhesam Adibinia Date: Wed, 16 Oct 2024 17:27:52 +0330 Subject: [PATCH 21/51] chore: update gpt-4o models' prices --- internal/provider/openai/cost.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index 152d310..dfa9040 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -36,11 +36,11 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "prompt": { "o1-preview": 0.015, "o1-preview-2024-09-12": 0.015, - "gpt-4o": 0.005, + "gpt-4o": 0.0025, "gpt-4o-mini": 0.00015, "gpt-4o-mini-2024-07-18": 0.00015, "gpt-4o-2024-05-13": 0.005, - "gpt-4o-2024-08-06": 0.005, + "gpt-4o-2024-08-06": 0.0025, "gpt-4-1106-preview": 0.01, "gpt-4-turbo-preview": 0.01, "gpt-4-turbo": 0.01, @@ -101,11 +101,11 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "o1-preview": 0.06, "o1-preview-2024-09-12": 0.06, "gpt-3.5-turbo-1106": 0.002, - "gpt-4o": 0.015, + "gpt-4o": 0.01, "gpt-4o-mini": 0.0006, "gpt-4o-mini-2024-07-18": 0.0006, "gpt-4o-2024-05-13": 0.015, - "gpt-4o-2024-08-06": 0.015, + "gpt-4o-2024-08-06": 0.01, "gpt-4-turbo-preview": 0.03, "gpt-4-turbo": 0.03, "gpt-4-turbo-2024-04-09": 0.03, From 8b3c614f3a91c2b463a58dfd13e097b6fd31a0ea Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 16 Oct 2024 07:16:01 -0700 Subject: [PATCH 22/51] update CHANGELOG --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e3a3c8..4e60250 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ -## 1.36.4 - 2024-09-16 +## 1.36.4 - 2024-10-16 +### Changed +- Updated `gpt-4o` pricing according to OpenAI updates + +## 1.36.4 - 2024-10-15 ### Added - Added support for `gpt-4o-mini` in routes From 0c74013d62b700bf33d2d81c72fa9eadf6a1d6db Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 16 Oct 2024 07:17:44 -0700 Subject: [PATCH 23/51] update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e60250..39fe41f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 1.36.4 - 2024-10-16 +## 1.36.5 - 2024-10-16 ### Changed - Updated `gpt-4o` pricing according to OpenAI updates From d792ed81b8e26601ebc53646ebd51b4055b5105a Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 23 Oct 2024 18:59:31 -0700 Subject: [PATCH 24/51] add request level timeout --- internal/server/web/proxy/anthropic.go | 11 +- internal/server/web/proxy/audio.go | 12 +- .../server/web/proxy/azure_chat_completion.go | 4 +- internal/server/web/proxy/azure_completion.go | 4 +- internal/server/web/proxy/azure_embedding.go | 4 +- internal/server/web/proxy/bedrock.go | 12 +- internal/server/web/proxy/chat_completion.go | 4 +- internal/server/web/proxy/custom_provider.go | 4 +- internal/server/web/proxy/deepinfra.go | 12 +- internal/server/web/proxy/embedding.go | 4 +- internal/server/web/proxy/proxy.go | 157 +++++++++--------- .../server/web/proxy/timeout_middleware.go | 33 ++++ internal/server/web/proxy/vector_store.go | 20 +-- .../server/web/proxy/vector_store_file.go | 16 +- .../web/proxy/vector_store_file_batch.go | 16 +- internal/server/web/proxy/vllm.go | 8 +- 16 files changed, 178 insertions(+), 143 deletions(-) create mode 100644 internal/server/web/proxy/timeout_middleware.go diff --git a/internal/server/web/proxy/anthropic.go b/internal/server/web/proxy/anthropic.go index 9c4c902..5853ed3 100644 --- a/internal/server/web/proxy/anthropic.go +++ b/internal/server/web/proxy/anthropic.go @@ -30,7 +30,7 @@ type anthropicEstimator interface { func copyHttpHeaders(source *http.Request, dest *http.Request, removeUseAgent bool) { for k := range source.Header { - if strings.ToLower(k) != "X-CUSTOM-EVENT-ID" { + if strings.ToLower(k) != "x-custom-event-id" { dest.Header.Set(k, source.Header.Get(k)) } } @@ -42,7 +42,7 @@ func copyHttpHeaders(source *http.Request, dest *http.Request, removeUseAgent bo dest.Header.Set("Accept-Encoding", "*") } -func getCompletionHandler(prod, private bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getCompletionHandler(prod, private bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_completion_handler.requests", nil, 1) @@ -52,7 +52,7 @@ func getCompletionHandler(prod, private bool, client http.Client, timeOut time.D return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.anthropic.com/v1/complete", c.Request.Body) @@ -198,6 +198,7 @@ func getCompletionHandler(prod, private bool, client http.Client, timeOut time.D eventName := "" c.Stream(func(w io.Writer) bool { raw, err := buffer.ReadBytes('\n') + if err != nil { if err == io.EOF { return false @@ -296,7 +297,7 @@ var ( eventContentBlockStop = []byte("event: content_block_stop") ) -func getMessagesHandler(prod, private bool, client http.Client, e anthropicEstimator, timeOut time.Duration) gin.HandlerFunc { +func getMessagesHandler(prod, private bool, client http.Client, e anthropicEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_messages_handler.requests", nil, 1) @@ -306,7 +307,7 @@ func getMessagesHandler(prod, private bool, client http.Client, e anthropicEstim return } - ctx, cancel := 
context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.anthropic.com/v1/messages", c.Request.Body) diff --git a/internal/server/web/proxy/audio.go b/internal/server/web/proxy/audio.go index aa50a62..af6e2a9 100644 --- a/internal/server/web/proxy/audio.go +++ b/internal/server/web/proxy/audio.go @@ -20,7 +20,7 @@ import ( "go.uber.org/zap/zapcore" ) -func getSpeechHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getSpeechHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_speech_handler.requests", nil, 1) @@ -30,7 +30,7 @@ func getSpeechHandler(prod bool, client http.Client, timeOut time.Duration) gin. return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, c.Request.Method, "https://api.openai.com/v1/audio/speech", c.Request.Body) @@ -167,7 +167,7 @@ func getContentType(format string) string { return "text/plain; charset=utf-8" } -func getTranscriptionsHandler(prod bool, client http.Client, timeOut time.Duration, e estimator) gin.HandlerFunc { +func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.requests", nil, 1) @@ -177,7 +177,7 @@ func getTranscriptionsHandler(prod bool, client http.Client, timeOut time.Durati return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, c.Request.Method, "https://api.openai.com/v1/audio/transcriptions", c.Request.Body) @@ -331,7 +331,7 @@ func getTranscriptionsHandler(prod bool, client http.Client, timeOut time.Durati } } -func getTranslationsHandler(prod bool, client http.Client, timeOut time.Duration, e estimator) gin.HandlerFunc { +func getTranslationsHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_translations_handler.requests", nil, 1) @@ -341,7 +341,7 @@ func getTranslationsHandler(prod bool, client http.Client, timeOut time.Duration return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, c.Request.Method, "https://api.openai.com/v1/audio/translations", c.Request.Body) diff --git a/internal/server/web/proxy/azure_chat_completion.go b/internal/server/web/proxy/azure_chat_completion.go index 907f122..4b77a8f 100644 --- a/internal/server/web/proxy/azure_chat_completion.go +++ b/internal/server/web/proxy/azure_chat_completion.go @@ -30,7 +30,7 @@ func buildAzureUrl(path, deploymentId, apiVersion, resourceName string) string { return fmt.Sprintf("https://%s.openai.azure.com/openai/deployments/%s/embeddings?api-version=%s", resourceName, deploymentId, apiVersion) } -func getAzureChatCompletionHandler(prod, private bool, client http.Client, aoe azureEstimator, timeOut 
time.Duration) gin.HandlerFunc { +func getAzureChatCompletionHandler(prod, private bool, client http.Client, aoe azureEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_azure_chat_completion_handler.requests", nil, 1) @@ -40,7 +40,7 @@ func getAzureChatCompletionHandler(prod, private bool, client http.Client, aoe a return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, buildAzureUrl(c.FullPath(), c.Param("deployment_id"), c.Query("api-version"), c.GetString("resourceName")), c.Request.Body) diff --git a/internal/server/web/proxy/azure_completion.go b/internal/server/web/proxy/azure_completion.go index f1d5906..0a8cb79 100644 --- a/internal/server/web/proxy/azure_completion.go +++ b/internal/server/web/proxy/azure_completion.go @@ -66,7 +66,7 @@ func logAzureCompletionsResponse(log *zap.Logger, prod, private bool, cr *goopen } } -func getAzureCompletionsHandler(prod, private bool, client http.Client, aoe azureEstimator, timeOut time.Duration) gin.HandlerFunc { +func getAzureCompletionsHandler(prod, private bool, client http.Client, aoe azureEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_azure_completions_handler.requests", nil, 1) @@ -76,7 +76,7 @@ func getAzureCompletionsHandler(prod, private bool, client http.Client, aoe azur return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, buildAzureUrl(c.FullPath(), c.Param("deployment_id"), c.Query("api-version"), c.GetString("resourceName")), c.Request.Body) diff --git a/internal/server/web/proxy/azure_embedding.go b/internal/server/web/proxy/azure_embedding.go index d15675d..f9790d2 100644 --- a/internal/server/web/proxy/azure_embedding.go +++ b/internal/server/web/proxy/azure_embedding.go @@ -14,7 +14,7 @@ import ( goopenai "github.com/sashabaranov/go-openai" ) -func getAzureEmbeddingsHandler(prod, private bool, client http.Client, aoe azureEstimator, timeOut time.Duration) gin.HandlerFunc { +func getAzureEmbeddingsHandler(prod, private bool, client http.Client, aoe azureEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_azure_embeddings_handler.requests", nil, 1) @@ -31,7 +31,7 @@ func getAzureEmbeddingsHandler(prod, private bool, client http.Client, aoe azure // return // } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, c.Request.Method, buildAzureUrl(c.FullPath(), c.Param("deployment_id"), c.Query("api-version"), c.GetString("resourceName")), c.Request.Body) diff --git a/internal/server/web/proxy/bedrock.go b/internal/server/web/proxy/bedrock.go index 83ff891..d9dc977 100644 --- a/internal/server/web/proxy/bedrock.go +++ b/internal/server/web/proxy/bedrock.go @@ -27,7 +27,7 @@ func setAnthropicVersionIfExists(version string, req *anthropic.BedrockMessageRe } } -func getBedrockCompletionHandler(prod bool, e anthropicEstimator, timeOut time.Duration) gin.HandlerFunc { +func 
getBedrockCompletionHandler(prod bool, e anthropicEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_bedrock_completion_handler.requests", nil, 1) @@ -82,7 +82,7 @@ func getBedrockCompletionHandler(prod bool, e anthropicEstimator, timeOut time.D return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() cfg, err := config.LoadDefaultConfig(ctx, config.WithCredentialsProvider(credentials.StaticCredentialsProvider{ @@ -103,7 +103,7 @@ func getBedrockCompletionHandler(prod bool, e anthropicEstimator, timeOut time.D client := bedrockruntime.NewFromConfig(cfg) stream := c.GetBool("stream") - ctx, cancel = context.WithTimeout(context.Background(), timeOut) + ctx, cancel = context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() start := time.Now() @@ -282,7 +282,7 @@ func getEventNameFromLine(line []byte) string { return "" } -func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Duration) gin.HandlerFunc { +func getBedrockMessagesHandler(prod bool, e anthropicEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_bedrock_messages_handler.requests", nil, 1) @@ -335,7 +335,7 @@ func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Dur return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() cfg, err := config.LoadDefaultConfig(ctx, config.WithCredentialsProvider(credentials.StaticCredentialsProvider{ @@ -356,7 +356,7 @@ func getBedrockMessagesHandler(prod bool, e anthropicEstimator, timeOut time.Dur client := bedrockruntime.NewFromConfig(cfg) stream := c.GetBool("stream") - ctx, cancel = context.WithTimeout(context.Background(), timeOut) + ctx, cancel = context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() start := time.Now() diff --git a/internal/server/web/proxy/chat_completion.go b/internal/server/web/proxy/chat_completion.go index b33d7fe..19c6245 100644 --- a/internal/server/web/proxy/chat_completion.go +++ b/internal/server/web/proxy/chat_completion.go @@ -17,7 +17,7 @@ import ( goopenai "github.com/sashabaranov/go-openai" ) -func getChatCompletionHandler(prod, private bool, client http.Client, e estimator, timeOut time.Duration) gin.HandlerFunc { +func getChatCompletionHandler(prod, private bool, client http.Client, e estimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_chat_completion_handler.requests", nil, 1) @@ -27,7 +27,7 @@ func getChatCompletionHandler(prod, private bool, client http.Client, e estimato return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/chat/completions", c.Request.Body) diff --git a/internal/server/web/proxy/custom_provider.go b/internal/server/web/proxy/custom_provider.go index 4a3b5bc..6e1989d 100644 --- a/internal/server/web/proxy/custom_provider.go +++ b/internal/server/web/proxy/custom_provider.go @@ -46,7 +46,7 @@ type ErrorResponse struct { Error *Error `json:"error"` } 
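The recurring change in the hunks above and below is one pattern: instead of wiring a fixed timeout into each handler constructor, every handler derives its outbound context from a per-request duration stored under the "requestTimeout" key by the timeout middleware added in this patch. A minimal, self-contained sketch of that pattern (with a placeholder route, upstream URL, and default duration, not the proxy's real handlers) might look like this:

package main

import (
	"context"
	"net/http"
	"time"

	"github.com/gin-gonic/gin"
)

// exampleHandler mirrors the pattern from the diff: the outbound request context
// is built from the duration stored on the gin context, not a constructor argument.
func exampleHandler(client http.Client) gin.HandlerFunc {
	return func(c *gin.Context) {
		ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout"))
		defer cancel()

		// Placeholder upstream URL for illustration only.
		req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://example.com/upstream", c.Request.Body)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		resp, err := client.Do(req)
		if err != nil {
			c.JSON(http.StatusGatewayTimeout, gin.H{"error": err.Error()})
			return
		}
		defer resp.Body.Close()

		c.Status(resp.StatusCode)
	}
}

func main() {
	r := gin.New()
	// Stand-in for the timeout middleware introduced later in this patch,
	// which would normally parse x-request-timeout and fall back to a default.
	r.Use(func(c *gin.Context) { c.Set("requestTimeout", 30*time.Second) })
	r.POST("/example", exampleHandler(http.Client{}))
	_ = r.Run(":8080")
}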
-func getCustomProviderHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getCustomProviderHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { tags := []string{ fmt.Sprintf("path:%s", c.FullPath()), @@ -68,7 +68,7 @@ func getCustomProviderHandler(prod bool, client http.Client, timeOut time.Durati } logWithCid := util.GetLogFromCtx(c) - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() body, err := io.ReadAll(c.Request.Body) diff --git a/internal/server/web/proxy/deepinfra.go b/internal/server/web/proxy/deepinfra.go index d5156c1..8aecc12 100644 --- a/internal/server/web/proxy/deepinfra.go +++ b/internal/server/web/proxy/deepinfra.go @@ -17,7 +17,7 @@ import ( goopenai "github.com/sashabaranov/go-openai" ) -func getDeepinfraCompletionsHandler(prod, private bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getDeepinfraCompletionsHandler(prod, private bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_deepinfra_completions_handler.requests", nil, 1) @@ -26,7 +26,7 @@ func getDeepinfraCompletionsHandler(prod, private bool, client http.Client, time return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.deepinfra.com/v1/openai/completions", c.Request.Body) @@ -221,7 +221,7 @@ func getDeepinfraCompletionsHandler(prod, private bool, client http.Client, time } } -func getDeepinfraChatCompletionsHandler(prod, private bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getDeepinfraChatCompletionsHandler(prod, private bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_deepinfra_chat_completions_handler.requests", nil, 1) @@ -230,7 +230,7 @@ func getDeepinfraChatCompletionsHandler(prod, private bool, client http.Client, return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.deepinfra.com/v1/openai/chat/completions", c.Request.Body) @@ -416,7 +416,7 @@ func getDeepinfraChatCompletionsHandler(prod, private bool, client http.Client, } } -func getDeepinfraEmbeddingsHandler(prod, private bool, client http.Client, e deepinfraEstimator, timeout time.Duration) gin.HandlerFunc { +func getDeepinfraEmbeddingsHandler(prod, private bool, client http.Client, e deepinfraEstimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_deepinfra_embeddings_handler.requests", nil, 1) @@ -425,7 +425,7 @@ func getDeepinfraEmbeddingsHandler(prod, private bool, client http.Client, e dee return } - ctx, cancel := context.WithTimeout(context.Background(), timeout) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.deepinfra.com/v1/openai/embeddings", c.Request.Body) diff --git a/internal/server/web/proxy/embedding.go 
b/internal/server/web/proxy/embedding.go index 49c3384..1d689e3 100644 --- a/internal/server/web/proxy/embedding.go +++ b/internal/server/web/proxy/embedding.go @@ -30,7 +30,7 @@ type EmbeddingResponseBase64 struct { Usage goopenai.Usage `json:"usage"` } -func getEmbeddingHandler(prod, private bool, client http.Client, e estimator, timeOut time.Duration) gin.HandlerFunc { +func getEmbeddingHandler(prod, private bool, client http.Client, e estimator) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_embedding_handler.requests", nil, 1) @@ -47,7 +47,7 @@ func getEmbeddingHandler(prod, private bool, client http.Client, e estimator, ti // return // } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, c.Request.Method, "https://api.openai.com/v1/embeddings", c.Request.Body) diff --git a/internal/server/web/proxy/proxy.go b/internal/server/web/proxy/proxy.go index c7d9641..e6078f4 100644 --- a/internal/server/web/proxy/proxy.go +++ b/internal/server/web/proxy/proxy.go @@ -79,12 +79,13 @@ func CorsMiddleware() gin.HandlerFunc { } } -func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyManager, rm routeManager, a authenticator, psm ProviderSettingsManager, cpm CustomProvidersManager, ks keyStorage, e estimator, ae anthropicEstimator, aoe azureEstimator, v validator, r recorder, pub publisher, rlm rateLimitManager, timeOut time.Duration, ac accessCache, uac userAccessCache, pm PoliciesManager, scanner Scanner, cd CustomPolicyDetector, die deepinfraEstimator, um userManager, removeAgentHeaders bool) (*ProxyServer, error) { +func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyManager, rm routeManager, a authenticator, psm ProviderSettingsManager, cpm CustomProvidersManager, ks keyStorage, e estimator, ae anthropicEstimator, aoe azureEstimator, v validator, r recorder, pub publisher, rlm rateLimitManager, timeout time.Duration, ac accessCache, uac userAccessCache, pm PoliciesManager, scanner Scanner, cd CustomPolicyDetector, die deepinfraEstimator, um userManager, removeAgentHeaders bool) (*ProxyServer, error) { router := gin.New() prod := mode == "production" private := privacyMode == "strict" router.Use(CorsMiddleware()) + router.Use(getTimeoutMiddleware(timeout)) router.Use(getMiddleware(cpm, rm, pm, a, prod, private, log, pub, "proxy", ac, uac, http.Client{}, scanner, cd, um, removeAgentHeaders)) client := http.Client{} @@ -96,128 +97,128 @@ func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyMan router.GET("/api/health", getGetHealthCheckHandler()) // audios - router.POST("/api/providers/openai/v1/audio/speech", getSpeechHandler(prod, client, timeOut)) - router.POST("/api/providers/openai/v1/audio/transcriptions", getTranscriptionsHandler(prod, client, timeOut, e)) - router.POST("/api/providers/openai/v1/audio/translations", getTranslationsHandler(prod, client, timeOut, e)) + router.POST("/api/providers/openai/v1/audio/speech", getSpeechHandler(prod, client)) + router.POST("/api/providers/openai/v1/audio/transcriptions", getTranscriptionsHandler(prod, client, e)) + router.POST("/api/providers/openai/v1/audio/translations", getTranslationsHandler(prod, client, e)) // completions - router.POST("/api/providers/openai/v1/chat/completions", getChatCompletionHandler(prod, private, client, e, timeOut)) + 
router.POST("/api/providers/openai/v1/chat/completions", getChatCompletionHandler(prod, private, client, e)) // embeddings - router.POST("/api/providers/openai/v1/embeddings", getEmbeddingHandler(prod, private, client, e, timeOut)) + router.POST("/api/providers/openai/v1/embeddings", getEmbeddingHandler(prod, private, client, e)) // moderations - router.POST("/api/providers/openai/v1/moderations", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/moderations", getPassThroughHandler(prod, private, client)) // models - router.GET("/api/providers/openai/v1/models", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/models/:model", getPassThroughHandler(prod, private, client, timeOut)) - router.DELETE("/api/providers/openai/v1/models/:model", getPassThroughHandler(prod, private, client, timeOut)) + router.GET("/api/providers/openai/v1/models", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/models/:model", getPassThroughHandler(prod, private, client)) + router.DELETE("/api/providers/openai/v1/models/:model", getPassThroughHandler(prod, private, client)) // assistants - router.POST("/api/providers/openai/v1/assistants", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/assistants/:assistant_id", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/assistants/:assistant_id", getPassThroughHandler(prod, private, client, timeOut)) - router.DELETE("/api/providers/openai/v1/assistants/:assistant_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/assistants", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/assistants", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/assistants/:assistant_id", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/assistants/:assistant_id", getPassThroughHandler(prod, private, client)) + router.DELETE("/api/providers/openai/v1/assistants/:assistant_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/assistants", getPassThroughHandler(prod, private, client)) // assistant files - router.POST("/api/providers/openai/v1/assistants/:assistant_id/files", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/assistants/:assistant_id/files/:file_id", getPassThroughHandler(prod, private, client, timeOut)) - router.DELETE("/api/providers/openai/v1/assistants/:assistant_id/files/:file_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/assistants/:assistant_id/files", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/assistants/:assistant_id/files", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/assistants/:assistant_id/files/:file_id", getPassThroughHandler(prod, private, client)) + router.DELETE("/api/providers/openai/v1/assistants/:assistant_id/files/:file_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/assistants/:assistant_id/files", getPassThroughHandler(prod, private, client)) // threads - router.POST("/api/providers/openai/v1/threads", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id", 
getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/threads/:thread_id", getPassThroughHandler(prod, private, client, timeOut)) - router.DELETE("/api/providers/openai/v1/threads/:thread_id", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/threads", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/threads/:thread_id", getPassThroughHandler(prod, private, client)) + router.DELETE("/api/providers/openai/v1/threads/:thread_id", getPassThroughHandler(prod, private, client)) // messages - router.POST("/api/providers/openai/v1/threads/:thread_id/messages", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/messages/:message_id", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/threads/:thread_id/messages/:message_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/messages", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/threads/:thread_id/messages", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/messages/:message_id", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/threads/:thread_id/messages/:message_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/messages", getPassThroughHandler(prod, private, client)) // message files - router.GET("/api/providers/openai/v1/threads/:thread_id/messages/:message_id/files/:file_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/messages/:message_id/files", getPassThroughHandler(prod, private, client, timeOut)) + router.GET("/api/providers/openai/v1/threads/:thread_id/messages/:message_id/files/:file_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/messages/:message_id/files", getPassThroughHandler(prod, private, client)) // runs - router.POST("/api/providers/openai/v1/threads/:thread_id/runs", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/runs/:run_id", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/threads/:thread_id/runs/:run_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/runs", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/submit_tool_outputs", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/cancel", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/threads/runs", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/steps/:step_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/steps", getPassThroughHandler(prod, private, client, timeOut)) + 
router.POST("/api/providers/openai/v1/threads/:thread_id/runs", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/runs/:run_id", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/threads/:thread_id/runs/:run_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/runs", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/submit_tool_outputs", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/cancel", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/threads/runs", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/steps/:step_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/threads/:thread_id/runs/:run_id/steps", getPassThroughHandler(prod, private, client)) // files - router.GET("/api/providers/openai/v1/files", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/files", getPassThroughHandler(prod, private, client, timeOut)) - router.DELETE("/api/providers/openai/v1/files/:file_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/files/:file_id", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/files/:file_id/content", getPassThroughHandler(prod, private, client, timeOut)) + router.GET("/api/providers/openai/v1/files", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/files", getPassThroughHandler(prod, private, client)) + router.DELETE("/api/providers/openai/v1/files/:file_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/files/:file_id", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/files/:file_id/content", getPassThroughHandler(prod, private, client)) // batch - router.POST("/api/providers/openai/v1/batches", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/batches/:batch_id", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/batches/:batch_id/cancel", getPassThroughHandler(prod, private, client, timeOut)) - router.GET("/api/providers/openai/v1/batches", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/batches", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/batches/:batch_id", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/batches/:batch_id/cancel", getPassThroughHandler(prod, private, client)) + router.GET("/api/providers/openai/v1/batches", getPassThroughHandler(prod, private, client)) // images - router.POST("/api/providers/openai/v1/images/generations", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/images/edits", getPassThroughHandler(prod, private, client, timeOut)) - router.POST("/api/providers/openai/v1/images/variations", getPassThroughHandler(prod, private, client, timeOut)) + router.POST("/api/providers/openai/v1/images/generations", getPassThroughHandler(prod, private, client)) + router.POST("/api/providers/openai/v1/images/edits", getPassThroughHandler(prod, 
private, client)) + router.POST("/api/providers/openai/v1/images/variations", getPassThroughHandler(prod, private, client)) // azure - router.POST("/api/providers/azure/openai/deployments/:deployment_id/chat/completions", getAzureChatCompletionHandler(prod, private, client, aoe, timeOut)) - router.POST("/api/providers/azure/openai/deployments/:deployment_id/embeddings", getAzureEmbeddingsHandler(prod, private, client, aoe, timeOut)) - router.POST("/api/providers/azure/openai/deployments/:deployment_id/completions", getAzureCompletionsHandler(prod, private, client, aoe, timeOut)) + router.POST("/api/providers/azure/openai/deployments/:deployment_id/chat/completions", getAzureChatCompletionHandler(prod, private, client, aoe)) + router.POST("/api/providers/azure/openai/deployments/:deployment_id/embeddings", getAzureEmbeddingsHandler(prod, private, client, aoe)) + router.POST("/api/providers/azure/openai/deployments/:deployment_id/completions", getAzureCompletionsHandler(prod, private, client, aoe)) // anthropic - router.POST("/api/providers/anthropic/v1/complete", getCompletionHandler(prod, private, client, timeOut)) - router.POST("/api/providers/anthropic/v1/messages", getMessagesHandler(prod, private, client, ae, timeOut)) + router.POST("/api/providers/anthropic/v1/complete", getCompletionHandler(prod, private, client)) + router.POST("/api/providers/anthropic/v1/messages", getMessagesHandler(prod, private, client, ae)) // bedrock anthropic - router.POST("/api/providers/bedrock/anthropic/v1/complete", getBedrockCompletionHandler(prod, ae, timeOut)) - router.POST("/api/providers/bedrock/anthropic/v1/messages", getBedrockMessagesHandler(prod, ae, timeOut)) + router.POST("/api/providers/bedrock/anthropic/v1/complete", getBedrockCompletionHandler(prod, ae)) + router.POST("/api/providers/bedrock/anthropic/v1/messages", getBedrockMessagesHandler(prod, ae)) // vllm - router.POST("/api/providers/vllm/v1/chat/completions", getVllmChatCompletionsHandler(prod, private, client, timeOut)) - router.POST("/api/providers/vllm/v1/completions", getVllmCompletionsHandler(prod, private, client, timeOut)) + router.POST("/api/providers/vllm/v1/chat/completions", getVllmChatCompletionsHandler(prod, private, client)) + router.POST("/api/providers/vllm/v1/completions", getVllmCompletionsHandler(prod, private, client)) // deepinfra - router.POST("/api/providers/deepinfra/v1/chat/completions", getDeepinfraChatCompletionsHandler(prod, private, client, timeOut)) - router.POST("/api/providers/deepinfra/v1/completions", getDeepinfraCompletionsHandler(prod, private, client, timeOut)) - router.POST("/api/providers/deepinfra/v1/embeddings", getDeepinfraEmbeddingsHandler(prod, private, client, die, timeOut)) + router.POST("/api/providers/deepinfra/v1/chat/completions", getDeepinfraChatCompletionsHandler(prod, private, client)) + router.POST("/api/providers/deepinfra/v1/completions", getDeepinfraCompletionsHandler(prod, private, client)) + router.POST("/api/providers/deepinfra/v1/embeddings", getDeepinfraEmbeddingsHandler(prod, private, client, die)) // custom provider - router.POST("/api/custom/providers/:provider/*wildcard", getCustomProviderHandler(prod, client, timeOut)) + router.POST("/api/custom/providers/:provider/*wildcard", getCustomProviderHandler(prod, client)) // custom route router.POST("/api/routes/*route", getRouteHandler(prod, c, aoe, e, client, r)) // vector store - router.POST("/api/providers/openai/v1/vector_stores", getCreateVectorStoreHandler(prod, client, timeOut)) - 
router.GET("/api/providers/openai/v1/vector_stores", getListVectorStoresHandler(prod, client, timeOut)) - router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id", getGetVectorStoreHandler(prod, client, timeOut)) - router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id", getModifyVectorStoreHandler(prod, client, timeOut)) - router.DELETE("/api/providers/openai/v1/vector_stores/:vector_store_id", getDeleteVectorStoreHandler(prod, client, timeOut)) + router.POST("/api/providers/openai/v1/vector_stores", getCreateVectorStoreHandler(prod, client)) + router.GET("/api/providers/openai/v1/vector_stores", getListVectorStoresHandler(prod, client)) + router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id", getGetVectorStoreHandler(prod, client)) + router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id", getModifyVectorStoreHandler(prod, client)) + router.DELETE("/api/providers/openai/v1/vector_stores/:vector_store_id", getDeleteVectorStoreHandler(prod, client)) // vector store files - router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/files", getCreateVectorStoreFileHandler(prod, client, timeOut)) - router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/files", getListVectorStoreFilesHandler(prod, client, timeOut)) - router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/files/:file_id", getGetVectorStoreFileHandler(prod, client, timeOut)) - router.DELETE("/api/providers/openai/v1/vector_stores/:vector_store_id/files/:file_id", getDeleteVectorStoreFileHandler(prod, client, timeOut)) + router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/files", getCreateVectorStoreFileHandler(prod, client)) + router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/files", getListVectorStoreFilesHandler(prod, client)) + router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/files/:file_id", getGetVectorStoreFileHandler(prod, client)) + router.DELETE("/api/providers/openai/v1/vector_stores/:vector_store_id/files/:file_id", getDeleteVectorStoreFileHandler(prod, client)) // vector store file batches - router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches", getCreateVectorStoreFileBatchHandler(prod, client, timeOut)) - router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id", getGetVectorStoreFileBatchHandler(prod, client, timeOut)) - router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/cancel", getCancelVectorStoreFileBatchHandler(prod, client, timeOut)) - router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/files", getListVectorStoreFileBatchFilesHandler(prod, client, timeOut)) + router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches", getCreateVectorStoreFileBatchHandler(prod, client)) + router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id", getGetVectorStoreFileBatchHandler(prod, client)) + router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/cancel", getCancelVectorStoreFileBatchHandler(prod, client)) + router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/files", getListVectorStoreFileBatchFilesHandler(prod, client)) srv := &http.Server{ Addr: ":8002", @@ -278,7 +279,7 @@ func writeFieldToBuffer(fields []string, c *gin.Context, writer *multipart.Write return nil } -func getPassThroughHandler(prod, 
private bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getPassThroughHandler(prod, private bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) @@ -293,7 +294,7 @@ func getPassThroughHandler(prod, private bool, client http.Client, timeOut time. return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() targetUrl, err := buildProxyUrl(c) diff --git a/internal/server/web/proxy/timeout_middleware.go b/internal/server/web/proxy/timeout_middleware.go new file mode 100644 index 0000000..b588146 --- /dev/null +++ b/internal/server/web/proxy/timeout_middleware.go @@ -0,0 +1,33 @@ +package proxy + +import ( + "net/http" + "time" + + "github.com/gin-gonic/gin" +) + +func getTimeoutMiddleware(timeout time.Duration) gin.HandlerFunc { + return func(c *gin.Context) { + if c == nil || c.Request == nil { + JSON(c, http.StatusInternalServerError, "[BricksLLM] request is empty") + c.Abort() + return + } + + timeoutHeader := c.GetHeader("x-request-timeout") + parsedTimeout := timeout + if len(timeoutHeader) != 0 { + parsed, err := time.ParseDuration(timeoutHeader) + if err != nil { + JSON(c, http.StatusBadRequest, "[BricksLLM] invalid timeout") + c.Abort() + return + } + + parsedTimeout = parsed + } + + c.Set("requestTimeout", parsedTimeout) + } +} diff --git a/internal/server/web/proxy/vector_store.go b/internal/server/web/proxy/vector_store.go index 02b25e6..423a821 100644 --- a/internal/server/web/proxy/vector_store.go +++ b/internal/server/web/proxy/vector_store.go @@ -13,7 +13,7 @@ import ( goopenai "github.com/sashabaranov/go-openai" ) -func getCreateVectorStoreHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getCreateVectorStoreHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_create_vector_store_handler.requests", nil, 1) @@ -23,7 +23,7 @@ func getCreateVectorStoreHandler(prod bool, client http.Client, timeOut time.Dur return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores", c.Request.Body) @@ -94,7 +94,7 @@ func getCreateVectorStoreHandler(prod bool, client http.Client, timeOut time.Dur } } -func getListVectorStoresHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getListVectorStoresHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_list_vector_stores_handler.requests", nil, 1) @@ -104,7 +104,7 @@ func getListVectorStoresHandler(prod bool, client http.Client, timeOut time.Dura return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores", c.Request.Body) @@ -175,7 +175,7 @@ func getListVectorStoresHandler(prod bool, client http.Client, timeOut time.Dura } } -func getGetVectorStoreHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func 
getGetVectorStoreHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_get_vector_store_handler.requests", nil, 1) @@ -185,7 +185,7 @@ func getGetVectorStoreHandler(prod bool, client http.Client, timeOut time.Durati return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id"), c.Request.Body) @@ -256,7 +256,7 @@ func getGetVectorStoreHandler(prod bool, client http.Client, timeOut time.Durati } } -func getModifyVectorStoreHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getModifyVectorStoreHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_modify_vector_store_handler.requests", nil, 1) @@ -266,7 +266,7 @@ func getModifyVectorStoreHandler(prod bool, client http.Client, timeOut time.Dur return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id"), c.Request.Body) @@ -337,7 +337,7 @@ func getModifyVectorStoreHandler(prod bool, client http.Client, timeOut time.Dur } } -func getDeleteVectorStoreHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getDeleteVectorStoreHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_delete_vector_store_handler.requests", nil, 1) @@ -347,7 +347,7 @@ func getDeleteVectorStoreHandler(prod bool, client http.Client, timeOut time.Dur return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodDelete, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id"), c.Request.Body) diff --git a/internal/server/web/proxy/vector_store_file.go b/internal/server/web/proxy/vector_store_file.go index 2ef5206..91282c6 100644 --- a/internal/server/web/proxy/vector_store_file.go +++ b/internal/server/web/proxy/vector_store_file.go @@ -13,7 +13,7 @@ import ( goopenai "github.com/sashabaranov/go-openai" ) -func getCreateVectorStoreFileHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getCreateVectorStoreFileHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_create_vector_store_file_handler.requests", nil, 1) @@ -23,7 +23,7 @@ func getCreateVectorStoreFileHandler(prod bool, client http.Client, timeOut time return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/files", c.Request.Body) @@ -94,7 +94,7 @@ func getCreateVectorStoreFileHandler(prod bool, 
client http.Client, timeOut time } } -func getListVectorStoreFilesHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getListVectorStoreFilesHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_list_vector_store_files_handler.requests", nil, 1) @@ -104,7 +104,7 @@ func getListVectorStoreFilesHandler(prod bool, client http.Client, timeOut time. return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/files", c.Request.Body) @@ -175,7 +175,7 @@ func getListVectorStoreFilesHandler(prod bool, client http.Client, timeOut time. } } -func getGetVectorStoreFileHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getGetVectorStoreFileHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_get_vector_store_file_handler.requests", nil, 1) @@ -185,7 +185,7 @@ func getGetVectorStoreFileHandler(prod bool, client http.Client, timeOut time.Du return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/files/"+c.Param("file_id"), c.Request.Body) @@ -256,7 +256,7 @@ func getGetVectorStoreFileHandler(prod bool, client http.Client, timeOut time.Du } } -func getDeleteVectorStoreFileHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getDeleteVectorStoreFileHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_delete_vector_store_file_handler.requests", nil, 1) @@ -266,7 +266,7 @@ func getDeleteVectorStoreFileHandler(prod bool, client http.Client, timeOut time return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodDelete, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/files/"+c.Param("file_id"), c.Request.Body) diff --git a/internal/server/web/proxy/vector_store_file_batch.go b/internal/server/web/proxy/vector_store_file_batch.go index 12fce40..16e80eb 100644 --- a/internal/server/web/proxy/vector_store_file_batch.go +++ b/internal/server/web/proxy/vector_store_file_batch.go @@ -13,7 +13,7 @@ import ( goopenai "github.com/sashabaranov/go-openai" ) -func getCreateVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getCreateVectorStoreFileBatchHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_create_vector_store_file_batch_handler.requests", nil, 1) @@ -23,7 +23,7 @@ func getCreateVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := 
context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/file_batches", c.Request.Body) @@ -94,7 +94,7 @@ func getCreateVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut } } -func getGetVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getGetVectorStoreFileBatchHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_get_vector_store_file_batch_handler.requests", nil, 1) @@ -104,7 +104,7 @@ func getGetVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut ti return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/file_batches/"+c.Param("batch_id"), c.Request.Body) @@ -175,7 +175,7 @@ func getGetVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut ti } } -func getCancelVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getCancelVectorStoreFileBatchHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_cancel_vector_store_file_batch_handler.requests", nil, 1) @@ -185,7 +185,7 @@ func getCancelVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/file_batches/"+c.Param("batch_id")+"/cancel", c.Request.Body) @@ -256,7 +256,7 @@ func getCancelVectorStoreFileBatchHandler(prod bool, client http.Client, timeOut } } -func getListVectorStoreFileBatchFilesHandler(prod bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getListVectorStoreFileBatchFilesHandler(prod bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_list_vector_store_file_batch_files_handler.requests", nil, 1) @@ -266,7 +266,7 @@ func getListVectorStoreFileBatchFilesHandler(prod bool, client http.Client, time return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/v1/vector_stores/"+c.Param("vector_store_id")+"/file_batches/"+c.Param("batch_id")+"/files", c.Request.Body) diff --git a/internal/server/web/proxy/vllm.go b/internal/server/web/proxy/vllm.go index 5415913..ec9ef93 100644 --- a/internal/server/web/proxy/vllm.go +++ b/internal/server/web/proxy/vllm.go @@ -20,7 +20,7 @@ import ( "go.uber.org/zap/zapcore" ) -func getVllmCompletionsHandler(prod, private bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getVllmCompletionsHandler(prod, private bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) 
{ log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_vllm_completions_handler.requests", nil, 1) @@ -36,7 +36,7 @@ func getVllmCompletionsHandler(prod, private bool, client http.Client, timeOut t return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, url+"/v1/completions", c.Request.Body) @@ -368,7 +368,7 @@ func logVllmCompletionResponse(log *zap.Logger, cr *goopenai.CompletionResponse, } } -func getVllmChatCompletionsHandler(prod, private bool, client http.Client, timeOut time.Duration) gin.HandlerFunc { +func getVllmChatCompletionsHandler(prod, private bool, client http.Client) gin.HandlerFunc { return func(c *gin.Context) { log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_vllm_chat_completions_handler.requests", nil, 1) @@ -384,7 +384,7 @@ func getVllmChatCompletionsHandler(prod, private bool, client http.Client, timeO return } - ctx, cancel := context.WithTimeout(context.Background(), timeOut) + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) defer cancel() req, err := http.NewRequestWithContext(ctx, http.MethodPost, url+"/v1/chat/completions", c.Request.Body) From ca59ca699f21ecb3b7a44910cb861c4fd1b6847e Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 23 Oct 2024 19:07:26 -0700 Subject: [PATCH 25/51] update doc --- CHANGELOG.md | 4 + docs/proxy.yaml | 305 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 309 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39fe41f..cd6f57c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.37.0 - 2024-10-23 +### Added +- Added request level timeout with HTTP header `x-request-timeout` + ## 1.36.5 - 2024-10-16 ### Changed - Updated `gpt-4o` pricing according to OpenAI updates diff --git a/docs/proxy.yaml b/docs/proxy.yaml index a0613ac..2e6c492 100644 --- a/docs/proxy.yaml +++ b/docs/proxy.yaml @@ -50,6 +50,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: OpenAI Chat Completions @@ -68,6 +73,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Call OpenAI embeddings @@ -86,6 +96,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Call OpenAI moderations @@ -99,6 +114,11 @@ paths: schema: type: string description: Custom Id that can be used to retrieve an event associated with each proxy request. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Get OpenAI models @@ -121,6 +141,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. 
- in: path name: model required: true @@ -141,6 +166,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: List files @@ -158,6 +188,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Upload a file @@ -180,6 +215,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: file_id required: true @@ -198,6 +238,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: file_id required: true @@ -222,6 +267,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: file_id required: true @@ -245,6 +295,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create a batch @@ -262,6 +317,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: List batches @@ -284,6 +344,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: batch_id required: true @@ -302,6 +367,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Cancel a batch @@ -320,6 +390,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Generate images @@ -338,6 +413,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Edit images @@ -356,6 +436,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create image variations @@ -374,6 +459,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. 
+ - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create speech @@ -392,6 +482,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create transcriptions @@ -410,6 +505,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create translations @@ -428,6 +528,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create assistant @@ -445,6 +550,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: List assistants @@ -467,6 +577,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: assistant_id required: true @@ -490,6 +605,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Modify assistant @@ -512,6 +632,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Delete assistant @@ -535,6 +660,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create assistant file @@ -557,6 +687,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: List assistant files @@ -575,6 +710,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: assistant_id required: true @@ -612,6 +752,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Delete assistant file @@ -630,6 +775,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. 
Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create thread @@ -652,6 +802,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -670,6 +825,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -692,6 +852,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Delete thread @@ -710,6 +875,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -732,6 +902,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -760,6 +935,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: message_id required: true @@ -782,6 +962,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: message_id required: true @@ -814,6 +999,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: file_id required: true @@ -853,6 +1043,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: List message files @@ -871,6 +1066,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -892,6 +1092,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -920,6 +1125,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. 
- in: path name: run_id required: true @@ -954,6 +1164,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Modify run @@ -972,6 +1187,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -1000,6 +1220,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - OpenAI summary: Create thread and run @@ -1022,6 +1247,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: step_id required: true @@ -1052,6 +1282,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: thread_id required: true @@ -1084,6 +1319,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: deployment_id required: true @@ -1114,6 +1354,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: deployment_id required: true @@ -1143,6 +1388,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: deployment_id required: true @@ -1169,6 +1419,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: header name: Content-Type schema: @@ -1197,6 +1452,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: header name: Content-Type schema: @@ -1225,6 +1485,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - Bedrock summary: Create Bedrock Anthropic completion @@ -1238,6 +1503,11 @@ paths: schema: type: string description: Custom Id that can be used to retrieve an event associated with each proxy request. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. 
- in: header name: Content-Type schema: @@ -1261,6 +1531,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - vLLM summary: Create chat completions @@ -1279,6 +1554,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - vLLM summary: Create completions @@ -1297,6 +1577,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - DeepInfra summary: Create chat completions @@ -1315,6 +1600,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - DeepInfra summary: Create completions @@ -1333,6 +1623,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - DeepInfra summary: Create embeddings @@ -1351,6 +1646,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. - in: path name: provider required: true @@ -1375,6 +1675,11 @@ paths: schema: type: string description: Metadata in stringified JSON format. + - in: header + name: X-REQUEST-TIMEOUT + schema: + type: string + description: Timeout for the request. Format can be `1s`, `1m`, `1h`, etc. tags: - Route summary: Call a route From 768ab1debfceaa4655431be150dd12a2cf52693f Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Wed, 30 Oct 2024 12:50:21 +0800 Subject: [PATCH 26/51] add support for AWS elastic cache. 
--- cmd/bricksllm/main.go | 92 +++++++++++++-------------------------- internal/config/config.go | 2 + 2 files changed, 32 insertions(+), 62 deletions(-) diff --git a/cmd/bricksllm/main.go b/cmd/bricksllm/main.go index e88022f..84c1170 100644 --- a/cmd/bricksllm/main.go +++ b/cmd/bricksllm/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "crypto/tls" "flag" "fmt" "os" @@ -173,22 +174,25 @@ func main() { } rMemStore.Listen() - rateLimitRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 0, - }) + defaultRedisOption := func(cfg *config.Config, dbIndex int) *redis.Options { + return &redis.Options{ + Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), + Password: cfg.RedisPassword, + DB: cfg.RedisDBStartIndex + dbIndex, + TLSConfig: &tls.Config{ + InsecureSkipVerify: cfg.RedisInsecureSkipVerify, + }, + } + } + + rateLimitRedisCache := redis.NewClient(defaultRedisOption(cfg, 0)) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() if err := rateLimitRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to rate limit redis cache: %v", err) } - costLimitRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 1, - }) + costLimitRedisCache := redis.NewClient(defaultRedisOption(cfg, 1)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -196,11 +200,7 @@ func main() { log.Sugar().Fatalf("error connecting to cost limit redis cache: %v", err) } - costRedisStorage := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 2, - }) + costRedisStorage := redis.NewClient(defaultRedisOption(cfg, 2)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -208,11 +208,7 @@ func main() { log.Sugar().Fatalf("error connecting to cost limit redis storage: %v", err) } - apiRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 3, - }) + apiRedisCache := redis.NewClient(defaultRedisOption(cfg, 3)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -220,87 +216,59 @@ func main() { log.Sugar().Fatalf("error connecting to api redis cache: %v", err) } - accessRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 4, - }) + accessRedisCache := redis.NewClient(defaultRedisOption(cfg, 4)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := apiRedisCache.Ping(ctx).Err(); err != nil { + if err := accessRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to api redis cache: %v", err) } - userRateLimitRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 5, - }) + userRateLimitRedisCache := redis.NewClient(defaultRedisOption(cfg, 5)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := rateLimitRedisCache.Ping(ctx).Err(); err != nil { + if err := userRateLimitRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to user rate limit redis cache: %v", err) } - userCostLimitRedisCache := 
redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 6, - }) + userCostLimitRedisCache := redis.NewClient(defaultRedisOption(cfg, 6)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := costLimitRedisCache.Ping(ctx).Err(); err != nil { + if err := userCostLimitRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to user cost limit redis cache: %v", err) } - userCostRedisStorage := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 7, - }) + userCostRedisStorage := redis.NewClient(defaultRedisOption(cfg, 7)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := apiRedisCache.Ping(ctx).Err(); err != nil { + if err := userCostRedisStorage.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to user cost redis cache: %v", err) } - userAccessRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 8, - }) + userAccessRedisCache := redis.NewClient(defaultRedisOption(cfg, 8)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := costRedisStorage.Ping(ctx).Err(); err != nil { + if err := userAccessRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to user access redis storage: %v", err) } - providerSettingsRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 9, - }) + providerSettingsRedisCache := redis.NewClient(defaultRedisOption(cfg, 9)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := costRedisStorage.Ping(ctx).Err(); err != nil { + if err := providerSettingsRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to provider settings redis storage: %v", err) } - keysRedisCache := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), - Password: cfg.RedisPassword, - DB: 10, - }) + keysRedisCache := redis.NewClient(defaultRedisOption(cfg, 10)) ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) defer cancel() - if err := costRedisStorage.Ping(ctx).Err(); err != nil { + if err := keysRedisCache.Ping(ctx).Err(); err != nil { log.Sugar().Fatalf("error connecting to keys redis storage: %v", err) } diff --git a/internal/config/config.go b/internal/config/config.go index 2f2fc8c..fdc3028 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -25,6 +25,8 @@ type Config struct { RedisPort string `koanf:"redis_port" env:"REDIS_PORT" envDefault:"6379"` RedisUsername string `koanf:"redis_username" env:"REDIS_USERNAME"` RedisPassword string `koanf:"redis_password" env:"REDIS_PASSWORD"` + RedisInsecureSkipVerify bool `koanf:"redis_insecure_skip_verify" env:"REDIS_INSECURE_SKIP_VERIFY" envDefault:"false"` + RedisDBStartIndex int `koanf:"redis_db_start_index" env:"REDIS_DB_START_INDEX" envDefault:"0"` RedisReadTimeout time.Duration `koanf:"redis_read_time_out" env:"REDIS_READ_TIME_OUT" envDefault:"1s"` RedisWriteTimeout time.Duration `koanf:"redis_write_time_out" env:"REDIS_WRITE_TIME_OUT" envDefault:"500ms"` PostgresqlReadTimeout time.Duration `koanf:"postgresql_read_time_out" env:"POSTGRESQL_READ_TIME_OUT" envDefault:"10m"` From 
3009774abea1e18870788759dfedbb07aec6b1db Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Wed, 30 Oct 2024 12:52:14 +0800 Subject: [PATCH 27/51] run `go mod tidy`. --- go.mod | 11 ++++------- go.sum | 15 --------------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/go.mod b/go.mod index b099bce..bef3be1 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/DataDog/datadog-go/v5 v5.3.0 github.com/asticode/go-astisub v0.26.2 github.com/aws/aws-sdk-go-v2/config v1.27.7 + github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.16.2 github.com/aws/aws-sdk-go-v2/service/comprehend v1.31.2 github.com/caarlos0/env v3.5.0+incompatible github.com/cenkalti/backoff/v4 v4.3.0 @@ -25,29 +26,25 @@ require ( require ( github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 // indirect - github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.16.2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 // indirect - github.com/google/go-cmp v0.6.0 // indirect github.com/knadh/koanf/maps v0.1.1 // indirect - github.com/kr/pretty v0.3.1 // indirect + github.com/kr/text v0.2.0 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/prometheus/client_model v0.5.0 // indirect github.com/prometheus/common v0.48.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect - github.com/rogpeppe/go-internal v1.10.0 // indirect go.uber.org/atomic v1.7.0 // indirect - gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect ) require ( github.com/Microsoft/go-winio v0.5.0 // indirect github.com/asticode/go-astikit v0.20.0 // indirect github.com/asticode/go-astits v1.8.0 // indirect - github.com/aws/aws-sdk-go-v2 v1.30.5 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.17.7 // indirect + github.com/aws/aws-sdk-go-v2 v1.30.5 + github.com/aws/aws-sdk-go-v2/credentials v1.17.7 github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.3 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 // indirect github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 // indirect diff --git a/go.sum b/go.sum index aaf0e9b..716e361 100644 --- a/go.sum +++ b/go.sum @@ -8,8 +8,6 @@ github.com/asticode/go-astisub v0.26.2 h1:cdEXcm+SUSmYCEPTQYbbfCECnmQoIFfH6pF8wD github.com/asticode/go-astisub v0.26.2/go.mod h1:WTkuSzFB+Bp7wezuSf2Oxulj5A8zu2zLRVFf6bIFQK8= github.com/asticode/go-astits v1.8.0 h1:rf6aiiGn/QhlFjNON1n5plqF3Fs025XLUwiQ0NB6oZg= github.com/asticode/go-astits v1.8.0/go.mod h1:DkOWmBNQpnr9mv24KfZjq4JawCFX1FCqjLVGvO0DygQ= -github.com/aws/aws-sdk-go-v2 v1.25.3 h1:xYiLpZTQs1mzvz5PaI6uR0Wh57ippuEthxS4iK5v0n0= -github.com/aws/aws-sdk-go-v2 v1.25.3/go.mod h1:35hUlJVYd+M++iLI3ALmVwMOyRYMmRqUXpTtRGW+K9I= github.com/aws/aws-sdk-go-v2 v1.30.5 h1:mWSRTwQAb0aLE17dSzztCVJWI9+cRMgqebndjwDyK0g= github.com/aws/aws-sdk-go-v2 v1.30.5/go.mod h1:CT+ZPWXbYrci8chcARI3OmI/qgd+f6WtuLOoaIA8PR0= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 h1:70PVAiL15/aBMh5LThwgXdSQorVr91L127ttckI9QQU= @@ -20,12 +18,8 @@ github.com/aws/aws-sdk-go-v2/credentials v1.17.7 h1:WJd+ubWKoBeRh7A5iNMnxEOs982S github.com/aws/aws-sdk-go-v2/credentials v1.17.7/go.mod h1:UQi7LMR0Vhvs+44w5ec8Q+VS+cd10cjwgHwiVkE0YGU= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.3 h1:p+y7FvkK2dxS+FEwRIDHDe//ZX+jDhP8HHE50ppj4iI= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.3/go.mod h1:/fYB+FZbDlwlAiynK9KDXlzZl3ANI9JkD0Uhz5FjNT4= 
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.3 h1:ifbIbHZyGl1alsAhPIYsHOg5MuApgqOvVeI8wIugXfs= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.3/go.mod h1:oQZXg3c6SNeY6OZrDY+xHcF4VGIEoNotX2B4PrDeoJI= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 h1:pI7Bzt0BJtYA0N/JEC6B8fJ4RBrEMi1LBrkMdFYNSnQ= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17/go.mod h1:Dh5zzJYMtxfIjYW+/evjQ8uj2OyR/ve2KROHGHlSFqE= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.3 h1:Qvodo9gHG9F3E8SfYOspPeBt0bjSbsevK8WhRAUHcoY= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.3/go.mod h1:vCKrdLXtybdf/uQd/YfVR2r5pcbNuEYKzMQpcxmeSJw= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 h1:Mqr/V5gvrhA2gvgnF42Zh5iMiQNcOYthFYwCyrnuWlc= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17/go.mod h1:aLJpZlCmjE+V+KtN1q1uyZkfnUWpQGpbsn89XPKyzfU= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 h1:hT8rVHwugYE2lEfdFE0QWVo81lF7jMrYJVDWI+f+VxU= @@ -44,8 +38,6 @@ github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.2 h1:pi0Skl6mNl2w8qWZXcdOyg19 github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.2/go.mod h1:JYzLoEVeLXk+L4tn1+rrkfhkxl6mLDEVaDSvGq9og90= github.com/aws/aws-sdk-go-v2/service/sts v1.28.4 h1:Ppup1nVNAOWbBOrcoOxaxPeEnSFB2RnnQdguhXpmeQk= github.com/aws/aws-sdk-go-v2/service/sts v1.28.4/go.mod h1:+K1rNPVyGxkRuv9NNiaZ4YhBFuyw2MMA9SlIJ1Zlpz8= -github.com/aws/smithy-go v1.20.1 h1:4SZlSlMr36UEqC7XOyRVb27XMeZubNcBNN+9IgEPIQw= -github.com/aws/smithy-go v1.20.1/go.mod h1:krry+ya/rV9RDcV/Q16kpu6ypI4K2czasz0NC3qS14E= github.com/aws/smithy-go v1.20.4 h1:2HK1zBdPgRbjFOHlfeQZfpC4r72MOb9bZkiFwggKO+4= github.com/aws/smithy-go v1.20.4/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= @@ -119,11 +111,8 @@ github.com/knadh/koanf/providers/file v0.1.0 h1:fs6U7nrV58d3CFAFh8VTde8TM262ObYf github.com/knadh/koanf/providers/file v0.1.0/go.mod h1:rjJ/nHQl64iYCtAW2QQnF0eSmDEX/YZ/eNFj5yR6BvA= github.com/knadh/koanf/v2 v2.1.1 h1:/R8eXqasSTsmDCsAyYj+81Wteg8AqrV9CP6gvsTsOmM= github.com/knadh/koanf/v2 v2.1.1/go.mod h1:4mnTRbZCK+ALuBXHZMjDfG9y714L7TykVnZkXbMU3Es= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= @@ -146,7 +135,6 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= -github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/profile v1.4.0/go.mod 
h1:NWz/XGvpEW1FyYQ7fCx4dqYBLlfTcE+A9FLAkNKqjFE= @@ -164,11 +152,8 @@ github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= github.com/redis/go-redis/v9 v9.0.5 h1:CuQcn5HIEeK7BgElubPP8CGtE0KakrnbBSTLjathl5o= github.com/redis/go-redis/v9 v9.0.5/go.mod h1:WqMKv5vnQbRuZstUwxQI195wHy+t4PuXDOjzMvcuQHk= -github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/sashabaranov/go-openai v1.24.0 h1:4H4Pg8Bl2RH/YSnU8DYumZbuHnnkfioor/dtNlB20D4= -github.com/sashabaranov/go-openai v1.24.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sashabaranov/go-openai v1.26.3 h1:Tjnh4rcvsSU68f66r05mys+Zou4vo4qyvkne6AIRJPI= github.com/sashabaranov/go-openai v1.26.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= From 8e07ef4fdcc769e57507545bd0fe53856627e8f3 Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Wed, 30 Oct 2024 13:40:06 +0800 Subject: [PATCH 28/51] add gpt-4o latest model. --- internal/manager/route.go | 4 ++++ internal/provider/azure/cost.go | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/internal/manager/route.go b/internal/manager/route.go index 6a2d07b..904f776 100644 --- a/internal/manager/route.go +++ b/internal/manager/route.go @@ -101,6 +101,7 @@ func checkModelValidity(provider, model string) bool { var ( azureSupportedModels = []string{ + "gpt-4o-2024-08-26", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", @@ -128,6 +129,7 @@ var ( } openaiSupportedModels = []string{ + "gpt-4o-2024-08-16", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", @@ -158,6 +160,7 @@ var ( } supportedModels = []string{ + "gpt-4o-2024-08-16", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", @@ -212,6 +215,7 @@ var ( "gpt-35-turbo-0613", "gpt-35-turbo-16k", "gpt-35-turbo-16k-0613", + "gpt-4o-2024-08-16", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", diff --git a/internal/provider/azure/cost.go b/internal/provider/azure/cost.go index 1b7a8ed..eebd64a 100644 --- a/internal/provider/azure/cost.go +++ b/internal/provider/azure/cost.go @@ -9,8 +9,11 @@ import ( ) var AzureOpenAiPerThousandTokenCost = map[string]map[string]float64{ + // updated according to this link: + // https://azure.microsoft.com/en-gb/pricing/details/cognitive-services/openai-service/ "prompt": { - "gpt-4o": 0.005, + "gpt-4o": 0.0025, + "gpt-4o-2024-08-16": 0.0025, "gpt-4o-2024-05-13": 0.005, "gpt-4-turbo": 0.01, "gpt-4-turbo-2024-04-09": 0.01, @@ -28,7 +31,8 @@ var AzureOpenAiPerThousandTokenCost = map[string]map[string]float64{ "text-embedding-3-small": 0.00002, }, "completion": { - "gpt-4o": 0.015, + "gpt-4o": 0.01, + "gpt-4o-2024-08-16": 0.01, "gpt-4o-2024-05-13": 0.015, "gpt-4-turbo": 0.03, "gpt-4-turbo-2024-04-09": 0.03, From 2ace998c97d58c7265a8d6ef34211dd046d9c7fd Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Wed, 30 Oct 2024 13:40:27 +0800 Subject: [PATCH 29/51] upgrade dependency of goopenai to support structured output. 
--- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index b099bce..280a469 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( github.com/mattn/go-colorable v0.1.13 github.com/pkoukk/tiktoken-go v0.1.7 github.com/redis/go-redis/v9 v9.0.5 - github.com/sashabaranov/go-openai v1.26.3 + github.com/sashabaranov/go-openai v1.32.5 github.com/stretchr/testify v1.8.4 github.com/tidwall/gjson v1.17.0 go.uber.org/zap v1.24.0 diff --git a/go.sum b/go.sum index aaf0e9b..faa3b51 100644 --- a/go.sum +++ b/go.sum @@ -171,6 +171,8 @@ github.com/sashabaranov/go-openai v1.24.0 h1:4H4Pg8Bl2RH/YSnU8DYumZbuHnnkfioor/d github.com/sashabaranov/go-openai v1.24.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sashabaranov/go-openai v1.26.3 h1:Tjnh4rcvsSU68f66r05mys+Zou4vo4qyvkne6AIRJPI= github.com/sashabaranov/go-openai v1.26.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= +github.com/sashabaranov/go-openai v1.32.5 h1:/eNVa8KzlE7mJdKPZDj6886MUzZQjoVHyn0sLvIt5qA= +github.com/sashabaranov/go-openai v1.32.5/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= From a4baa74179b799c0fd42f541650b7a1725430345 Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Thu, 31 Oct 2024 23:04:37 +0800 Subject: [PATCH 30/51] revert changes to gpt-4o cost. --- internal/provider/azure/cost.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/provider/azure/cost.go b/internal/provider/azure/cost.go index eebd64a..bec7eb7 100644 --- a/internal/provider/azure/cost.go +++ b/internal/provider/azure/cost.go @@ -12,7 +12,7 @@ var AzureOpenAiPerThousandTokenCost = map[string]map[string]float64{ // updated according to this link: // https://azure.microsoft.com/en-gb/pricing/details/cognitive-services/openai-service/ "prompt": { - "gpt-4o": 0.0025, + "gpt-4o": 0.005, "gpt-4o-2024-08-16": 0.0025, "gpt-4o-2024-05-13": 0.005, "gpt-4-turbo": 0.01, @@ -31,7 +31,7 @@ var AzureOpenAiPerThousandTokenCost = map[string]map[string]float64{ "text-embedding-3-small": 0.00002, }, "completion": { - "gpt-4o": 0.01, + "gpt-4o": 0.015, "gpt-4o-2024-08-16": 0.01, "gpt-4o-2024-05-13": 0.015, "gpt-4-turbo": 0.03, From a0f7edbf049c38218aecb402936bcb0b45fe01f7 Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Fri, 1 Nov 2024 18:56:32 +0800 Subject: [PATCH 31/51] fix model version number. 
--- internal/manager/route.go | 6 +++--- internal/provider/azure/cost.go | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/manager/route.go b/internal/manager/route.go index 904f776..0d96127 100644 --- a/internal/manager/route.go +++ b/internal/manager/route.go @@ -129,7 +129,7 @@ var ( } openaiSupportedModels = []string{ - "gpt-4o-2024-08-16", + "gpt-4o-2024-08-06", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", @@ -160,7 +160,7 @@ var ( } supportedModels = []string{ - "gpt-4o-2024-08-16", + "gpt-4o-2024-08-06", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", @@ -215,7 +215,7 @@ var ( "gpt-35-turbo-0613", "gpt-35-turbo-16k", "gpt-35-turbo-16k-0613", - "gpt-4o-2024-08-16", + "gpt-4o-2024-08-06", "gpt-4o-2024-05-13", "gpt-4o", "gpt-4o-mini", diff --git a/internal/provider/azure/cost.go b/internal/provider/azure/cost.go index bec7eb7..2ed5556 100644 --- a/internal/provider/azure/cost.go +++ b/internal/provider/azure/cost.go @@ -13,7 +13,7 @@ var AzureOpenAiPerThousandTokenCost = map[string]map[string]float64{ // https://azure.microsoft.com/en-gb/pricing/details/cognitive-services/openai-service/ "prompt": { "gpt-4o": 0.005, - "gpt-4o-2024-08-16": 0.0025, + "gpt-4o-2024-08-06": 0.0025, "gpt-4o-2024-05-13": 0.005, "gpt-4-turbo": 0.01, "gpt-4-turbo-2024-04-09": 0.01, @@ -32,7 +32,7 @@ var AzureOpenAiPerThousandTokenCost = map[string]map[string]float64{ }, "completion": { "gpt-4o": 0.015, - "gpt-4o-2024-08-16": 0.01, + "gpt-4o-2024-08-06": 0.01, "gpt-4o-2024-05-13": 0.015, "gpt-4-turbo": 0.03, "gpt-4-turbo-2024-04-09": 0.03, From 4796a61a2c9036bd35ac5f1760a235c5a5084fa2 Mon Sep 17 00:00:00 2001 From: Andrew Rothstein Date: Mon, 4 Nov 2024 21:44:36 +0000 Subject: [PATCH 32/51] go:1.23.2 setup-go@5 checkout@v4 docker/login-action@v3 docker/setup-buildx-action@v3 docker/metadata-action@v5 docker/build-push-action@v6 alpine:3.20 for upstream --- .github/workflows/release.yml | 24 ++++++++++++------------ Dockerfile.dev | 4 ++-- Dockerfile.prod | 4 ++-- docker-compose.yml | 6 +++--- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 55277ee..5884f46 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,13 +10,13 @@ jobs: runs-on: macos-latest steps: - name: Install Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: '^1.23.2' check-latest: true - name: Check Out Repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install the Apple certificate and provisioning profile env: @@ -58,27 +58,27 @@ jobs: runs-on: ubuntu-latest steps: - name: Install Go - uses: actions/setup-go@v3 + uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: '^1.23.2' check-latest: true - name: Check Out Repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Login to Docker Hub - uses: docker/login-action@v1 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_HUB_USERNAME }} password: ${{ secrets.DOCKER_HUB_PASSWORD }} - name: Install Buildx id: buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Docker meta id: meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5 with: images: luyuanxin1995/bricksllm tags: | @@ -88,7 +88,7 @@ jobs: - name: Docker meta id: meta-datadog - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5 with: images: luyuanxin1995/bricksllm-datadog tags: | @@ -97,7 +97,7 @@ jobs: 
type=semver,pattern={{major}} - name: Build and push - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v6 with: context: ./ file: ./Dockerfile.prod @@ -107,7 +107,7 @@ jobs: tags: ${{ steps.meta.outputs.tags }} - name: Build and push - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v6 with: context: ./ file: ./Dockerfile.datadog diff --git a/Dockerfile.dev b/Dockerfile.dev index a0e5954..33b3f4d 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -1,4 +1,4 @@ -FROM golang:1.22.1 AS build +FROM golang:1.23.2 AS build ENV CGO_ENABLED=0 ENV GOOS=linux @@ -6,7 +6,7 @@ WORKDIR /go/src/github.com/bricks-cloud/bricksllm/ COPY . /go/src/github.com/bricks-cloud/bricksllm/ RUN go build -ldflags="-s -w" -o ./bin/bricksllm ./cmd/bricksllm/main.go -FROM alpine:3.17 +FROM alpine:3.20 RUN apk --no-cache add ca-certificates WORKDIR /usr/bin COPY --from=build /go/src/github.com/bricks-cloud/bricksllm/bin /go/bin diff --git a/Dockerfile.prod b/Dockerfile.prod index 64e16de..1eb16a4 100644 --- a/Dockerfile.prod +++ b/Dockerfile.prod @@ -1,4 +1,4 @@ -FROM golang:1.22.1 AS build +FROM golang:1.23.2 AS build ENV CGO_ENABLED=0 ENV GOOS=linux @@ -6,7 +6,7 @@ WORKDIR /go/src/github.com/bricks-cloud/bricksllm/ COPY . /go/src/github.com/bricks-cloud/bricksllm/ RUN go build -ldflags="-s -w" -o ./bin/bricksllm ./cmd/bricksllm/main.go -FROM alpine:3.17 +FROM alpine:3.20 RUN apk --no-cache add ca-certificates WORKDIR /usr/bin COPY --from=build /go/src/github.com/bricks-cloud/bricksllm/bin /go/bin diff --git a/docker-compose.yml b/docker-compose.yml index a044fa1..a29949a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,7 +5,7 @@ services: ports: - '6379:6379' command: redis-server --save 20 1 --loglevel warning --requirepass eYVX7EwVmmxKPCDmwMtyKVge8oLd2t81 - volumes: + volumes: - redis:/data postgresql: image: postgres:14.1-alpine @@ -15,10 +15,10 @@ services: - POSTGRES_PASSWORD=postgres ports: - '5432:5432' - volumes: + volumes: - postgresql:/var/lib/postgresql/data # bricksllm: - # depends_on: + # depends_on: # - redis # - postgresql # image: luyuanxin1995/bricksllm From b70ed3388c72adf1446212f68795f8e26f12d570 Mon Sep 17 00:00:00 2001 From: Lei Lei Date: Thu, 7 Nov 2024 10:33:08 +0800 Subject: [PATCH 33/51] add fix --- go.mod | 1 + go.sum | 5 +++-- internal/server/web/proxy/middleware.go | 9 ++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 15bf54c..bab0c2e 100644 --- a/go.mod +++ b/go.mod @@ -21,6 +21,7 @@ require ( github.com/sashabaranov/go-openai v1.32.5 github.com/stretchr/testify v1.8.4 github.com/tidwall/gjson v1.17.0 + github.com/tidwall/sjson v1.2.5 go.uber.org/zap v1.24.0 ) diff --git a/go.sum b/go.sum index 324f3bc..3607d7d 100644 --- a/go.sum +++ b/go.sum @@ -154,8 +154,6 @@ github.com/redis/go-redis/v9 v9.0.5 h1:CuQcn5HIEeK7BgElubPP8CGtE0KakrnbBSTLjathl github.com/redis/go-redis/v9 v9.0.5/go.mod h1:WqMKv5vnQbRuZstUwxQI195wHy+t4PuXDOjzMvcuQHk= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/sashabaranov/go-openai v1.26.3 h1:Tjnh4rcvsSU68f66r05mys+Zou4vo4qyvkne6AIRJPI= -github.com/sashabaranov/go-openai v1.26.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sashabaranov/go-openai v1.32.5 h1:/eNVa8KzlE7mJdKPZDj6886MUzZQjoVHyn0sLvIt5qA= github.com/sashabaranov/go-openai v1.32.5/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= 
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= @@ -174,6 +172,7 @@ github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/gjson v1.17.0 h1:/Jocvlh98kcTfpN2+JzGQWQcqrPQwDrVEMApx/M5ZwM= github.com/tidwall/gjson v1.17.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= @@ -181,6 +180,8 @@ github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JT github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index 30b9eb0..9ce91ab 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -23,6 +23,7 @@ import ( "github.com/bricks-cloud/bricksllm/internal/util" "github.com/gin-gonic/gin" "github.com/tidwall/gjson" + "github.com/tidwall/sjson" "go.uber.org/zap" goopenai "github.com/sashabaranov/go-openai" @@ -763,7 +764,13 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag if c.FullPath() == "/api/providers/openai/v1/chat/completions" { ccr := &goopenai.ChatCompletionRequest{} - err = json.Unmarshal(body, ccr) + // this is a hack around an open issue in go-openai. 
+ // https://github.com/sashabaranov/go-openai/issues/884 + cleaned, err := sjson.Delete(string(body), "response_format.json_schema") + if err != nil { + logWithCid.Warn("removing response_format.json_schema", zap.Error(err)) + } + err = json.Unmarshal([]byte(cleaned), ccr) if err != nil { logError(logWithCid, "error when unmarshalling chat completion request", prod, err) return From 0643668eed8ccba2c74a19f9cb6bb3af39f1e8b8 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 9 Nov 2024 18:17:04 -0800 Subject: [PATCH 34/51] add new env variables for enabling redis tls --- cmd/bricksllm/.env | 2 ++ cmd/bricksllm/config_local.json | 2 ++ cmd/bricksllm/main.go | 12 +++++++++--- internal/config/config.go | 1 + 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/cmd/bricksllm/.env b/cmd/bricksllm/.env index 09ae4ee..f24859a 100644 --- a/cmd/bricksllm/.env +++ b/cmd/bricksllm/.env @@ -7,6 +7,8 @@ POSTGRESQL_PASSWORD= POSTGRESQL_SSL_MODE=disable POSTGRESQL_PORT=5432 REDIS_HOSTS=localhost +REDIS_ENABLE_TLS=false +REDIS_INSECURE_SKIP_VERIFY=false REDIS_PORT=6379 REDIS_USERNAME= REDIS_PASSWORD= diff --git a/cmd/bricksllm/config_local.json b/cmd/bricksllm/config_local.json index ae92a01..b1ec5bd 100644 --- a/cmd/bricksllm/config_local.json +++ b/cmd/bricksllm/config_local.json @@ -7,6 +7,8 @@ "postgresql_port": "5432", "redis_hosts": "localhost", "redis_port": "6379", + "redis_enable_tls": false, + "redis_insecure_skip_verify": false, "redis_username": "", "redis_password": "", "redis_read_time_out": "1s", diff --git a/cmd/bricksllm/main.go b/cmd/bricksllm/main.go index 84c1170..46166ce 100644 --- a/cmd/bricksllm/main.go +++ b/cmd/bricksllm/main.go @@ -175,14 +175,20 @@ func main() { rMemStore.Listen() defaultRedisOption := func(cfg *config.Config, dbIndex int) *redis.Options { - return &redis.Options{ + + options := &redis.Options{ Addr: fmt.Sprintf("%s:%s", cfg.RedisHosts, cfg.RedisPort), Password: cfg.RedisPassword, DB: cfg.RedisDBStartIndex + dbIndex, - TLSConfig: &tls.Config{ + } + + if cfg.RedisEnableTLS { + options.TLSConfig = &tls.Config{ InsecureSkipVerify: cfg.RedisInsecureSkipVerify, - }, + } } + + return options } rateLimitRedisCache := redis.NewClient(defaultRedisOption(cfg, 0)) diff --git a/internal/config/config.go b/internal/config/config.go index fdc3028..b1d3f49 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -25,6 +25,7 @@ type Config struct { RedisPort string `koanf:"redis_port" env:"REDIS_PORT" envDefault:"6379"` RedisUsername string `koanf:"redis_username" env:"REDIS_USERNAME"` RedisPassword string `koanf:"redis_password" env:"REDIS_PASSWORD"` + RedisEnableTLS bool `koanf:"redis_enable_tls" env:"REDIS_ENABLE_TLS" envDefault:"false"` RedisInsecureSkipVerify bool `koanf:"redis_insecure_skip_verify" env:"REDIS_INSECURE_SKIP_VERIFY" envDefault:"false"` RedisDBStartIndex int `koanf:"redis_db_start_index" env:"REDIS_DB_START_INDEX" envDefault:"0"` RedisReadTimeout time.Duration `koanf:"redis_read_time_out" env:"REDIS_READ_TIME_OUT" envDefault:"1s"` From 9afbfb34eee156731d3c6de3166fdb720b177803 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 9 Nov 2024 18:29:36 -0800 Subject: [PATCH 35/51] update cost --- internal/provider/anthropic/cost.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/provider/anthropic/cost.go b/internal/provider/anthropic/cost.go index 2fb0a58..4ec546b 100644 --- a/internal/provider/anthropic/cost.go +++ b/internal/provider/anthropic/cost.go @@ -13,6 +13,7 @@ var AnthropicPerMillionTokenCost = 
map[string]map[string]float64{ "claude-3-opus": 15, "claude-3-sonnet": 3, "claude-3.5-sonnet": 3, + "claude-3.5-haiku": 1, "claude-3-haiku": 0.25, }, "completion": { @@ -21,6 +22,7 @@ var AnthropicPerMillionTokenCost = map[string]map[string]float64{ "claude-3-opus": 75, "claude-3-sonnet": 15, "claude-3.5-sonnet": 15, + "claude-3.5-haiku": 5, "claude-3-haiku": 1.25, }, } @@ -77,8 +79,10 @@ func selectModel(model string) string { return "claude-3-opus" } else if strings.HasPrefix(model, "claude-3-sonnet") { return "claude-3-sonnet" - } else if strings.HasPrefix(model, "claude-3.5-sonnet") { + } else if strings.HasPrefix(model, "claude-3.5-sonnet") || strings.HasPrefix(model, "claude-3-5-sonnet") { return "claude-3.5-sonnet" + } else if strings.HasPrefix(model, "claude-3.5-haiku") || strings.HasPrefix(model, "claude-3-5-haiku") { + return "claude-3.5-haiku" } else if strings.HasPrefix(model, "claude-3-haiku") { return "claude-3-haiku" } else if strings.HasPrefix(model, "claude-instant") { From 78d1f24d87dc9768c5423af64d6e0e48b39e11c2 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sat, 9 Nov 2024 18:34:09 -0800 Subject: [PATCH 36/51] update CHANGELOG --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd6f57c..0116c18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## 1.38.0 - 2024-11-09 +### Added +- Added support for `claude-3-5-haiku` +- Added support for Redis TLS config +- Added support for `gpt-4o-2024-08-06` + ## 1.37.0 - 2024-10-23 ### Added - Added request level timeout with HTTP header `x-request-timeout` From d3e7cc8ec680333db8d83c1a5413eec2569cc8bc Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 15 Nov 2024 22:24:14 -0800 Subject: [PATCH 37/51] add encryption --- .gitignore | 3 +- CHANGELOG.md | 8 ++ cmd/bricksllm/.env | 2 - cmd/bricksllm/config_local.json | 2 - cmd/bricksllm/main.go | 14 +-- internal/authenticator/authenticator.go | 50 ++++++++-- internal/config/config.go | 11 ++- internal/encryptor/encryptor.go | 120 ++++++++++++++++++++++++ internal/manager/provider_setting.go | 57 ++++++++++- 9 files changed, 237 insertions(+), 30 deletions(-) create mode 100644 internal/encryptor/encryptor.go diff --git a/.gitignore b/.gitignore index ba36687..0f708b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ release_notes.md target .DS_STORE -.vscode/launch.json \ No newline at end of file +.vscode/launch.json +.env \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 0116c18..7b89999 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## 1.39.0 - 2024-11-15 +### Added +- Added encryption integration + +### Changed +- Removed support for Redis TLS config + + ## 1.38.0 - 2024-11-09 ### Added - Added support for `claude-3-5-haiku` diff --git a/cmd/bricksllm/.env b/cmd/bricksllm/.env index f24859a..09ae4ee 100644 --- a/cmd/bricksllm/.env +++ b/cmd/bricksllm/.env @@ -7,8 +7,6 @@ POSTGRESQL_PASSWORD= POSTGRESQL_SSL_MODE=disable POSTGRESQL_PORT=5432 REDIS_HOSTS=localhost -REDIS_ENABLE_TLS=false -REDIS_INSECURE_SKIP_VERIFY=false REDIS_PORT=6379 REDIS_USERNAME= REDIS_PASSWORD= diff --git a/cmd/bricksllm/config_local.json b/cmd/bricksllm/config_local.json index b1ec5bd..ae92a01 100644 --- a/cmd/bricksllm/config_local.json +++ b/cmd/bricksllm/config_local.json @@ -7,8 +7,6 @@ "postgresql_port": "5432", "redis_hosts": "localhost", "redis_port": "6379", - "redis_enable_tls": false, - "redis_insecure_skip_verify": false, "redis_username": "", "redis_password": "", "redis_read_time_out": 
"1s", diff --git a/cmd/bricksllm/main.go b/cmd/bricksllm/main.go index 46166ce..e5d2b7a 100644 --- a/cmd/bricksllm/main.go +++ b/cmd/bricksllm/main.go @@ -2,7 +2,6 @@ package main import ( "context" - "crypto/tls" "flag" "fmt" "os" @@ -13,6 +12,7 @@ import ( auth "github.com/bricks-cloud/bricksllm/internal/authenticator" "github.com/bricks-cloud/bricksllm/internal/cache" "github.com/bricks-cloud/bricksllm/internal/config" + "github.com/bricks-cloud/bricksllm/internal/encryptor" "github.com/bricks-cloud/bricksllm/internal/logger/zap" "github.com/bricks-cloud/bricksllm/internal/manager" "github.com/bricks-cloud/bricksllm/internal/message" @@ -182,12 +182,6 @@ func main() { DB: cfg.RedisDBStartIndex + dbIndex, } - if cfg.RedisEnableTLS { - options.TLSConfig = &tls.Config{ - InsecureSkipVerify: cfg.RedisInsecureSkipVerify, - } - } - return options } @@ -292,9 +286,11 @@ func main() { psCache := redisStorage.NewProviderSettingsCache(providerSettingsRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) keysCache := redisStorage.NewKeysCache(keysRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) + encryptor := encryptor.NewEncryptor(cfg.DecryptionEndpoint, cfg.EncryptionEndpoint, cfg.EnableEncrytion, cfg.EncryptionTimeout) + m := manager.NewManager(store, costLimitCache, rateLimitCache, accessCache, keysCache) krm := manager.NewReportingManager(costStorage, store, store) - psm := manager.NewProviderSettingsManager(store, psCache) + psm := manager.NewProviderSettingsManager(store, psCache, encryptor) cpm := manager.NewCustomProvidersManager(store, cpMemStore) rm := manager.NewRouteManager(store, store, rMemStore, psm) pm := manager.NewPolicyManager(store, rMemStore) @@ -332,7 +328,7 @@ func main() { rec := recorder.NewRecorder(costStorage, userCostStorage, costLimitCache, userCostLimitCache, ce, store) rlm := manager.NewRateLimitManager(rateLimitCache, userRateLimitCache) - a := auth.NewAuthenticator(psm, m, rm, store) + a := auth.NewAuthenticator(psm, m, rm, store, encryptor) c := cache.NewCache(apiCache) diff --git a/internal/authenticator/authenticator.go b/internal/authenticator/authenticator.go index 38c53b8..68959d3 100644 --- a/internal/authenticator/authenticator.go +++ b/internal/authenticator/authenticator.go @@ -5,6 +5,7 @@ import ( "fmt" "math/rand" "net/http" + "strconv" "strings" internal_errors "github.com/bricks-cloud/bricksllm/internal/errors" @@ -34,19 +35,26 @@ type keyStorage interface { GetKeyByHash(hash string) (*key.ResponseKey, error) } +type Decryptor interface { + Decrypt(input string, headers map[string]string) (string, error) + Enabled() bool +} + type Authenticator struct { - psm providerSettingsManager - kc keysCache - rm routesManager - ks keyStorage + psm providerSettingsManager + kc keysCache + rm routesManager + ks keyStorage + decryptor Decryptor } -func NewAuthenticator(psm providerSettingsManager, kc keysCache, rm routesManager, ks keyStorage) *Authenticator { +func NewAuthenticator(psm providerSettingsManager, kc keysCache, rm routesManager, ks keyStorage, decryptor Decryptor) *Authenticator { return &Authenticator{ - psm: psm, - kc: kc, - rm: rm, - ks: ks, + psm: psm, + kc: kc, + rm: rm, + ks: ks, + decryptor: decryptor, } } @@ -268,6 +276,30 @@ func (a *Authenticator) AuthenticateHttpRequest(req *http.Request) (*key.Respons used = selected[rand.Intn(len(selected))] } + if a.decryptor.Enabled() { + encryptedParam := "" + if used.Provider == "amazon" { + encryptedParam = used.Setting["awsSecretAccessKey"] + } else if len(used.Setting["apikey"]) != 0 { 
+ encryptedParam = used.Setting["apikey"] + } + + if len(encryptedParam) != 0 { + decryptedSecret, err := a.decryptor.Decrypt(encryptedParam, map[string]string{"X-UPDATED-AT": strconv.FormatInt(used.UpdatedAt, 10)}) + if err == nil { + if used.Provider == "amazon" { + used.Setting["awsSecretAccessKey"] = decryptedSecret + } else { + used.Setting["apikey"] = decryptedSecret + } + } + + if err != nil { + fmt.Println(fmt.Printf("error when encrypting %v", err)) + } + } + } + err := rewriteHttpAuthHeader(req, used) if err != nil { return nil, nil, err diff --git a/internal/config/config.go b/internal/config/config.go index b1d3f49..ed55462 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1,6 +1,7 @@ package config import ( + "errors" "os" "path/filepath" "time" @@ -25,8 +26,6 @@ type Config struct { RedisPort string `koanf:"redis_port" env:"REDIS_PORT" envDefault:"6379"` RedisUsername string `koanf:"redis_username" env:"REDIS_USERNAME"` RedisPassword string `koanf:"redis_password" env:"REDIS_PASSWORD"` - RedisEnableTLS bool `koanf:"redis_enable_tls" env:"REDIS_ENABLE_TLS" envDefault:"false"` - RedisInsecureSkipVerify bool `koanf:"redis_insecure_skip_verify" env:"REDIS_INSECURE_SKIP_VERIFY" envDefault:"false"` RedisDBStartIndex int `koanf:"redis_db_start_index" env:"REDIS_DB_START_INDEX" envDefault:"0"` RedisReadTimeout time.Duration `koanf:"redis_read_time_out" env:"REDIS_READ_TIME_OUT" envDefault:"1s"` RedisWriteTimeout time.Duration `koanf:"redis_write_time_out" env:"REDIS_WRITE_TIME_OUT" envDefault:"500ms"` @@ -47,6 +46,10 @@ type Config struct { AmazonRequestTimeout time.Duration `koanf:"amazon_request_timeout" env:"AMAZON_REQUEST_TIMEOUT" envDefault:"5s"` AmazonConnectionTimeout time.Duration `koanf:"amazon_connection_timeout" env:"AMAZON_CONNECTION_TIMEOUT" envDefault:"10s"` RemoveUserAgent bool `koanf:"remove_user_agent" env:"REMOVE_USER_AGENT" envDefault:"false"` + EnableEncrytion bool `koanf:"enable_encryption" env:"ENABLE_ENCRYPTION" envDefault:"false"` + EncryptionEndpoint string `koanf:"encryption_endpoint" env:"ENCRYPTION_ENDPOINT"` + DecryptionEndpoint string `koanf:"decryption_endpoint" env:"DECRYPTION_ENDPOINT"` + EncryptionTimeout time.Duration `koanf:"encryption_timeout" env:"ENCRYPTION_TIMEOUT" envDefault:"5s"` } func prepareDotEnv(envFilePath string) error { @@ -82,6 +85,10 @@ func LoadConfig(log *zap.Logger) (*Config, error) { return nil, err } + if cfg.EnableEncrytion && len(cfg.EncryptionEndpoint) == 0 { + return nil, errors.New("encryption endpoint cannot be empty") + } + err = prepareDotEnv(".env") if err != nil { log.Sugar().Infof("error loading config from .env file: %v", err) diff --git a/internal/encryptor/encryptor.go b/internal/encryptor/encryptor.go new file mode 100644 index 0000000..eef5256 --- /dev/null +++ b/internal/encryptor/encryptor.go @@ -0,0 +1,120 @@ +package encryptor + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "time" +) + +type Encryptor struct { + decryptionURL string + encryptionURL string + enabled bool + client http.Client + timeout time.Duration +} + +type Secret struct { + Secret string `json:"secret"` +} + +type EncryptionResponse struct { + EncryptedSecret string `json:"encryptedSecret"` +} + +type DecryptionResponse struct { + DecryptedSecret string `json:"decryptedSecret"` +} + +func NewEncryptor(decryptionURL string, encryptionURL string, enabled bool, timeout time.Duration) Encryptor { + return Encryptor{ + decryptionURL: decryptionURL, + encryptionURL: encryptionURL, + client: 
http.Client{}, + enabled: enabled, + timeout: timeout, + } +} + +func (e Encryptor) Encrypt(input string, headers map[string]string) (string, error) { + data, err := json.Marshal(Secret{ + Secret: input, + }) + if err != nil { + return "", err + } + + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + defer cancel() + req, err := http.NewRequestWithContext(ctx, http.MethodPost, e.encryptionURL, bytes.NewBuffer(data)) + if err != nil { + return "", err + } + + for header, value := range headers { + req.Header.Add(header, value) + } + + res, err := e.client.Do(req) + if err != nil { + return "", err + } + + bytes, err := io.ReadAll(res.Body) + if err != nil { + return "", err + } + + encryptionResponse := EncryptionResponse{} + err = json.Unmarshal(bytes, &encryptionResponse) + if err != nil { + return "", err + } + + return encryptionResponse.EncryptedSecret, nil +} + +func (e Encryptor) Enabled() bool { + return e.enabled && len(e.decryptionURL) != 0 && len(e.encryptionURL) != 0 +} + +func (e Encryptor) Decrypt(input string, headers map[string]string) (string, error) { + data, err := json.Marshal(Secret{ + Secret: input, + }) + if err != nil { + return "", err + } + + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + defer cancel() + req, err := http.NewRequestWithContext(ctx, http.MethodPost, e.decryptionURL, bytes.NewBuffer(data)) + if err != nil { + return "", err + } + + for header, value := range headers { + req.Header.Add(header, value) + } + + res, err := e.client.Do(req) + if err != nil { + return "", err + } + + bytes, err := io.ReadAll(res.Body) + if err != nil { + return "", err + } + + decryptionSecret := DecryptionResponse{} + err = json.Unmarshal(bytes, &decryptionSecret) + if err != nil { + return "", err + } + + return decryptionSecret.DecryptedSecret, nil +} diff --git a/internal/manager/provider_setting.go b/internal/manager/provider_setting.go index 5b246d1..8ed9bca 100644 --- a/internal/manager/provider_setting.go +++ b/internal/manager/provider_setting.go @@ -3,6 +3,7 @@ package manager import ( "encoding/json" "fmt" + "strconv" "strings" "time" @@ -27,15 +28,22 @@ type ProviderSettingsCache interface { Delete(pid string) error } +type Encryptor interface { + Encrypt(input string, headers map[string]string) (string, error) + Enabled() bool +} + type ProviderSettingsManager struct { - Storage ProviderSettingsStorage - Cache ProviderSettingsCache + Storage ProviderSettingsStorage + Cache ProviderSettingsCache + Encryptor Encryptor } -func NewProviderSettingsManager(s ProviderSettingsStorage, cache ProviderSettingsCache) *ProviderSettingsManager { +func NewProviderSettingsManager(s ProviderSettingsStorage, cache ProviderSettingsCache, encryptor Encryptor) *ProviderSettingsManager { return &ProviderSettingsManager{ - Storage: s, - Cache: cache, + Storage: s, + Cache: cache, + Encryptor: encryptor, } } @@ -118,6 +126,27 @@ func (m *ProviderSettingsManager) validateSettings(providerName string, setting return nil } +func (m *ProviderSettingsManager) EncryptParams(updatedAt int64, provider string, params map[string]string) (map[string]string, error) { + if provider == "amazon" { + encryted, err := m.Encryptor.Encrypt(params["awsSecretAccessKey"], map[string]string{"X-UPDATED-AT": strconv.FormatInt(updatedAt, 10)}) + if err != nil { + return nil, err + } + + params["awsSecretAccessKey"] = encryted + + } else if provider == "openai" || provider == "anthropic" || provider == "deepinfra" || provider == "azure" { + encryted, err := 
m.Encryptor.Encrypt(params["apikey"], map[string]string{"X-UPDATED-AT": strconv.FormatInt(updatedAt, 10)}) + if err != nil { + return nil, err + } + + params["apikey"] = encryted + } + + return params, nil +} + func (m *ProviderSettingsManager) CreateSetting(setting *provider.Setting) (*provider.Setting, error) { if len(setting.Provider) == 0 { return nil, internal_errors.NewValidationError("provider field cannot be empty") @@ -131,6 +160,15 @@ func (m *ProviderSettingsManager) CreateSetting(setting *provider.Setting) (*pro setting.CreatedAt = time.Now().Unix() setting.UpdatedAt = time.Now().Unix() + if m.Encryptor.Enabled() { + params, err := m.EncryptParams(setting.UpdatedAt, setting.Provider, setting.Setting) + if err != nil { + return nil, err + } + + setting.Setting = params + } + return m.Storage.CreateProviderSetting(setting) } @@ -164,6 +202,15 @@ func (m *ProviderSettingsManager) UpdateSetting(id string, setting *provider.Upd telemetry.Incr("bricksllm.provider_settings_manager.update_setting.delete_cache_error", nil, 1) } + if m.Encryptor.Enabled() { + params, err := m.EncryptParams(existing.UpdatedAt, existing.Provider, setting.Setting) + if err != nil { + return nil, err + } + + setting.Setting = params + } + return m.Storage.UpdateProviderSetting(id, setting) } From c755d64f046d3f9c0a22e9c05e226b36fa5b8d2c Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 15 Nov 2024 23:14:34 -0800 Subject: [PATCH 38/51] add debug log --- internal/encryptor/encryptor.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/encryptor/encryptor.go b/internal/encryptor/encryptor.go index eef5256..4a05e7f 100644 --- a/internal/encryptor/encryptor.go +++ b/internal/encryptor/encryptor.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "encoding/json" + "fmt" "io" "net/http" "time" @@ -110,6 +111,8 @@ func (e Encryptor) Decrypt(input string, headers map[string]string) (string, err return "", err } + fmt.Println(string(bytes)) + decryptionSecret := DecryptionResponse{} err = json.Unmarshal(bytes, &decryptionSecret) if err != nil { From f2da1bb8894212027b7e5b74cd6084a6d8fb2fbc Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sun, 17 Nov 2024 14:50:14 -0800 Subject: [PATCH 39/51] add auth integration --- cmd/bricksllm/main.go | 5 +- go.mod | 33 ++++-- go.sum | 145 +++++++++++++++++++++--- internal/authenticator/authenticator.go | 4 - internal/config/config.go | 1 + internal/encryptor/encryptor.go | 21 ++-- 6 files changed, 175 insertions(+), 34 deletions(-) diff --git a/cmd/bricksllm/main.go b/cmd/bricksllm/main.go index e5d2b7a..66c337c 100644 --- a/cmd/bricksllm/main.go +++ b/cmd/bricksllm/main.go @@ -286,7 +286,10 @@ func main() { psCache := redisStorage.NewProviderSettingsCache(providerSettingsRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) keysCache := redisStorage.NewKeysCache(keysRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) - encryptor := encryptor.NewEncryptor(cfg.DecryptionEndpoint, cfg.EncryptionEndpoint, cfg.EnableEncrytion, cfg.EncryptionTimeout) + encryptor, err := encryptor.NewEncryptor(cfg.DecryptionEndpoint, cfg.EncryptionEndpoint, cfg.EnableEncrytion, cfg.EncryptionTimeout, cfg.Audience) + if cfg.EnableEncrytion && err != nil { + log.Sugar().Fatalf("error creating encryption client: %v", err) + } m := manager.NewManager(store, costLimitCache, rateLimitCache, accessCache, keysCache) krm := manager.NewReportingManager(costStorage, store, store) diff --git a/go.mod b/go.mod index bab0c2e..d917dea 100644 --- a/go.mod +++ b/go.mod @@ -19,17 +19,28 @@ require ( 
github.com/pkoukk/tiktoken-go v0.1.7 github.com/redis/go-redis/v9 v9.0.5 github.com/sashabaranov/go-openai v1.32.5 - github.com/stretchr/testify v1.8.4 + github.com/stretchr/testify v1.9.0 github.com/tidwall/gjson v1.17.0 github.com/tidwall/sjson v1.2.5 go.uber.org/zap v1.24.0 + google.golang.org/api v0.206.0 ) require ( + cloud.google.com/go/auth v0.10.2 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.5 // indirect + cloud.google.com/go/compute/metadata v0.5.2 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 // indirect github.com/beorn7/perks v1.0.1 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/google/s2a-go v0.1.8 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect + github.com/googleapis/gax-go/v2 v2.14.0 // indirect github.com/knadh/koanf/maps v0.1.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect @@ -37,7 +48,15 @@ require ( github.com/prometheus/client_model v0.5.0 // indirect github.com/prometheus/common v0.48.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect + go.opencensus.io v0.24.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect + go.opentelemetry.io/otel v1.29.0 // indirect + go.opentelemetry.io/otel/metric v1.29.0 // indirect + go.opentelemetry.io/otel/trace v1.29.0 // indirect go.uber.org/atomic v1.7.0 // indirect + golang.org/x/oauth2 v0.24.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/grpc v1.67.1 // indirect ) require ( @@ -57,7 +76,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/sts v1.28.4 // indirect github.com/aws/smithy-go v1.20.4 // indirect github.com/bytedance/sonic v1.9.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect @@ -86,10 +105,10 @@ require ( github.com/ugorji/go/codec v1.2.11 // indirect go.uber.org/multierr v1.10.0 // indirect golang.org/x/arch v0.3.0 // indirect - golang.org/x/crypto v0.18.0 // indirect - golang.org/x/net v0.20.0 // indirect - golang.org/x/sys v0.17.0 // indirect - golang.org/x/text v0.14.0 // indirect - google.golang.org/protobuf v1.33.0 // indirect + golang.org/x/crypto v0.29.0 // indirect + golang.org/x/net v0.31.0 // indirect + golang.org/x/sys v0.27.0 // indirect + golang.org/x/text v0.20.0 // indirect + google.golang.org/protobuf v1.35.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 3607d7d..4e61f2c 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,11 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go/auth v0.10.2 h1:oKF7rgBfSHdp/kuhXtqU/tNDr0mZqhYbEh+6SiqzkKo= +cloud.google.com/go/auth v0.10.2/go.mod h1:xxA5AqpDrvS+Gkmo9RqrGGRh6WSNKKOXhY3zNOr38tI= +cloud.google.com/go/auth/oauth2adapt v0.2.5 h1:2p29+dePqsCHPP1bqDJcKj4qxRyYCcbzKpFyKGt3MTk= +cloud.google.com/go/auth/oauth2adapt v0.2.5/go.mod 
h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8= +cloud.google.com/go/compute/metadata v0.5.2 h1:UxK4uu/Tn+I3p2dYWTfiX4wva7aYlKixAHn3fyqngqo= +cloud.google.com/go/compute/metadata v0.5.2/go.mod h1:C66sj2AluDcIqakBq/M8lw8/ybHgOZqin2obFxa/E5k= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/DataDog/datadog-go/v5 v5.3.0 h1:2q2qjFOb3RwAZNU+ez27ZVDwErJv5/VpbBPprz7Z+s8= github.com/DataDog/datadog-go/v5 v5.3.0/go.mod h1:XRDJk1pTc00gm+ZDiBKsjh7oOOtJfYfglVCmFb8C2+Q= github.com/Microsoft/go-winio v0.5.0 h1:Elr9Wn+sGKPlkaBvwu4mTrxtmOp3F3yV9qhaHbXGjwU= @@ -55,11 +63,14 @@ github.com/caarlos0/env v3.5.0+incompatible h1:Yy0UN8o9Wtr/jGHZDpCBLpNrzcFLLM2yi github.com/caarlos0/env v3.5.0+incompatible/go.mod h1:tdCsowwCzMLdkqRYDlHpZCp2UooDD3MspDBjZ2AD02Y= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -68,8 +79,14 @@ github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/r github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= github.com/fsnotify/fsnotify 
v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= @@ -78,6 +95,11 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= @@ -90,12 +112,39 @@ github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 h1:TQcrn6Wq+sKGkpyPvppOz99zsM github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 
+github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= +github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw= +github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= +github.com/googleapis/gax-go/v2 v2.14.0 h1:f+jMrjBPl+DL9nI4IQzLUxMq7XrAqFYB7hBPqMNIe8o= +github.com/googleapis/gax-go/v2 v2.14.0/go.mod h1:lhBCnjdLrWRaPvLWhmc8IS24m9mr07qSYnHncrgo+zk= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -144,6 +193,7 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSzKKE= @@ -159,8 +209,9 @@ github.com/sashabaranov/go-openai v1.32.5/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adO github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= @@ -170,8 +221,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 
-github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/gjson v1.17.0 h1:/Jocvlh98kcTfpN2+JzGQWQcqrPQwDrVEMApx/M5ZwM= github.com/tidwall/gjson v1.17.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= @@ -187,6 +238,18 @@ github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2 github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 h1:r6I7RJCN86bpD/FQwedZ0vSixDpwuWREjW9oRMsmqDc= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0/go.mod h1:B9yO6b04uB80CzjedvewuqDhxJxi11s7/GtiGa8bAjI= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8= +go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw= +go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8= +go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc= +go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8= +go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4= +go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ= go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= @@ -201,21 +264,39 @@ golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc= -golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= +golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= +golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod 
h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo= +golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= +golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= +golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -224,22 +305,54 @@ golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= -golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg= +golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/api v0.206.0 h1:A27GClesCSheW5P2BymVHjpEeQ2XHH8DI8Srs2HI2L8= +google.golang.org/api v0.206.0/go.mod h1:BtB8bfjTYIrai3d8UyvPmV9REGgox7coh+ZRwm0b+W8= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= +google.golang.org/grpc 
v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -247,4 +360,6 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/internal/authenticator/authenticator.go b/internal/authenticator/authenticator.go index 68959d3..bec413c 100644 --- a/internal/authenticator/authenticator.go +++ b/internal/authenticator/authenticator.go @@ -293,10 +293,6 @@ func (a *Authenticator) AuthenticateHttpRequest(req *http.Request) (*key.Respons used.Setting["apikey"] = decryptedSecret } } - - if err != nil { - fmt.Println(fmt.Printf("error when encrypting %v", err)) - } } } diff --git a/internal/config/config.go b/internal/config/config.go index ed55462..7abedde 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -50,6 +50,7 @@ type Config struct { EncryptionEndpoint string `koanf:"encryption_endpoint" env:"ENCRYPTION_ENDPOINT"` DecryptionEndpoint string `koanf:"decryption_endpoint" env:"DECRYPTION_ENDPOINT"` EncryptionTimeout time.Duration `koanf:"encryption_timeout" 
env:"ENCRYPTION_TIMEOUT" envDefault:"5s"`
+	Audience                 string        `koanf:"audience" env:"AUDIENCE"`
 }
 
 func prepareDotEnv(envFilePath string) error {
diff --git a/internal/encryptor/encryptor.go b/internal/encryptor/encryptor.go
index 4a05e7f..7f53d8b 100644
--- a/internal/encryptor/encryptor.go
+++ b/internal/encryptor/encryptor.go
@@ -4,17 +4,18 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
-	"fmt"
 	"io"
 	"net/http"
 	"time"
+
+	"google.golang.org/api/idtoken"
 )
 
 type Encryptor struct {
 	decryptionURL string
 	encryptionURL string
 	enabled       bool
-	client        http.Client
+	client        *http.Client
 	timeout       time.Duration
 }
 
@@ -30,14 +31,22 @@ type DecryptionResponse struct {
 	DecryptedSecret string `json:"decryptedSecret"`
 }
 
-func NewEncryptor(decryptionURL string, encryptionURL string, enabled bool, timeout time.Duration) Encryptor {
+func NewEncryptor(decryptionURL string, encryptionURL string, enabled bool, timeout time.Duration, audience string) (Encryptor, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	client, err := idtoken.NewClient(ctx, audience)
+	if err != nil {
+		return Encryptor{}, err
+	}
+
 	return Encryptor{
 		decryptionURL: decryptionURL,
 		encryptionURL: encryptionURL,
-		client:        http.Client{},
 		enabled:       enabled,
 		timeout:       timeout,
-	}
+		client:        client,
+	}, nil
 }
 
 func (e Encryptor) Encrypt(input string, headers map[string]string) (string, error) {
@@ -111,8 +120,6 @@ func (e Encryptor) Decrypt(input string, headers map[string]string) (string, err
 		return "", err
 	}
 
-	fmt.Println(string(bytes))
-
 	decryptionSecret := DecryptionResponse{}
 	err = json.Unmarshal(bytes, &decryptionSecret)
 	if err != nil {

From 25627cce4c9a71cc339928bd814bc9c867f89e24 Mon Sep 17 00:00:00 2001
From: Spike Lu
Date: Mon, 18 Nov 2024 09:32:00 -0800
Subject: [PATCH 40/51] fix bug

---
 internal/manager/provider_setting.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/internal/manager/provider_setting.go b/internal/manager/provider_setting.go
index 8ed9bca..d02e699 100644
--- a/internal/manager/provider_setting.go
+++ b/internal/manager/provider_setting.go
@@ -203,7 +203,7 @@ func (m *ProviderSettingsManager) UpdateSetting(id string, setting *provider.Upd
 	}
 
 	if m.Encryptor.Enabled() {
-		params, err := m.EncryptParams(existing.UpdatedAt, existing.Provider, setting.Setting)
+		params, err := m.EncryptParams(setting.UpdatedAt, existing.Provider, setting.Setting)
 		if err != nil {
 			return nil, err
 		}

From 89021844048edc16d670b516cefcc885f3c326f1 Mon Sep 17 00:00:00 2001
From: Andrew Rothstein
Date: Tue, 5 Nov 2024 02:18:28 +0000
Subject: [PATCH 41/51] first swing. broken.
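
A rough usage sketch of the encryptor wired up in PATCH 39 above, not taken from
any patch in this series: it builds the idtoken-backed client and round-trips a
provider secret through the encryption service. The endpoint URLs, audience, and
sample key are placeholders, the X-UPDATED-AT header mirrors what the manager and
authenticator send, and running it assumes Google application-default credentials
plus a location inside the bricksllm module, since the package lives under internal/.

package main

import (
	"fmt"
	"strconv"
	"time"

	"github.com/bricks-cloud/bricksllm/internal/encryptor"
)

func main() {
	// Placeholder endpoints and audience; BricksLLM reads these from
	// DECRYPTION_ENDPOINT, ENCRYPTION_ENDPOINT and AUDIENCE.
	enc, err := encryptor.NewEncryptor(
		"https://kms.example.invalid/decrypt", // cfg.DecryptionEndpoint
		"https://kms.example.invalid/encrypt", // cfg.EncryptionEndpoint
		true,                                  // cfg.EnableEncrytion
		5*time.Second,                         // cfg.EncryptionTimeout
		"https://kms.example.invalid",         // cfg.Audience
	)
	if err != nil {
		panic(err) // idtoken.NewClient needs Google application-default credentials
	}

	// The managers tag each call with the setting's UpdatedAt so the
	// encryption service can pick the matching key version.
	headers := map[string]string{
		"X-UPDATED-AT": strconv.FormatInt(time.Now().Unix(), 10),
	}

	ciphertext, err := enc.Encrypt("sk-placeholder-provider-key", headers)
	if err != nil {
		panic(err)
	}

	plaintext, err := enc.Decrypt(ciphertext, headers)
	if err != nil {
		panic(err)
	}

	fmt.Println(plaintext == "sk-placeholder-provider-key")
}
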
---
 kubernetes/helm-charts/bricksllm/.gitignore   |   1 +
 kubernetes/helm-charts/bricksllm/.helmignore  |  23 ++
 kubernetes/helm-charts/bricksllm/Chart.lock   |   9 ++
 kubernetes/helm-charts/bricksllm/Chart.yaml   |  21 ++++
 .../helm-charts/bricksllm/templates/NOTES.txt |   0
 .../bricksllm/templates/_helpers.tpl          |  62 ++++++++
 .../bricksllm/templates/deployment.yaml       |  77 ++++++++++
 .../helm-charts/bricksllm/templates/hpa.yaml  |  32 +++++
 .../bricksllm/templates/ingress.yaml          |  43 +++++++
 .../bricksllm/templates/service.yaml          |  21 ++++
 .../bricksllm/templates/serviceaccount.yaml   |  13 ++
 .../templates/tests/test-connection.yaml      |  19 +++
 kubernetes/helm-charts/bricksllm/values.yaml  | 116 ++++++++++++++
 13 files changed, 437 insertions(+)
 create mode 100644 kubernetes/helm-charts/bricksllm/.gitignore
 create mode 100644 kubernetes/helm-charts/bricksllm/.helmignore
 create mode 100644 kubernetes/helm-charts/bricksllm/Chart.lock
 create mode 100644 kubernetes/helm-charts/bricksllm/Chart.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/NOTES.txt
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/_helpers.tpl
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/deployment.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/hpa.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/ingress.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/service.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/serviceaccount.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml
 create mode 100644 kubernetes/helm-charts/bricksllm/values.yaml

diff --git a/kubernetes/helm-charts/bricksllm/.gitignore b/kubernetes/helm-charts/bricksllm/.gitignore
new file mode 100644
index 0000000..948259a
--- /dev/null
+++ b/kubernetes/helm-charts/bricksllm/.gitignore
@@ -0,0 +1 @@
+charts/*.tgz
diff --git a/kubernetes/helm-charts/bricksllm/.helmignore b/kubernetes/helm-charts/bricksllm/.helmignore
new file mode 100644
index 0000000..0e8a0eb
--- /dev/null
+++ b/kubernetes/helm-charts/bricksllm/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/kubernetes/helm-charts/bricksllm/Chart.lock b/kubernetes/helm-charts/bricksllm/Chart.lock new file mode 100644 index 0000000..322694a --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/Chart.lock @@ -0,0 +1,9 @@ +dependencies: +- name: redis + repository: oci://registry-1.docker.io/bitnamicharts + version: 20.2.1 +- name: postgresql + repository: oci://registry-1.docker.io/bitnamicharts + version: 16.1.1 +digest: sha256:d380aeee84575489c7b48727ff37b9e47747e8c7e855655fc815455243421660 +generated: "2024-11-04T22:16:50.627919824Z" diff --git a/kubernetes/helm-charts/bricksllm/Chart.yaml b/kubernetes/helm-charts/bricksllm/Chart.yaml new file mode 100644 index 0000000..7b918a3 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/Chart.yaml @@ -0,0 +1,21 @@ +--- +apiVersion: v2 +name: bricksllm +description: A Helm chart for deploying BricksLLM and its dependencies + +type: application + +version: 0.1.0 + +# bricksllm version +appVersion: "1.37.0" + +dependencies: + - condition: redis.enabled + name: redis + repository: oci://registry-1.docker.io/bitnamicharts + version: ~20 + - conditions: postgresql.enabled + name: postgresql + repository: oci://registry-1.docker.io/bitnamicharts + version: ~16 diff --git a/kubernetes/helm-charts/bricksllm/templates/NOTES.txt b/kubernetes/helm-charts/bricksllm/templates/NOTES.txt new file mode 100644 index 0000000..e69de29 diff --git a/kubernetes/helm-charts/bricksllm/templates/_helpers.tpl b/kubernetes/helm-charts/bricksllm/templates/_helpers.tpl new file mode 100644 index 0000000..7a986ec --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "bricksllm.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "bricksllm.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "bricksllm.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "bricksllm.labels" -}} +helm.sh/chart: {{ include "bricksllm.chart" . }} +{{ include "bricksllm.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "bricksllm.selectorLabels" -}} +app.kubernetes.io/name: {{ include "bricksllm.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "bricksllm.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "bricksllm.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/deployment.yaml b/kubernetes/helm-charts/bricksllm/templates/deployment.yaml new file mode 100644 index 0000000..4c2f7c6 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/deployment.yaml @@ -0,0 +1,77 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "bricksllm.fullname" . }} + labels: + {{- include "bricksllm.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "bricksllm.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "bricksllm.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "bricksllm.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: control + containerPort: {{ .Values.services.control.port }} + protocol: TCP + - name: data + containerPort: {{ .Values.services.data.port }} + protocol: TCP + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.readinesProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/hpa.yaml b/kubernetes/helm-charts/bricksllm/templates/hpa.yaml new file mode 100644 index 0000000..bd8bff1 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/hpa.yaml @@ -0,0 +1,32 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "bricksllm.fullname" . }} + labels: + {{- include "bricksllm.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "bricksllm.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/ingress.yaml b/kubernetes/helm-charts/bricksllm/templates/ingress.yaml new file mode 100644 index 0000000..672c0e3 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/ingress.yaml @@ -0,0 +1,43 @@ +{{- if .Values.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "bricksllm.fullname" . }} + labels: + {{- include "bricksllm.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.ingress.className }} + ingressClassName: {{ . }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- with .pathType }} + pathType: {{ . }} + {{- end }} + backend: + service: + name: {{ include "bricksllm.fullname" $ }} + port: + number: {{ $.Values.service.port }} + {{- end }} + {{- end }} +{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/service.yaml b/kubernetes/helm-charts/bricksllm/templates/service.yaml new file mode 100644 index 0000000..94606e3 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/service.yaml @@ -0,0 +1,21 @@ +{{- $fullName := include "bricksllm.fullname" . }} +{{- $labels := include "bricksllm.labels" . }} +{{- $selectorLabels := include "bricksllm.selectorLabels" . }} +{{- range $name, $v := .Values.services -}} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $fullName }}-{{ $name }} + labels: + {{- $labels | nindent 4 }} +spec: + type: {{ $v.type }} + ports: + - port: {{ $v.port }} + targetPort: http + protocol: TCP + name: {{ $name | quote }} + selector: + {{- $selectorLabels | nindent 4 }} +{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/serviceaccount.yaml b/kubernetes/helm-charts/bricksllm/templates/serviceaccount.yaml new file mode 100644 index 0000000..453e1e0 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "bricksllm.serviceAccountName" . }} + labels: + {{- include "bricksllm.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml b/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml new file mode 100644 index 0000000..ab6f4e2 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "bricksllm.fullname" . }}-test-connection" + labels: + {{- include "bricksllm.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "bricksllm.fullname" . }}-control:{{ .Values.services.control.port }}'] + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "bricksllm.fullname" . }}-data:{{ .Values.services.data.port }}'] + restartPolicy: Never diff --git a/kubernetes/helm-charts/bricksllm/values.yaml b/kubernetes/helm-charts/bricksllm/values.yaml new file mode 100644 index 0000000..a4a8289 --- /dev/null +++ b/kubernetes/helm-charts/bricksllm/values.yaml @@ -0,0 +1,116 @@ +--- +# replica count for the bricksllm Deployment +replicaCount: 1 + +# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ +image: + repository: luyuanxin1995/bricksllm + # This sets the pull policy for images. + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + +# for private registries hosting the imagery +imagePullSecrets: [] + +# This is to override the chart name. +nameOverride: "" +fullnameOverride: "" + +# establish a dedicated service account +serviceAccount: + # Specifies whether a service account should be created + create: true + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. 
+ # If not set and create is true, a name is generated using the fullname template + name: "" + +# additional pod annotations +podAnnotations: {} + +# additional pod labels +podLabels: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +services: + control: + type: ClusterIP + port: 8001 + data: + type: ClusterIP + port: 8002 + +# Configuring Ingress +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +#resources: +# limits: +# cpu: 100m +# memory: 128Mi +# requests: +# cpu: 100m +# memory: 128Mi + +#livenessProbe: +# httpGet: +# path: / +# port: http + +#readinessProbe: +# httpGet: +# path: / +# port: http + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# Additional volumes for the Deployments +#volumes: +# - name: foo +# secret: +# secretName: mysecret +# optional: false + +# Additional volumeMounts for the deployments +#volumeMounts: +# - name: foo +# mountPath: "/etc/foo" +# readOnly: true + +nodeSelector: {} + +tolerations: [] + +affinity: {} From ae55b11097aed9a9d81d7c31a348643e99f6813e Mon Sep 17 00:00:00 2001 From: Andrew Rothstein Date: Wed, 6 Nov 2024 03:15:39 +0000 Subject: [PATCH 42/51] gussy up the services with named ports and named ingresses --- kubernetes/helm-charts/Taskfile.yml | 12 ++++ .../bricksllm/templates/deployment.yaml | 30 ++++++-- .../bricksllm/templates/ingress.yaml | 29 +++++--- .../templates/tests/test-connection.yaml | 16 +++-- kubernetes/helm-charts/bricksllm/values.yaml | 72 ++++++++++++------- 5 files changed, 109 insertions(+), 50 deletions(-) create mode 100644 kubernetes/helm-charts/Taskfile.yml diff --git a/kubernetes/helm-charts/Taskfile.yml b/kubernetes/helm-charts/Taskfile.yml new file mode 100644 index 0000000..d50108a --- /dev/null +++ b/kubernetes/helm-charts/Taskfile.yml @@ -0,0 +1,12 @@ +--- +version: '3' +tasks: + default: + cmds: + - | + helm upgrade \ + --create-namespace \ + -n bricksllm \ + --install \ + bricksllm \ + ./bricksllm diff --git a/kubernetes/helm-charts/bricksllm/templates/deployment.yaml b/kubernetes/helm-charts/bricksllm/templates/deployment.yaml index 4c2f7c6..8a97665 100644 --- a/kubernetes/helm-charts/bricksllm/templates/deployment.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/deployment.yaml @@ -1,7 +1,9 @@ +{{ $fullname := include "bricksllm.fullname" . -}} +--- apiVersion: apps/v1 kind: Deployment metadata: - name: {{ include "bricksllm.fullname" . }} + name: {{ $fullname }} labels: {{- include "bricksllm.labels" . 
| nindent 4 }} spec: @@ -35,14 +37,30 @@ spec: securityContext: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + env: + - name: POSTGRESQL_HOSTS + value: '{{ $fullname }}-postgresql' + - name: POSTGRESQL_USERNAME + value: postgres + - name: POSTGRESQL_PASSWORD + valueFrom: + secretKeyRef: + name: '{{ $fullname }}-postgresql' + key: postgres-password + - name: REDIS_HOSTS + value: '{{ $fullname }}-redis-master' + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: '{{ $fullname }}-redis' + key: redis-password imagePullPolicy: {{ .Values.image.pullPolicy }} ports: - - name: control - containerPort: {{ .Values.services.control.port }} - protocol: TCP - - name: data - containerPort: {{ .Values.services.data.port }} + {{- range $name, $v := .Values.services }} + - name: {{ $name }} + containerPort: {{ $v.port }} protocol: TCP + {{- end }} {{- with .Values.livenessProbe }} livenessProbe: {{- toYaml . | nindent 12 }} diff --git a/kubernetes/helm-charts/bricksllm/templates/ingress.yaml b/kubernetes/helm-charts/bricksllm/templates/ingress.yaml index 672c0e3..c3039f7 100644 --- a/kubernetes/helm-charts/bricksllm/templates/ingress.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/ingress.yaml @@ -1,30 +1,36 @@ -{{- if .Values.ingress.enabled -}} +{{- $fullname := include "bricksllm.fullname" . }} +{{- $labels := include "bricksllm.labels" . }} +{{- range $name, $v := .Values.ingresses }} +{{- if $v.enabled }} +--- apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - name: {{ include "bricksllm.fullname" . }} + name: {{ $fullname }}-{{ $name }} labels: - {{- include "bricksllm.labels" . | nindent 4 }} - {{- with .Values.ingress.annotations }} + {{- $labels | nindent 4 }} + {{- with $v.annotations }} annotations: {{- toYaml . | nindent 4 }} {{- end }} spec: - {{- with .Values.ingress.className }} + {{- with $v.className }} ingressClassName: {{ . }} {{- end }} - {{- if .Values.ingress.tls }} + {{- if $v.tls }} tls: - {{- range .Values.ingress.tls }} + {{- range $v.tls }} - hosts: {{- range .hosts }} - {{ . | quote }} {{- end }} - secretName: {{ .secretName }} + {{- with .secretName }} + secretName: {{ . }} + {{- end }} {{- end }} {{- end }} rules: - {{- range .Values.ingress.hosts }} + {{- range $v.hosts }} - host: {{ .host | quote }} http: paths: @@ -35,9 +41,10 @@ spec: {{- end }} backend: service: - name: {{ include "bricksllm.fullname" $ }} + name: {{ $fullname }}-{{ $name }} port: - number: {{ $.Values.service.port }} + number: {{ get (get $.Values.services $name) "port" }} {{- end }} {{- end }} {{- end }} +{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml b/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml index ab6f4e2..db01831 100644 --- a/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml @@ -1,19 +1,21 @@ +{{ $fullname := include "bricksllm.fullname" . -}} +--- apiVersion: v1 kind: Pod metadata: - name: "{{ include "bricksllm.fullname" . }}-test-connection" + name: "{{ $fullname }}-test-connection" labels: {{- include "bricksllm.labels" . | nindent 4 }} annotations: "helm.sh/hook": test spec: containers: +{{- range $name, $v := .Values.services }} - name: wget image: busybox - command: ['wget'] - args: ['{{ include "bricksllm.fullname" . 
}}-control:{{ .Values.services.control.port }}'] - - name: wget - image: busybox - command: ['wget'] - args: ['{{ include "bricksllm.fullname" . }}-data:{{ .Values.services.data.port }}'] + command: + - wget + args: + - '{{ $fullname }}-{{ $name }}:{{ $v.port }}' +{{- end }} restartPolicy: Never diff --git a/kubernetes/helm-charts/bricksllm/values.yaml b/kubernetes/helm-charts/bricksllm/values.yaml index a4a8289..c080514 100644 --- a/kubernetes/helm-charts/bricksllm/values.yaml +++ b/kubernetes/helm-charts/bricksllm/values.yaml @@ -47,29 +47,49 @@ securityContext: {} # runAsUser: 1000 services: - control: + admin: type: ClusterIP port: 8001 - data: + proxy: type: ClusterIP port: 8002 -# Configuring Ingress -ingress: - enabled: false - className: "" - annotations: {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - hosts: - - host: chart-example.local - paths: - - path: / - pathType: ImplementationSpecific - tls: [] - # - secretName: chart-example-tls - # hosts: - # - chart-example.local +# Configuring Ingresses +ingresses: + admin: + enabled: true + className: tailscale + #annotations: + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: bricksllm-admin.elephant-frog.ts.net + paths: + - path: / + pathType: ImplementationSpecific + tls: + - hosts: + - bricksllm-admin.elephant-frog.ts.net + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + proxy: + enabled: true + className: tailscale + #annotations: + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: bricksllm-proxy.elephant-frog.ts.net + paths: + - path: / + pathType: ImplementationSpecific + tls: + - hosts: + - bricksllm-proxy.elephant-frog.ts.net + # - secretName: chart-example-tls + # hosts: + # - chart-example.local #resources: # limits: @@ -79,15 +99,15 @@ ingress: # cpu: 100m # memory: 128Mi -#livenessProbe: -# httpGet: -# path: / -# port: http +livenessProbe: + httpGet: + path: /api/health + port: proxy -#readinessProbe: -# httpGet: -# path: / -# port: http +readinessProbe: + httpGet: + path: /api/health + port: proxy autoscaling: enabled: false From 6807c1bdddb3c0ec4c264c54965781bb0a12796c Mon Sep 17 00:00:00 2001 From: Andrew Rothstein Date: Wed, 6 Nov 2024 03:51:01 +0000 Subject: [PATCH 43/51] thats one service with multiple ports tho multiple ingresses --- kubernetes/helm-charts/Taskfile.yml | 4 ++++ .../bricksllm/templates/deployment.yaml | 6 +++--- .../helm-charts/bricksllm/templates/ingress.yaml | 4 ++-- .../helm-charts/bricksllm/templates/service.yaml | 16 ++++++++-------- .../templates/tests/test-connection.yaml | 4 ++-- kubernetes/helm-charts/bricksllm/values.yaml | 10 ++++------ 6 files changed, 23 insertions(+), 21 deletions(-) diff --git a/kubernetes/helm-charts/Taskfile.yml b/kubernetes/helm-charts/Taskfile.yml index d50108a..33821a8 100644 --- a/kubernetes/helm-charts/Taskfile.yml +++ b/kubernetes/helm-charts/Taskfile.yml @@ -10,3 +10,7 @@ tasks: --install \ bricksllm \ ./bricksllm + delete: + cmds: + - helm delete -n bricksllm bricksllm + diff --git a/kubernetes/helm-charts/bricksllm/templates/deployment.yaml b/kubernetes/helm-charts/bricksllm/templates/deployment.yaml index 8a97665..f015f69 100644 --- a/kubernetes/helm-charts/bricksllm/templates/deployment.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/deployment.yaml @@ -56,9 +56,9 @@ spec: key: redis-password imagePullPolicy: {{ .Values.image.pullPolicy }} ports: - {{- range $name, $v := .Values.services }} - - name: {{ $name }} 
- containerPort: {{ $v.port }} + {{- range $n, $p := .Values.services.ports }} + - name: {{ $n }} + containerPort: {{ $p }} protocol: TCP {{- end }} {{- with .Values.livenessProbe }} diff --git a/kubernetes/helm-charts/bricksllm/templates/ingress.yaml b/kubernetes/helm-charts/bricksllm/templates/ingress.yaml index c3039f7..fcbb9dc 100644 --- a/kubernetes/helm-charts/bricksllm/templates/ingress.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/ingress.yaml @@ -41,9 +41,9 @@ spec: {{- end }} backend: service: - name: {{ $fullname }}-{{ $name }} + name: {{ $fullname }} port: - number: {{ get (get $.Values.services $name) "port" }} + name: {{ $name | quote }} {{- end }} {{- end }} {{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/service.yaml b/kubernetes/helm-charts/bricksllm/templates/service.yaml index 94606e3..f8441d1 100644 --- a/kubernetes/helm-charts/bricksllm/templates/service.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/service.yaml @@ -1,21 +1,21 @@ -{{- $fullName := include "bricksllm.fullname" . }} +{{- $fullname := include "bricksllm.fullname" . }} {{- $labels := include "bricksllm.labels" . }} {{- $selectorLabels := include "bricksllm.selectorLabels" . }} -{{- range $name, $v := .Values.services -}} --- apiVersion: v1 kind: Service metadata: - name: {{ $fullName }}-{{ $name }} + name: {{ $fullname }} labels: {{- $labels | nindent 4 }} spec: - type: {{ $v.type }} + type: {{ .Values.services.type }} ports: - - port: {{ $v.port }} - targetPort: http +{{- range $n, $p := .Values.services.ports }} + - port: {{ $p }} + targetPort: {{ $p }} protocol: TCP - name: {{ $name | quote }} + name: {{ $n | quote }} +{{- end }} selector: {{- $selectorLabels | nindent 4 }} -{{- end }} diff --git a/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml b/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml index db01831..32b4f42 100644 --- a/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml +++ b/kubernetes/helm-charts/bricksllm/templates/tests/test-connection.yaml @@ -10,12 +10,12 @@ metadata: "helm.sh/hook": test spec: containers: -{{- range $name, $v := .Values.services }} +{{- range $n, $p := .Values.services.ports }} - name: wget image: busybox command: - wget args: - - '{{ $fullname }}-{{ $name }}:{{ $v.port }}' + - '{{ $fullname }}:{{ $p }}' {{- end }} restartPolicy: Never diff --git a/kubernetes/helm-charts/bricksllm/values.yaml b/kubernetes/helm-charts/bricksllm/values.yaml index c080514..eca72c4 100644 --- a/kubernetes/helm-charts/bricksllm/values.yaml +++ b/kubernetes/helm-charts/bricksllm/values.yaml @@ -47,12 +47,10 @@ securityContext: {} # runAsUser: 1000 services: - admin: - type: ClusterIP - port: 8001 - proxy: - type: ClusterIP - port: 8002 + type: ClusterIP + ports: + admin: 8001 + proxy: 8002 # Configuring Ingresses ingresses: From 0bbcd747603f46862bf18e82a2818aa61b1331eb Mon Sep 17 00:00:00 2001 From: Andrew Rothstein Date: Wed, 27 Nov 2024 14:45:17 +0000 Subject: [PATCH 44/51] default values --- kubernetes/helm-charts/bricksllm/values.yaml | 46 +++++++++----------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/kubernetes/helm-charts/bricksllm/values.yaml b/kubernetes/helm-charts/bricksllm/values.yaml index eca72c4..72bc6ea 100644 --- a/kubernetes/helm-charts/bricksllm/values.yaml +++ b/kubernetes/helm-charts/bricksllm/values.yaml @@ -55,39 +55,33 @@ services: # Configuring Ingresses ingresses: admin: - enabled: true - className: tailscale + enabled: false + #className: "tailscale" 
#annotations: # kubernetes.io/ingress.class: nginx # kubernetes.io/tls-acme: "true" - hosts: - - host: bricksllm-admin.elephant-frog.ts.net - paths: - - path: / - pathType: ImplementationSpecific - tls: - - hosts: - - bricksllm-admin.elephant-frog.ts.net - # - secretName: chart-example-tls - # hosts: - # - chart-example.local + #hosts: + # - host: bricksllm-admin.elephant-frog.ts.net + # paths: + # - path: / + # pathType: ImplementationSpecific + #tls: + # - hosts: + # - bricksllm-admin.elephant-frog.ts.net proxy: - enabled: true - className: tailscale + enabled: false + #className: tailscale #annotations: # kubernetes.io/ingress.class: nginx # kubernetes.io/tls-acme: "true" - hosts: - - host: bricksllm-proxy.elephant-frog.ts.net - paths: - - path: / - pathType: ImplementationSpecific - tls: - - hosts: - - bricksllm-proxy.elephant-frog.ts.net - # - secretName: chart-example-tls - # hosts: - # - chart-example.local + #hosts: + # - host: bricksllm-proxy.elephant-frog.ts.net + # paths: + # - path: / + # pathType: ImplementationSpecific + #tls: + # - hosts: + # - bricksllm-proxy.elephant-frog.ts.net #resources: # limits: From d88f94b5549700b994cd9146cf0f60159b6ec596 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Thu, 26 Dec 2024 22:23:12 -0800 Subject: [PATCH 45/51] add support for amazon bedrock model --- internal/provider/anthropic/cost.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/internal/provider/anthropic/cost.go b/internal/provider/anthropic/cost.go index 4ec546b..6785fea 100644 --- a/internal/provider/anthropic/cost.go +++ b/internal/provider/anthropic/cost.go @@ -94,13 +94,28 @@ func selectModel(model string) string { return "" } +func convertAmazonModelToAnthropicModel(model string) string { + parts := strings.Split(model, ".") + if len(parts) < 3 { + return model + } + + return selectModel(parts[2]) +} + func (ce *CostEstimator) EstimateCompletionCost(model string, tks int) (float64, error) { costMap, ok := ce.tokenCostMap["completion"] if !ok { return 0, errors.New("prompt token cost is not provided") } - selected := selectModel(model) + selected := "" + if strings.HasPrefix(model, "us") { + selected = convertAmazonModelToAnthropicModel(model) + } else { + selected = selectModel(model) + } + cost, ok := costMap[selected] if !ok { return 0, errors.New("model is not present in the cost map provided") From 621578881bb34c0d2ebd6deda49f8a32a1fe8aec Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Fri, 27 Dec 2024 08:04:16 -0800 Subject: [PATCH 46/51] add cost tracking for o1 --- internal/provider/openai/cost.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index dfa9040..641a577 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -34,6 +34,8 @@ func parseFinetuneModel(model string) string { var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "prompt": { + "o1": 0.015, + "o1-2024-12-17": 0.015, "o1-preview": 0.015, "o1-preview-2024-09-12": 0.015, "gpt-4o": 0.0025, @@ -98,6 +100,8 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "tts-1-hd": 0.03, }, "completion": { + "o1": 0.06, + "o1-2024-12-17": 0.06, "o1-preview": 0.06, "o1-preview-2024-09-12": 0.06, "gpt-3.5-turbo-1106": 0.002, From 2a8a1dfd84386f6f43577051aa04198a1c047fdc Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 1 Jan 2025 19:52:05 -0800 Subject: [PATCH 47/51] add pushing to aws --- .github/workflows/release.yml | 31 +++++++++++++++++++++++++++++++ 
1 file changed, 31 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5884f46..46ef2b7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -115,3 +115,34 @@ jobs: platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta-datadog.outputs.tags }} + + aws: + runs-on: ubuntu-latest + steps: + - name: Install Go + uses: actions/setup-go@v3 + with: + go-version: 1.21.x + check-latest: true + + - name: Check Out Repo + uses: actions/checkout@v3 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: ${{ secrets.AWS_DEFAULT_REGION }} + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Build and push + uses: mr-smithers-excellent/docker-build-push@v6 + with: + image: bricksllm + tags: latest + dockerfile: Dockerfile.aws.datadog + registry: ${{ steps.login-ecr.outputs.registry }} \ No newline at end of file From 34ebf3401808fe6580fbc843a07999076918d316 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 1 Jan 2025 20:03:37 -0800 Subject: [PATCH 48/51] update workflow --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 46ef2b7..f7424c7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -144,5 +144,5 @@ jobs: with: image: bricksllm tags: latest - dockerfile: Dockerfile.aws.datadog + dockerfile: Dockerfile.datadog registry: ${{ steps.login-ecr.outputs.registry }} \ No newline at end of file From 5fb4f931c9a91a0f43fe3c2dd5ecdba11db8b5ee Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sun, 5 Jan 2025 15:16:10 -0800 Subject: [PATCH 49/51] fix bug --- internal/provider/anthropic/cost.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/internal/provider/anthropic/cost.go b/internal/provider/anthropic/cost.go index 6785fea..36680ca 100644 --- a/internal/provider/anthropic/cost.go +++ b/internal/provider/anthropic/cost.go @@ -64,7 +64,13 @@ func (ce *CostEstimator) EstimatePromptCost(model string, tks int) (float64, err } - selected := selectModel(model) + selected := "" + if strings.HasPrefix(model, "us") { + selected = convertAmazonModelToAnthropicModel(model) + } else { + selected = selectModel(model) + } + cost, ok := costMap[selected] if !ok { return 0, fmt.Errorf("%s is not present in the cost map provided", model) From c7d80a62c2de95733c1e6c95a4eaedc50afaad75 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Sun, 5 Jan 2025 15:47:30 -0800 Subject: [PATCH 50/51] update encryptor initialization logic --- internal/encryptor/encryptor.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/internal/encryptor/encryptor.go b/internal/encryptor/encryptor.go index 7f53d8b..c576b2b 100644 --- a/internal/encryptor/encryptor.go +++ b/internal/encryptor/encryptor.go @@ -35,6 +35,16 @@ func NewEncryptor(decryptionURL string, encryptionURL string, enabled bool, time ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() + if len(audience) == 0 { + return Encryptor{ + decryptionURL: decryptionURL, + encryptionURL: encryptionURL, + enabled: enabled, + timeout: timeout, + client: &http.Client{}, + }, nil + } + client, err := idtoken.NewClient(ctx, audience) if err != nil { return Encryptor{}, err From 
80bb56b8ca23f8fd8d820c84298b0289c2df766b Mon Sep 17 00:00:00 2001 From: Sergei Bronnikov <48258384+sergei-bronnikov@users.noreply.github.com> Date: Thu, 6 Mar 2025 11:10:24 +0000 Subject: [PATCH 51/51] wip (#8) --- cmd/bricksllm/main.go | 16 +++- internal/authenticator/authenticator.go | 39 ++++++++- internal/errors/expiration_err.go | 5 +- internal/event/key_reporting.go | 21 +++-- internal/key/key.go | 11 +++ internal/manager/key.go | 34 +++++--- internal/manager/provider_setting.go | 71 +++++++++++++++-- internal/manager/reporting.go | 10 +-- internal/message/consumer.go | 1 + internal/message/handler.go | 6 ++ internal/provider/xcustom/xcustom.go | 101 ++++++++++++++++++++++++ internal/recorder/recorder.go | 32 +++++--- internal/server/web/proxy/middleware.go | 5 +- internal/server/web/proxy/proxy.go | 3 + internal/server/web/proxy/x_custom.go | 74 +++++++++++++++++ internal/storage/postgresql/event.go | 40 ++++++---- internal/storage/postgresql/key.go | 26 ++++-- internal/validator/validator.go | 40 ++++++++-- 18 files changed, 454 insertions(+), 81 deletions(-) create mode 100644 internal/provider/xcustom/xcustom.go create mode 100644 internal/server/web/proxy/x_custom.go diff --git a/cmd/bricksllm/main.go b/cmd/bricksllm/main.go index 388e4e9..0bdd4b3 100644 --- a/cmd/bricksllm/main.go +++ b/cmd/bricksllm/main.go @@ -272,6 +272,14 @@ func main() { log.Sugar().Fatalf("error connecting to keys redis storage: %v", err) } + requestsLimitRedisStorage := redis.NewClient(defaultRedisOption(cfg, 11)) + + ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + if err := requestsLimitRedisStorage.Ping(ctx).Err(); err != nil { + log.Sugar().Fatalf("error connecting to requests limit redis storage: %v", err) + } + rateLimitCache := redisStorage.NewCache(rateLimitRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) costLimitCache := redisStorage.NewCache(costLimitRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) costStorage := redisStorage.NewStore(costRedisStorage, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) @@ -285,15 +293,15 @@ func main() { psCache := redisStorage.NewProviderSettingsCache(providerSettingsRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) keysCache := redisStorage.NewKeysCache(keysRedisCache, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) + requestsLimitStorage := redisStorage.NewStore(requestsLimitRedisStorage, cfg.RedisWriteTimeout, cfg.RedisReadTimeout) encryptor, err := encryptor.NewEncryptor(cfg.DecryptionEndpoint, cfg.EncryptionEndpoint, cfg.EnableEncrytion, cfg.EncryptionTimeout, cfg.Audience) if cfg.EnableEncrytion && err != nil { log.Sugar().Fatalf("error creating encryption client: %v", err) } - v := validator.NewValidator(costLimitCache, rateLimitCache, costStorage) - + v := validator.NewValidator(costLimitCache, rateLimitCache, costStorage, requestsLimitStorage) - m := manager.NewManager(store, costLimitCache, rateLimitCache, accessCache, keysCache) + m := manager.NewManager(store, costLimitCache, rateLimitCache, accessCache, keysCache, requestsLimitStorage) krm := manager.NewReportingManager(costStorage, store, store, v) psm := manager.NewProviderSettingsManager(store, psCache, encryptor) cpm := manager.NewCustomProvidersManager(store, cpMemStore) @@ -330,7 +338,7 @@ func main() { uv := validator.NewUserValidator(userCostLimitCache, userRateLimitCache, userCostStorage) - rec := recorder.NewRecorder(costStorage, userCostStorage, costLimitCache, userCostLimitCache, ce, store) + rec := 
recorder.NewRecorder(costStorage, userCostStorage, costLimitCache, userCostLimitCache, ce, store, requestsLimitStorage) rlm := manager.NewRateLimitManager(rateLimitCache, userRateLimitCache) a := auth.NewAuthenticator(psm, m, rm, store, encryptor) diff --git a/internal/authenticator/authenticator.go b/internal/authenticator/authenticator.go index bec413c..638a9ee 100644 --- a/internal/authenticator/authenticator.go +++ b/internal/authenticator/authenticator.go @@ -3,6 +3,7 @@ package auth import ( "errors" "fmt" + "github.com/bricks-cloud/bricksllm/internal/provider/xcustom" "math/rand" "net/http" "strconv" @@ -204,8 +205,20 @@ func anonymize(input string) string { return string(input[0:5]) + "**********************************************" } -func (a *Authenticator) AuthenticateHttpRequest(req *http.Request) (*key.ResponseKey, []*provider.Setting, error) { - raw, err := getApiKey(req) +func (a *Authenticator) AuthenticateHttpRequest(req *http.Request, xCustomProviderId string) (*key.ResponseKey, []*provider.Setting, error) { + var raw string + var err error + var settings []*provider.Setting + if xcustom.IsXCustomRequest(req) { + providerSetting, er := a.psm.GetSettingViaCache(xCustomProviderId) + if er != nil { + return nil, nil, er + } + settings = []*provider.Setting{providerSetting} + raw, err = xcustom.ExtractApiKey(req, providerSetting) + } else { + raw, err = getApiKey(req) + } if err != nil { return nil, nil, err } @@ -238,6 +251,28 @@ func (a *Authenticator) AuthenticateHttpRequest(req *http.Request) (*key.Respons return nil, nil, internal_errors.NewAuthError(fmt.Sprintf("key %s has been revoked", anonymize(raw))) } + if xcustom.IsXCustomRequest(req) { + pSetting := settings[0] + authString := strings.Replace( + pSetting.GetParam(xcustom.XCustomSettingFields.AuthMask), + "{{apikey}}", + pSetting.GetParam(xcustom.XCustomSettingFields.ApiKey), -1, + ) + location := xcustom.GetAuthLocation(pSetting.GetParam(xcustom.XCustomSettingFields.AuthLocation)) + target := pSetting.GetParam(xcustom.XCustomSettingFields.AuthTarget) + switch location { + case xcustom.AuthLocations.Query: + params := req.URL.Query() + params.Set(target, authString) + req.URL.RawQuery = params.Encode() + case xcustom.AuthLocations.Header: + req.Header.Set(target, authString) + default: + return nil, nil, errors.New("invalid xCustomAuth location") + } + return key, settings, nil + } + if strings.HasPrefix(req.URL.Path, "/api/routes") { err = a.canKeyAccessCustomRoute(req.URL.Path, key.KeyId) if err != nil { diff --git a/internal/errors/expiration_err.go b/internal/errors/expiration_err.go index cce8380..59aec34 100644 --- a/internal/errors/expiration_err.go +++ b/internal/errors/expiration_err.go @@ -1,8 +1,9 @@ package errors const ( - TtlExpiration string = "ttl" - CostLimitExpiration string = "cost-limit" + TtlExpiration string = "ttl" + CostLimitExpiration string = "cost-limit" + RequestsLimitExpiration string = "requests-limit" ) type ExpirationError struct { diff --git a/internal/event/key_reporting.go b/internal/event/key_reporting.go index 9550861..6ebf3f4 100644 --- a/internal/event/key_reporting.go +++ b/internal/event/key_reporting.go @@ -29,11 +29,13 @@ type KeyRingReportingRequest struct { Limit int `json:"limit"` Offset int `json:"offset"` Revoked *bool `json:"revoked"` + TopBy string `json:"topBy"` } type KeyRingDataPoint struct { KeyRing string `json:"keyRing"` CostInUsd float64 `json:"costInUsd"` + Requests int `json:"requests"` } type KeyRingReportingResponse struct { @@ -47,8 +49,13 @@ type 
SpentKeyReportingRequest struct { Offset int `json:"offset"` } +type SpentKey struct { + KeyRing string `json:"keyRing"` + LinkedKeyId string `json:"linkedKeyId"` +} + type SpentKeyReportingResponse struct { - KeyRings []string `json:"keyRings"` + Keys []SpentKey `json:"keys"` } type UsageReportingRequest struct { @@ -56,10 +63,14 @@ type UsageReportingRequest struct { } type UsageData struct { - LastDayUsage float64 `json:"lastDayUsage"` - LastWeekUsage float64 `json:"lastWeekUsage"` - LastMonthUsage float64 `json:"lastMonthUsage"` - TotalUsage float64 `json:"totalUsage"` + LastDayUsage float64 `json:"lastDayUsage"` + LastWeekUsage float64 `json:"lastWeekUsage"` + LastMonthUsage float64 `json:"lastMonthUsage"` + TotalUsage float64 `json:"totalUsage"` + LastDayUsageRequests int `json:"lastDayUsageRequests"` + LastWeekUsageRequests int `json:"lastWeekUsageRequests"` + LastMonthUsageRequests int `json:"lastMonthUsageRequests"` + TotalUsageRequests int `json:"totalUsageRequests"` } type UsageReportingResponse struct { diff --git a/internal/key/key.go b/internal/key/key.go index 59c7b18..94eb537 100644 --- a/internal/key/key.go +++ b/internal/key/key.go @@ -25,6 +25,7 @@ type UpdateKey struct { CostLimitInUsdUnit *TimeUnit `json:"costLimitInUsdUnit"` RateLimitOverTime *int `json:"rateLimitOverTime"` RateLimitUnit *TimeUnit `json:"rateLimitUnit"` + RequestsLimit *int `json:"requestsLimit"` AllowedPaths *[]PathConfig `json:"allowedPaths,omitempty"` ShouldLogRequest *bool `json:"shouldLogRequest"` ShouldLogResponse *bool `json:"shouldLogResponse"` @@ -51,6 +52,10 @@ func (uk *UpdateKey) Validate() error { invalid = append(invalid, "costLimitInUsd") } + if uk.RequestsLimit != nil && *uk.RequestsLimit < 0 { + invalid = append(invalid, "requestsLimit") + } + if uk.UpdatedAt <= 0 { invalid = append(invalid, "updatedAt") } @@ -173,6 +178,7 @@ type RequestKey struct { RotationEnabled bool `json:"rotationEnabled"` PolicyId string `json:"policyId"` IsKeyNotHashed bool `json:"isKeyNotHashed"` + RequestsLimit int `json:"requestsLimit"` } func (rk *RequestKey) Validate() error { @@ -237,6 +243,10 @@ func (rk *RequestKey) Validate() error { invalid = append(invalid, "rateLimitOverTime") } + if rk.RequestsLimit < 0 { + invalid = append(invalid, "requestsLimit") + } + if len(rk.Ttl) != 0 { _, err := time.ParseDuration(rk.Ttl) if err != nil { @@ -317,6 +327,7 @@ type ResponseKey struct { CostLimitInUsdUnit TimeUnit `json:"costLimitInUsdUnit"` RateLimitOverTime int `json:"rateLimitOverTime"` RateLimitUnit TimeUnit `json:"rateLimitUnit"` + RequestsLimit int `json:"requestsLimit"` Ttl string `json:"ttl"` KeyRing string `json:"keyRing"` SettingId string `json:"settingId"` diff --git a/internal/manager/key.go b/internal/manager/key.go index 3401db4..f4f5b2d 100644 --- a/internal/manager/key.go +++ b/internal/manager/key.go @@ -46,21 +46,27 @@ type keyCache interface { Get(keyId string) (*key.ResponseKey, error) } +type requestsLimitStorage interface { + DeleteCounter(keyId string) error +} + type Manager struct { - s Storage - clc costLimitCache - rlc rateLimitCache - ac accessCache - kc keyCache + s Storage + clc costLimitCache + rlc rateLimitCache + ac accessCache + kc keyCache + rqls requestsLimitStorage } -func NewManager(s Storage, clc costLimitCache, rlc rateLimitCache, ac accessCache, kc keyCache) *Manager { +func NewManager(s Storage, clc costLimitCache, rlc rateLimitCache, ac accessCache, kc keyCache, rqls requestsLimitStorage) *Manager { return &Manager{ - s: s, - clc: clc, - rlc: rlc, - ac: ac, - kc: kc, 
+ s: s, + clc: clc, + rlc: rlc, + ac: ac, + kc: kc, + rqls: rqls, } } @@ -175,6 +181,12 @@ func (m *Manager) UpdateKey(id string, uk *key.UpdateKey) (*key.ResponseKey, err return nil, err } } + if uk.RequestsLimit != nil { + err := m.rqls.DeleteCounter(id) + if err != nil { + return nil, err + } + } if uk.PolicyId != nil { if len(*uk.PolicyId) != 0 { diff --git a/internal/manager/provider_setting.go b/internal/manager/provider_setting.go index d02e699..c67fc0a 100644 --- a/internal/manager/provider_setting.go +++ b/internal/manager/provider_setting.go @@ -3,6 +3,8 @@ package manager import ( "encoding/json" "fmt" + "github.com/bricks-cloud/bricksllm/internal/provider/xcustom" + "slices" "strconv" "strings" "time" @@ -39,6 +41,8 @@ type ProviderSettingsManager struct { Encryptor Encryptor } +var nativelySupportedProviders = []string{"openai", "anthropic", "azure", "vllm", "deepinfra", "bedrock", "xCustom"} + func NewProviderSettingsManager(s ProviderSettingsStorage, cache ProviderSettingsCache, encryptor Encryptor) *ProviderSettingsManager { return &ProviderSettingsManager{ Storage: s, @@ -48,7 +52,7 @@ func NewProviderSettingsManager(s ProviderSettingsStorage, cache ProviderSetting } func isProviderNativelySupported(provider string) bool { - return provider == "openai" || provider == "anthropic" || provider == "azure" || provider == "vllm" || provider == "deepinfra" || provider == "bedrock" + return slices.Contains(nativelySupportedProviders, provider) } func findMissingAuthParams(providerName string, params map[string]string) string { @@ -99,6 +103,25 @@ func findMissingAuthParams(providerName string, params map[string]string) string } } + if providerName == "xCustom" { + val := params["apikey"] + if len(val) == 0 { + missingFields = append(missingFields, "apikey") + } + val = params["endpoint"] + if len(val) == 0 { + missingFields = append(missingFields, "endpoint") + } + val = params["authLocation"] + if len(val) == 0 { + missingFields = append(missingFields, "authLocation") + } + val = params["authTemplate"] + if !strings.Contains(val, "{{apikey}}") { + missingFields = append(missingFields, "authTemplate") + } + } + return strings.Join(missingFields, ",") } @@ -160,6 +183,18 @@ func (m *ProviderSettingsManager) CreateSetting(setting *provider.Setting) (*pro setting.CreatedAt = time.Now().Unix() setting.UpdatedAt = time.Now().Unix() + if setting.Provider == "xCustom" { + advancedSetting, err := xcustom.AdvancedXCustomSetting(setting.Setting) + if err != nil { + return nil, err + } + merged := setting.Setting + for k, v := range advancedSetting { + merged[k] = v + } + setting.Setting = merged + } + if m.Encryptor.Enabled() { params, err := m.EncryptParams(setting.UpdatedAt, setting.Provider, setting.Setting) if err != nil { @@ -183,15 +218,10 @@ func (m *ProviderSettingsManager) UpdateSetting(id string, setting *provider.Upd } if len(setting.Setting) != 0 { - if err := m.validateSettings(existing.Provider, setting.Setting); err != nil { + merged, err := m.getMergedSettings(existing, setting.Setting) + if err != nil { return nil, err } - - merged := existing.Setting - for k, v := range setting.Setting { - merged[k] = v - } - setting.Setting = merged } @@ -214,6 +244,31 @@ func (m *ProviderSettingsManager) UpdateSetting(id string, setting *provider.Upd return m.Storage.UpdateProviderSetting(id, setting) } +func (m *ProviderSettingsManager) getMergedSettings(existing *provider.Setting, setting map[string]string) (map[string]string, error) { + merged := existing.Setting + apikey, ok := 
setting["apikey"] + if ok && apikey == "revoked" { + merged["apikey"] = apikey + return merged, nil + } + for k, v := range setting { + merged[k] = v + } + if existing.Provider == "xCustom" { + advancedSetting, err := xcustom.AdvancedXCustomSetting(setting) + if err != nil { + return nil, err + } + for k, v := range advancedSetting { + merged[k] = v + } + } + if err := m.validateSettings(existing.Provider, merged); err != nil { + return nil, err + } + return merged, nil +} + func (m *ProviderSettingsManager) GetSettingViaCache(id string) (*provider.Setting, error) { setting, _ := m.Cache.Get(id) diff --git a/internal/manager/reporting.go b/internal/manager/reporting.go index 068fcfc..2560a37 100644 --- a/internal/manager/reporting.go +++ b/internal/manager/reporting.go @@ -14,7 +14,7 @@ type costStorage interface { type keyStorage interface { GetKey(keyId string) (*key.ResponseKey, error) - GetSpentKeyRings(tags []string, order string, limit, offset int, validator func(*key.ResponseKey) bool) ([]string, error) + GetSpentKeys(tags []string, order string, limit, offset int, validator func(*key.ResponseKey) bool) ([]event.SpentKey, error) } type keyValidator interface { @@ -31,7 +31,7 @@ type eventStorage interface { GetCustomIds(keyId string) ([]string, error) GetTopKeyDataPoints(start, end int64, tags, keyIds []string, order string, limit, offset int, name string, revoked *bool) ([]*event.KeyDataPoint, error) - GetTopKeyRingDataPoints(start, end int64, tags []string, order string, limit, offset int, revoked *bool) ([]*event.KeyRingDataPoint, error) + GetTopKeyRingDataPoints(start, end int64, tags []string, order string, limit, offset int, revoked *bool, topBy string) ([]*event.KeyRingDataPoint, error) GetUsageData(tags []string) (*event.UsageData, error) } @@ -130,7 +130,7 @@ func (rm *ReportingManager) GetTopKeyRingReporting(r *event.KeyRingReportingRequ return nil, internal_errors.NewValidationError("key reporting request order can only be desc or asc") } - dataPoints, err := rm.es.GetTopKeyRingDataPoints(r.Start, r.End, r.Tags, r.Order, r.Limit, r.Offset, r.Revoked) + dataPoints, err := rm.es.GetTopKeyRingDataPoints(r.Start, r.End, r.Tags, r.Order, r.Limit, r.Offset, r.Revoked, r.TopBy) if err != nil { return nil, err } @@ -160,12 +160,12 @@ func (rm *ReportingManager) GetSpentKeyReporting(r *event.SpentKeyReportingReque return true } - spentKeys, err := rm.ks.GetSpentKeyRings(r.Tags, r.Order, r.Limit, r.Offset, validator) + spentKeys, err := rm.ks.GetSpentKeys(r.Tags, r.Order, r.Limit, r.Offset, validator) if err != nil { return nil, err } return &event.SpentKeyReportingResponse{ - KeyRings: spentKeys, + Keys: spentKeys, }, nil } diff --git a/internal/message/consumer.go b/internal/message/consumer.go index 115f590..5d9dcd2 100644 --- a/internal/message/consumer.go +++ b/internal/message/consumer.go @@ -18,6 +18,7 @@ type recorder interface { RecordKeySpend(keyId string, micros int64, costLimitUnit key.TimeUnit) error RecordUserSpend(userId string, micros int64, costLimitUnit key.TimeUnit) error RecordEvent(e *event.Event) error + RecordKeyRequestSpent(keyId string) error } func NewConsumer(mc <-chan Message, log *zap.Logger, num int, handle func(Message) error) *Consumer { diff --git a/internal/message/handler.go b/internal/message/handler.go index 1e578c1..f03f3b8 100644 --- a/internal/message/handler.go +++ b/internal/message/handler.go @@ -331,6 +331,12 @@ func (h *Handler) HandleEventWithRequestAndResponse(m Message) error { var u *user.User + err = 
h.recorder.RecordKeyRequestSpent(e.Event.KeyId) + if err != nil { + telemetry.Incr("bricksllm.message.handler.handle_event_with_request_and_response.record_key_request_spend_error", nil, 1) + h.log.Debug("error when recording key request spend", zap.Error(err)) + } + if e.Event.CostInUsd != 0 { micros := int64(e.Event.CostInUsd * 1000000) err = h.recorder.RecordKeySpend(e.Event.KeyId, micros, e.Key.CostLimitInUsdUnit) diff --git a/internal/provider/xcustom/xcustom.go b/internal/provider/xcustom/xcustom.go new file mode 100644 index 0000000..746ce86 --- /dev/null +++ b/internal/provider/xcustom/xcustom.go @@ -0,0 +1,101 @@ +package xcustom + +import ( + "fmt" + "github.com/bricks-cloud/bricksllm/internal/provider" + "net/http" + "regexp" + + "strings" +) + +var XCustomSettingFields = struct { + ApiKey string + Endpoint string + AuthLocation string + AuthTemplate string + AuthTarget string + AuthMask string +}{ + ApiKey: "apikey", + Endpoint: "endpoint", + AuthLocation: "authLocation", + AuthTemplate: "authTemplate", + AuthTarget: "authTarget", + AuthMask: "authMask", +} + +type AuthLocation string + +var AuthLocations = struct { + Header AuthLocation + Query AuthLocation + Unknown AuthLocation +}{ + Header: AuthLocation("header"), + Query: AuthLocation("query"), + Unknown: AuthLocation("unknown"), +} + +const XProviderIdParam = "x_provider_id" + +func IsXCustomRequest(req *http.Request) bool { + return strings.HasPrefix(req.URL.RequestURI(), "/api/providers/xCustom/") +} + +func AdvancedXCustomSetting(src map[string]string) (map[string]string, error) { + rawLocation := src[XCustomSettingFields.AuthLocation] + location := GetAuthLocation(rawLocation) + var templateSeparator string + switch location { + case AuthLocations.Header: + templateSeparator = ":" + case AuthLocations.Query: + templateSeparator = "=" + default: + return nil, fmt.Errorf("unknown auth location: %s", location) + } + templateArr := strings.Split(src[XCustomSettingFields.AuthTemplate], templateSeparator) + if len(templateArr) != 2 { + return nil, fmt.Errorf("invalid auth template: %s", src[XCustomSettingFields.AuthTemplate]) + } + target := strings.TrimSpace(templateArr[0]) + mask := strings.TrimSpace(templateArr[1]) + return map[string]string{ + XCustomSettingFields.AuthTarget: target, + XCustomSettingFields.AuthMask: mask, + }, nil +} + +func ExtractApiKey(req *http.Request, pSetting *provider.Setting) (string, error) { + location := GetAuthLocation(pSetting.GetParam(XCustomSettingFields.AuthLocation)) + target := strings.TrimSpace(pSetting.GetParam(XCustomSettingFields.AuthTarget)) + var reqAuthStr string + switch location { + case AuthLocations.Header: + reqAuthStr = req.Header.Get(target) + case AuthLocations.Query: + reqAuthStr = req.URL.Query().Get(target) + default: + return "", fmt.Errorf("unknown auth location: %s", location) + } + mask := strings.TrimSpace(pSetting.GetParam(XCustomSettingFields.AuthMask)) + regexStr := strings.Replace(mask, "{{apikey}}", "(?P.*)", -1) + regex := regexp.MustCompile(regexStr) + matches := regex.FindStringSubmatch(reqAuthStr) + if len(matches) < 2 { + return "", fmt.Errorf("error extracting apikey: %s", pSetting.Id) + } + return strings.TrimSpace(matches[1]), nil +} + +func GetAuthLocation(raw string) AuthLocation { + switch raw { + case "header": + return AuthLocations.Header + case "query": + return AuthLocations.Query + default: + return AuthLocations.Unknown + } +} diff --git a/internal/recorder/recorder.go b/internal/recorder/recorder.go index a48f210..5c8b73f 100644 --- 
a/internal/recorder/recorder.go +++ b/internal/recorder/recorder.go @@ -6,12 +6,13 @@ import ( ) type Recorder struct { - s Store - c Cache - us Store - uc Cache - ce CostEstimator - es EventsStore + s Store + c Cache + us Store + uc Cache + ce CostEstimator + es EventsStore + reqLimitStore Store } type EventsStore interface { @@ -31,14 +32,15 @@ type CostEstimator interface { EstimateCompletionCost(model string, tks int) (float64, error) } -func NewRecorder(s, us Store, c, uc Cache, ce CostEstimator, es EventsStore) *Recorder { +func NewRecorder(s, us Store, c, uc Cache, ce CostEstimator, es EventsStore, reqLimitStore Store) *Recorder { return &Recorder{ - s: s, - c: c, - us: us, - uc: uc, - ce: ce, - es: es, + s: s, + c: c, + us: us, + uc: uc, + ce: ce, + es: es, + reqLimitStore: reqLimitStore, } } @@ -74,6 +76,10 @@ func (r *Recorder) RecordKeySpend(keyId string, micros int64, costLimitUnit key. return nil } +func (r *Recorder) RecordKeyRequestSpent(keyId string) error { + return r.reqLimitStore.IncrementCounter(keyId, 1) +} + func (r *Recorder) RecordEvent(e *event.Event) error { return r.es.InsertEvent(e) } diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index 6e2a75e..5a215f5 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/json" "fmt" + "github.com/bricks-cloud/bricksllm/internal/provider/xcustom" "io" "net/http" "strconv" @@ -67,7 +68,7 @@ type deepinfraEstimator interface { } type authenticator interface { - AuthenticateHttpRequest(req *http.Request) (*key.ResponseKey, []*provider.Setting, error) + AuthenticateHttpRequest(req *http.Request, xCustomProviderId string) (*key.ResponseKey, []*provider.Setting, error) } type validator interface { @@ -304,7 +305,7 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag return } - kc, settings, err := a.AuthenticateHttpRequest(c.Request) + kc, settings, err := a.AuthenticateHttpRequest(c.Request, c.Param(xcustom.XProviderIdParam)) enrichedEvent.Key = kc _, ok := err.(notAuthorizedError) if ok { diff --git a/internal/server/web/proxy/proxy.go b/internal/server/web/proxy/proxy.go index e6078f4..8bf70b9 100644 --- a/internal/server/web/proxy/proxy.go +++ b/internal/server/web/proxy/proxy.go @@ -220,6 +220,9 @@ func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyMan router.POST("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/cancel", getCancelVectorStoreFileBatchHandler(prod, client)) router.GET("/api/providers/openai/v1/vector_stores/:vector_store_id/file_batches/:batch_id/files", getListVectorStoreFileBatchFilesHandler(prod, client)) + // codio xCustom + router.Any("/api/providers/xCustom/:x_provider_id/*wildcard", getXCustomHandler(prod)) + srv := &http.Server{ Addr: ":8002", Handler: router, diff --git a/internal/server/web/proxy/x_custom.go b/internal/server/web/proxy/x_custom.go new file mode 100644 index 0000000..c0132f6 --- /dev/null +++ b/internal/server/web/proxy/x_custom.go @@ -0,0 +1,74 @@ +package proxy + +import ( + "context" + "errors" + "fmt" + "github.com/bricks-cloud/bricksllm/internal/provider" + "github.com/bricks-cloud/bricksllm/internal/provider/xcustom" + "github.com/bricks-cloud/bricksllm/internal/telemetry" + "github.com/bricks-cloud/bricksllm/internal/util" + "github.com/gin-gonic/gin" + "net/http" + "net/http/httputil" + "net/url" + "strings" +) + +func getXCustomHandler(prod bool) 
gin.HandlerFunc { + return func(c *gin.Context) { + log := util.GetLogFromCtx(c) + telemetry.Incr("bricksllm.proxy.get_x_custom_handler.requests", nil, 1) + + if c == nil || c.Request == nil { + JSON(c, http.StatusInternalServerError, "[BricksLLM] context is empty") + return + } + + ctx, cancel := context.WithTimeout(context.Background(), c.GetDuration("requestTimeout")) + defer cancel() + + providerId := c.Param(xcustom.XProviderIdParam) + rawProviderSettings, exists := c.Get("settings") + if !exists { + logError(log, "error provider setting", prod, errors.New("provider setting not found")) + c.JSON(http.StatusInternalServerError, "[BricksLLM] no settings found") + return + } + settings, ok := rawProviderSettings.([]*provider.Setting) + if !ok { + logError(log, "error provider setting", prod, errors.New("incorrect setting")) + c.JSON(http.StatusInternalServerError, "[BricksLLM] incorrect provider setting") + return + } + var providerSetting *provider.Setting + for _, setting := range settings { + if setting.Id == providerId { + providerSetting = setting + } + } + if providerSetting == nil { + logError(log, "error provider setting", prod, errors.New("provider setting not found")) + c.JSON(http.StatusInternalServerError, "[BricksLLM] no settings found") + return + } + wildcard := c.Param("wildcard") + endpoint := strings.TrimSuffix(providerSetting.GetParam("endpoint"), "/") + targetUrl := fmt.Sprintf("%s%s", endpoint, wildcard) + target, e := url.Parse(targetUrl) + if e != nil { + logError(log, "error parsing target url", prod, e) + c.JSON(http.StatusInternalServerError, "[BricksLLM] invalid endpoint") + return + } + + proxy := &httputil.ReverseProxy{ + Rewrite: func(r *httputil.ProxyRequest) { + r.SetURL(target) + r.Out.URL.Path, r.Out.URL.RawPath = target.Path, target.RawPath + r.Out.WithContext(ctx) + }, + } + proxy.ServeHTTP(c.Writer, c.Request) + } +} diff --git a/internal/storage/postgresql/event.go b/internal/storage/postgresql/event.go index 50d5a41..274d19b 100644 --- a/internal/storage/postgresql/event.go +++ b/internal/storage/postgresql/event.go @@ -6,6 +6,7 @@ import ( "encoding/json" "errors" "fmt" + "slices" "strings" "time" @@ -13,6 +14,8 @@ import ( "github.com/lib/pq" ) +var allowedTopBy = []string{"total_cost_in_usd", "total_requests"} + func (s *Store) CreateEventsByDayTable() error { createTableQuery := ` CREATE TABLE IF NOT EXISTS event_agg_by_day ( @@ -471,7 +474,7 @@ func (s *Store) GetTopKeyDataPoints(start, end int64, tags, keyIds []string, ord return data, nil } -func (s *Store) GetTopKeyRingDataPoints(start, end int64, tags []string, order string, limit, offset int, revoked *bool) ([]*event.KeyRingDataPoint, error) { +func (s *Store) GetTopKeyRingDataPoints(start, end int64, tags []string, order string, limit, offset int, revoked *bool, topBy string) ([]*event.KeyRingDataPoint, error) { args := []any{} condition := "" condition2 := "" @@ -494,21 +497,12 @@ func (s *Store) GetTopKeyRingDataPoints(start, end int64, tags []string, order s } if len(tags) > 0 { - condition2 += fmt.Sprintf("AND keys.tags @> $%d", index) + condition2 += fmt.Sprintf("AND events.tags @> $%d", index) args = append(args, pq.Array(tags)) index++ } - if revoked != nil { - bools := "False" - if *revoked { - bools = "True" - } - - condition2 += fmt.Sprintf(" AND keys.revoked = %s", bools) - } - query := fmt.Sprintf(` WITH keys_table AS ( @@ -517,7 +511,8 @@ func (s *Store) GetTopKeyRingDataPoints(start, end int64, tags []string, order s ( SELECT key_ring, - SUM(cost_in_usd) AS total_cost_in_usd 
+ SUM(cost_in_usd) AS total_cost_in_usd, + COUNT(*) AS total_requests FROM events LEFT JOIN keys ON keys.key_id = events.key_id @@ -531,9 +526,13 @@ func (s *Store) GetTopKeyRingDataPoints(start, end int64, tags []string, order s qorder = "ASC" } + qtopBy := "total_cost_in_usd" + if topBy != "" && slices.Contains(allowedTopBy, topBy) { + qtopBy = topBy + } query += fmt.Sprintf(` - ORDER BY total_cost_in_usd %s -`, qorder) + ORDER BY %s %s +`, qtopBy, qorder) if limit != 0 { query += fmt.Sprintf(` @@ -558,6 +557,7 @@ func (s *Store) GetTopKeyRingDataPoints(start, end int64, tags []string, order s additional := []any{ &keyRing, &e.CostInUsd, + &e.Requests, } if err := rows.Scan( @@ -597,10 +597,14 @@ func (s *Store) GetUsageData(tags []string) (*event.UsageData, error) { COALESCE(SUM(cost_in_usd), 0) AS total_cost_in_usd, COALESCE(SUM(CASE WHEN created_at > %d THEN cost_in_usd ELSE 0 END), 0) AS total_cost_in_usd_last_day, COALESCE(SUM(CASE WHEN created_at > %d THEN cost_in_usd ELSE 0 END), 0) AS total_cost_in_usd_last_week, - COALESCE(SUM(CASE WHEN created_at > %d THEN cost_in_usd ELSE 0 END), 0) AS total_cost_in_usd_last_month + COALESCE(SUM(CASE WHEN created_at > %d THEN cost_in_usd ELSE 0 END), 0) AS total_cost_in_usd_last_month, + COALESCE(SUM(1), 0) AS total_requests, + COALESCE(SUM(CASE WHEN created_at > %d THEN 1 ELSE 0 END), 0) AS total_requests_last_day, + COALESCE(SUM(CASE WHEN created_at > %d THEN 1 ELSE 0 END), 0) AS total_requests_last_week, + COALESCE(SUM(CASE WHEN created_at > %d THEN 1 ELSE 0 END), 0) AS total_requests_last_month FROM events WHERE %s - `, dayAgo, weekAgo, monthAgo, condition) + `, dayAgo, weekAgo, monthAgo, dayAgo, weekAgo, monthAgo, condition) ctx, cancel := context.WithTimeout(context.Background(), s.rt) defer cancel() @@ -611,6 +615,10 @@ func (s *Store) GetUsageData(tags []string) (*event.UsageData, error) { &data.LastDayUsage, &data.LastWeekUsage, &data.LastMonthUsage, + &data.TotalUsageRequests, + &data.LastDayUsageRequests, + &data.LastWeekUsageRequests, + &data.LastMonthUsageRequests, ); err != nil { if err == sql.ErrNoRows { return nil, nil diff --git a/internal/storage/postgresql/key.go b/internal/storage/postgresql/key.go index c129aa9..e92f114 100644 --- a/internal/storage/postgresql/key.go +++ b/internal/storage/postgresql/key.go @@ -5,6 +5,7 @@ import ( "database/sql" "encoding/json" "fmt" + "github.com/bricks-cloud/bricksllm/internal/event" "strings" internal_errors "github.com/bricks-cloud/bricksllm/internal/errors" @@ -56,7 +57,7 @@ func (s *Store) AlterKeysTable() error { END IF; END $$; - ALTER TABLE keys ADD COLUMN IF NOT EXISTS setting_id VARCHAR(255), ADD COLUMN IF NOT EXISTS allowed_paths JSONB, ADD COLUMN IF NOT EXISTS setting_ids VARCHAR(255)[] NOT NULL DEFAULT ARRAY[]::VARCHAR(255)[], ADD COLUMN IF NOT EXISTS should_log_request BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS should_log_response BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS rotation_enabled BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS policy_id VARCHAR(255) NOT NULL DEFAULT '', ADD COLUMN IF NOT EXISTS is_key_not_hashed BOOLEAN NOT NULL DEFAULT FALSE; + ALTER TABLE keys ADD COLUMN IF NOT EXISTS setting_id VARCHAR(255), ADD COLUMN IF NOT EXISTS allowed_paths JSONB, ADD COLUMN IF NOT EXISTS setting_ids VARCHAR(255)[] NOT NULL DEFAULT ARRAY[]::VARCHAR(255)[], ADD COLUMN IF NOT EXISTS should_log_request BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS should_log_response BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS 
rotation_enabled BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS policy_id VARCHAR(255) NOT NULL DEFAULT '', ADD COLUMN IF NOT EXISTS is_key_not_hashed BOOLEAN NOT NULL DEFAULT FALSE, ADD COLUMN IF NOT EXISTS requests_limit INT NOT NULL DEFAULT 0; ` ctxTimeout, cancel := context.WithTimeout(context.Background(), s.wt) @@ -189,6 +190,7 @@ func (s *Store) GetKeys(tags, keyIds []string, provider string) ([]*key.Response &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } @@ -314,6 +316,7 @@ func (s *Store) GetKeysV2(tags, keyIds []string, revoked *bool, limit, offset in &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } @@ -393,6 +396,7 @@ func (s *Store) GetKeyByHash(hash string) (*key.ResponseKey, error) { &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ) if err != nil { @@ -457,6 +461,7 @@ func (s *Store) GetKey(keyId string) (*key.ResponseKey, error) { &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } @@ -483,7 +488,7 @@ func (s *Store) GetKey(keyId string) (*key.ResponseKey, error) { return keys[0], nil } -func (s *Store) GetSpentKeyRings(tags []string, order string, limit, offset int, validator func(*key.ResponseKey) bool) ([]string, error) { +func (s *Store) GetSpentKeys(tags []string, order string, limit, offset int, validator func(*key.ResponseKey) bool) ([]event.SpentKey, error) { args := []any{} condition := "" @@ -524,7 +529,7 @@ func (s *Store) GetSpentKeyRings(tags []string, order string, limit, offset int, } defer rows.Close() - invalidKeyRings := []string{} + invalidKeyRings := []event.SpentKey{} for rows.Next() { var k key.ResponseKey var settingId sql.NullString @@ -553,6 +558,7 @@ func (s *Store) GetSpentKeyRings(tags []string, order string, limit, offset int, &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } @@ -569,7 +575,10 @@ func (s *Store) GetSpentKeyRings(tags []string, order string, limit, offset int, } if !validator(pk) { - invalidKeyRings = append(invalidKeyRings, pk.KeyRing) + invalidKeyRings = append(invalidKeyRings, event.SpentKey{ + KeyRing: pk.KeyRing, + LinkedKeyId: pk.KeyId, + }) } } @@ -615,6 +624,7 @@ func (s *Store) GetAllKeys() ([]*key.ResponseKey, error) { &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } @@ -675,6 +685,7 @@ func (s *Store) GetUpdatedKeys(updatedAt int64) ([]*key.ResponseKey, error) { &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } @@ -853,6 +864,7 @@ func (s *Store) UpdateKey(id string, uk *key.UpdateKey) (*key.ResponseKey, error &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { if err == sql.ErrNoRows { return nil, internal_errors.NewNotFoundError(fmt.Sprintf("key not found for id: %s", id)) @@ -877,8 +889,8 @@ func (s *Store) UpdateKey(id string, uk *key.UpdateKey) (*key.ResponseKey, error func (s *Store) CreateKey(rk *key.RequestKey) (*key.ResponseKey, error) { query := ` - INSERT INTO keys (name, created_at, updated_at, tags, revoked, key_id, key, revoked_reason, cost_limit_in_usd, cost_limit_in_usd_over_time, cost_limit_in_usd_unit, rate_limit_over_time, rate_limit_unit, ttl, key_ring, setting_id, allowed_paths, setting_ids, should_log_request, should_log_response, rotation_enabled, policy_id, is_key_not_hashed) - VALUES ($1, $2, 
$3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23) + INSERT INTO keys (name, created_at, updated_at, tags, revoked, key_id, key, revoked_reason, cost_limit_in_usd, cost_limit_in_usd_over_time, cost_limit_in_usd_unit, rate_limit_over_time, rate_limit_unit, ttl, key_ring, setting_id, allowed_paths, setting_ids, should_log_request, should_log_response, rotation_enabled, policy_id, is_key_not_hashed, requests_limit) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24) RETURNING *; ` @@ -911,6 +923,7 @@ func (s *Store) CreateKey(rk *key.RequestKey) (*key.ResponseKey, error) { rk.RotationEnabled, rk.PolicyId, rk.IsKeyNotHashed, + rk.RequestsLimit, } ctxTimeout, cancel := context.WithTimeout(context.Background(), s.wt) @@ -944,6 +957,7 @@ func (s *Store) CreateKey(rk *key.RequestKey) (*key.ResponseKey, error) { &k.RotationEnabled, &k.PolicyId, &k.IsKeyNotHashed, + &k.RequestsLimit, ); err != nil { return nil, err } diff --git a/internal/validator/validator.go b/internal/validator/validator.go index 7efdfb9..c3df819 100644 --- a/internal/validator/validator.go +++ b/internal/validator/validator.go @@ -21,21 +21,28 @@ type costLimitStorage interface { GetCounter(keyId string) (int64, error) } +type requestsLimitStorage interface { + GetCounter(keyId string) (int64, error) +} + type Validator struct { - clc costLimitCache - rlc rateLimitCache - cls costLimitStorage + clc costLimitCache + rlc rateLimitCache + cls costLimitStorage + rqls requestsLimitStorage } func NewValidator( clc costLimitCache, rlc rateLimitCache, cls costLimitStorage, + rqls requestsLimitStorage, ) *Validator { return &Validator{ - clc: clc, - rlc: rlc, - cls: cls, + clc: clc, + rlc: rlc, + cls: cls, + rqls: rqls, } } @@ -53,7 +60,12 @@ func (v *Validator) Validate(k *key.ResponseKey, promptCost float64) error { return internal_errors.NewExpirationError("api key expired", internal_errors.TtlExpiration) } - err := v.validateRateLimitOverTime(k.KeyId, k.RateLimitOverTime, k.RateLimitUnit) + err := v.validateRequestsLimit(k.KeyId, k.RequestsLimit) + if err != nil { + return err + } + + err = v.validateRateLimitOverTime(k.KeyId, k.RateLimitOverTime, k.RateLimitUnit) if err != nil { return err } @@ -136,3 +148,17 @@ func (v *Validator) validateCostLimit(keyId string, costLimit float64) error { return nil } + +func (v *Validator) validateRequestsLimit(keyId string, requestsLimit int) error { + if requestsLimit == 0 { + return nil + } + existingTotalRequests, err := v.rqls.GetCounter(keyId) + if err != nil { + return errors.New("failed to get total requests") + } + if existingTotalRequests >= int64(requestsLimit) { + return internal_errors.NewExpirationError(fmt.Sprintf("total requests limit: %d, has been reached", requestsLimit), internal_errors.RequestsLimitExpiration) + } + return nil +}
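
Note on the xCustom auth settings introduced in the last patch: a provider setting supplies apikey, endpoint, authLocation ("header" or "query") and an authTemplate containing the literal {{apikey}}. The template is split on ":" for headers or "=" for query parameters, yielding authTarget (the header or query parameter name) and authMask (the value pattern). Inbound requests to /api/providers/xCustom/:x_provider_id/*wildcard have the caller's key extracted by matching the mask against that header or query value, and the outbound request gets the mask re-filled with the stored provider key. The standalone sketch below mirrors that derivation and extraction for illustration only: the function names, the sample template and the sample key are invented here, and unlike the in-tree code it quotes the mask with regexp.QuoteMeta and uses a plain capture group rather than a named one.

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// deriveAuthFields mirrors the authTemplate handling: split the template into
// the header/query name (target) and the value pattern (mask) that carries
// the {{apikey}} placeholder.
func deriveAuthFields(location, template string) (target, mask string, err error) {
	sep := ":"
	if location == "query" {
		sep = "="
	}
	parts := strings.SplitN(template, sep, 2)
	if len(parts) != 2 || !strings.Contains(parts[1], "{{apikey}}") {
		return "", "", fmt.Errorf("invalid auth template: %s", template)
	}
	return strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]), nil
}

// extractAPIKey mirrors the mask-based extraction: turn the mask into a
// regexp with a capture group where {{apikey}} sits and pull the key out of
// the raw header or query value.
func extractAPIKey(mask, raw string) (string, error) {
	pattern := strings.Replace(regexp.QuoteMeta(mask), regexp.QuoteMeta("{{apikey}}"), "(.+)", 1)
	m := regexp.MustCompile(pattern).FindStringSubmatch(raw)
	if len(m) < 2 {
		return "", fmt.Errorf("api key not found in %q", raw)
	}
	return strings.TrimSpace(m[1]), nil
}

func main() {
	// Sample values for illustration; any real setting would come from the
	// xCustom provider setting stored by the admin API.
	target, mask, _ := deriveAuthFields("header", "Authorization: Bearer {{apikey}}")
	fmt.Println(target, "|", mask) // Authorization | Bearer {{apikey}}

	key, _ := extractAPIKey(mask, "Bearer sk-bricksllm-test-123")
	fmt.Println(key) // sk-bricksllm-test-123
}

Run against authTemplate "Authorization: Bearer {{apikey}}" this prints the target Authorization, the mask Bearer {{apikey}}, and the extracted key, which is the round trip the authenticator and the xcustom package perform on each proxied request.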