From bbdd7fd47539907f00f896ece36e728d557c8f95 Mon Sep 17 00:00:00 2001 From: Sergei Bronnikov Date: Mon, 16 Mar 2026 13:10:25 +0000 Subject: [PATCH 1/8] videos --- internal/provider/openai/cost.go | 42 ++++++++ internal/provider/openai/types.go | 15 +++ internal/server/web/proxy/middleware.go | 1 + internal/server/web/proxy/proxy.go | 9 ++ internal/server/web/proxy/video.go | 132 ++++++++++++++++++++++++ 5 files changed, 199 insertions(+) create mode 100644 internal/server/web/proxy/video.go diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index 777ab31..bd200af 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -152,6 +152,14 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "tts-1": 0.015, "tts-1-hd": 0.03, }, + "video": { // $ per sec + "sora-2": 0.1, + "sora-2-pro": 0.30, + "sora-2-720": 0.1, + "sora-2-pro-720": 0.30, + "sora-2-pro-1024": 0.5, + "sora-2-pro-1080": 0.7, + }, "completion": { "gpt-image-1.5": 0.010, "chatgpt-image-latest": 0.010, @@ -769,6 +777,40 @@ func (ce *CostEstimator) EstimateResponseApiToolCreateContainerCost(req *Respons return totalCost, nil } +func (ce *CostEstimator) EstimateVideoCost(metadata *VideoResponseMetadata) (float64, error) { + if metadata == nil { + return 0, errors.New("metadata is nil") + } + costMap, ok := ce.tokenCostMap["video"] + if !ok { + return 0, errors.New("video cost map is not provided") + } + model := metadata.Model + size, err := normalizedVideoSize(metadata.Size) + if err != nil { + return 0, err + } + costKey := fmt.Sprintf("%s-%s", model, size) + cost, ok := costMap[costKey] + if !ok { + return 0, errors.New("model with provided size is not present in the video cost map") + } + return cost * metadata.GetSecondsAsFloat(), nil +} + +func normalizedVideoSize(size string) (string, error) { + switch size { + case "720x1280", "1280x720": + return "720", nil + case "1024x1792", "1792x1024": + return "1024", nil + case "1080x1920", "1920x1080": + return "1080", nil + default: + return "", errors.New("size is not valid") + } +} + var reasoningModelPrefix = []string{"gpt-5", "o1", "o2", "o3"} func extendedToolType(toolType, model string) string { diff --git a/internal/provider/openai/types.go b/internal/provider/openai/types.go index 3f2dae3..299f947 100644 --- a/internal/provider/openai/types.go +++ b/internal/provider/openai/types.go @@ -1,5 +1,7 @@ package openai +import "strconv" + type ResponseRequest struct { Background *bool `json:"background,omitzero"` Conversation *any `json:"conversation,omitzero"` @@ -89,3 +91,16 @@ type ImageResponseMetadata struct { Size string `json:"size,omitempty"` Usage ImageResponseUsage `json:"usage,omitempty"` } + +type VideoResponseMetadata struct { + Model string `json:"model,omitempty"` + Size string `json:"size,omitempty"` + Seconds string `json:"seconds,omitempty"` +} + +func (v *VideoResponseMetadata) GetSecondsAsFloat() float64 { + if secondsFloat, err := strconv.ParseFloat(v.Seconds, 64); err == nil { + return secondsFloat + } + return 0 +} diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index a994f72..ffed4c5 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -61,6 +61,7 @@ type estimator interface { EstimateResponseApiTotalCost(model string, usage responsesOpenai.ResponseUsage) (float64, error) EstimateResponseApiToolCallsCost(tools []responsesOpenai.ToolUnion, model string) (float64, error) EstimateResponseApiToolCreateContainerCost(req *openai.ResponseRequest) (float64, error) + EstimateVideoCost(metadata *openai.VideoResponseMetadata) (float64, error) } type azureEstimator interface { diff --git a/internal/server/web/proxy/proxy.go b/internal/server/web/proxy/proxy.go index 3a6cb82..4740867 100644 --- a/internal/server/web/proxy/proxy.go +++ b/internal/server/web/proxy/proxy.go @@ -104,6 +104,15 @@ func NewProxyServer(log *zap.Logger, mode, privacyMode string, c cache, m KeyMan router.POST("/api/providers/openai/v1/audio/transcriptions", getTranscriptionsHandler(prod, client, e)) router.POST("/api/providers/openai/v1/audio/translations", getTranslationsHandler(prod, client, e)) + // videos + router.POST("/api/providers/openai/v1/videos", getVideoHandler(prod, client, e)) + router.POST("/api/providers/openai/v1/videos/edits", getVideoHandler(prod, client, e)) + router.POST("/api/providers/openai/v1/videos/extensions", getVideoHandler(prod, client, e)) + router.GET("/api/providers/openai/v1/videos/:video_id", getVideoHandler(prod, client, e)) + router.DELETE("/api/providers/openai/v1/videos/:video_id", getVideoHandler(prod, client, e)) + router.POST("/api/providers/openai/v1/videos/:video_id/remix", getVideoHandler(prod, client, e)) + router.GET("/api/providers/openai/v1/videos/:video_id/content", getVideoHandler(prod, client, e)) + // completions router.POST("/api/providers/openai/v1/chat/completions", getChatCompletionHandler(prod, private, client, e)) diff --git a/internal/server/web/proxy/video.go b/internal/server/web/proxy/video.go new file mode 100644 index 0000000..5854c59 --- /dev/null +++ b/internal/server/web/proxy/video.go @@ -0,0 +1,132 @@ +package proxy + +import ( + "context" + "encoding/json" + "errors" + "io" + "net/http" + "strings" + "time" + + "github.com/bricks-cloud/bricksllm/internal/provider/openai" + "github.com/bricks-cloud/bricksllm/internal/telemetry" + "github.com/bricks-cloud/bricksllm/internal/util" + "github.com/gin-gonic/gin" + goopenai "github.com/sashabaranov/go-openai" +) + +func getVideoHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc { + return func(ginCtx *gin.Context) { + log := util.GetLogFromCtx(ginCtx) + telemetry.Incr("bricksllm.proxy.get_responses_handler.requests", nil, 1) + + if ginCtx == nil || ginCtx.Request == nil { + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] context is empty") + return + } + + ctx, cancel := context.WithTimeout(ginCtx.Request.Context(), ginCtx.GetDuration("requestTimeout")) + defer cancel() + + videoURL, err := constructVideoURL(ginCtx.Request.URL.Path) + if err != nil { + logError(log, "failed to construct video URL", prod, err) + JSON(ginCtx, http.StatusBadRequest, "[BricksLLM] invalid video request") + return + } + + req, err := http.NewRequestWithContext(ctx, ginCtx.Request.Method, videoURL, ginCtx.Request.Body) + if err != nil { + logError(log, "error when creating openai http request", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request") + return + } + + copyHttpHeaders(ginCtx.Request, req, ginCtx.GetBool("removeUserAgent")) + + start := time.Now() + res, err := client.Do(req) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_video_handler.http_client_error", nil, 1) + + logError(log, "error when sending http request to openai", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai") + return + } + defer res.Body.Close() + + for name, values := range res.Header { + for _, value := range values { + ginCtx.Header(name, value) + } + } + + if res.StatusCode != http.StatusOK { + dur := time.Since(start) + telemetry.Timing("bricksllm.proxy.get_video_handler.error_latency", dur, nil, 1) + telemetry.Incr("bricksllm.proxy.get_video_handler.error_response", nil, 1) + + bytes, err2 := io.ReadAll(res.Body) + if err2 != nil { + logError(log, "error when reading openai http video response body", prod, err2) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body") + return + } + + errorRes := &goopenai.ErrorResponse{} + err2 = json.Unmarshal(bytes, errorRes) + if err2 != nil { + logError(log, "error when unmarshalling openai video error response body", prod, err2) + } + + logOpenAiError(log, prod, errorRes) + + ginCtx.Data(res.StatusCode, "application/json", bytes) + return + } + + dur := time.Since(start) + telemetry.Timing("bricksllm.proxy.get_video_handler.latency", dur, nil, 1) + + bytes, err := io.ReadAll(res.Body) + if err != nil { + logError(log, "error when reading openai http video response body", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body") + return + } + + var cost float64 = 0 + respMetadata := &openai.VideoResponseMetadata{} + telemetry.Incr("bricksllm.proxy.get_video_handler.success", nil, 1) + telemetry.Timing("bricksllm.proxy.get_video_handler.success_latency", dur, nil, 1) + + err = json.Unmarshal(bytes, respMetadata) + if err != nil { + logError(log, "error when unmarshalling openai http video response body", prod, err) + } + + isPaidRequest := ginCtx.Request.Method == http.MethodPost + if err == nil && isPaidRequest { + cost, err = e.EstimateVideoCost(respMetadata) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_video_handler.estimate_cost_error", nil, 1) + logError(log, "error when estimating video cost", prod, err) + } + } + ginCtx.Set("costInUsd", cost) + ginCtx.Data(res.StatusCode, "application/json", bytes) + return + } +} + +func constructVideoURL(fullPath string) (string, error) { + if fullPath == "" { + return "", errors.New("empty full path") + } + if !strings.HasPrefix(fullPath, "/api/providers/openai") { + return "", errors.New("invalid path prefix") + } + path := strings.TrimPrefix(fullPath, "/api/providers/openai") + return "https://api.openai.com" + path, nil +} From c18dbd2e531ad37967a6bfbfeb64c1705750111f Mon Sep 17 00:00:00 2001 From: Sergei Bronnikov Date: Mon, 16 Mar 2026 15:04:03 +0000 Subject: [PATCH 2/8] fix video resp. audio models --- internal/provider/openai/cost.go | 11 +++++++++-- internal/server/web/proxy/video.go | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index bd200af..fb9699f 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -149,8 +149,15 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ }, "audio": { "whisper-1": 0.006, - "tts-1": 0.015, - "tts-1-hd": 0.03, + + "tts-1": 0.015, + "tts-1-hd": 0.03, + + "gpt-4o-transcribe": 0.006, + "gpt-4o-transcribe-diarize": 0.006, + "gpt-4o-mini-transcribe": 0.003, + + "gpt-4o-mini-tts": 0.012, }, "video": { // $ per sec "sora-2": 0.1, diff --git a/internal/server/web/proxy/video.go b/internal/server/web/proxy/video.go index 5854c59..7e35130 100644 --- a/internal/server/web/proxy/video.go +++ b/internal/server/web/proxy/video.go @@ -115,7 +115,7 @@ func getVideoHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc } } ginCtx.Set("costInUsd", cost) - ginCtx.Data(res.StatusCode, "application/json", bytes) + ginCtx.Data(res.StatusCode, res.Header.Get("Content-Type"), bytes) return } } From b35399bdfd62359ea631cee775cdd171f5971d4e Mon Sep 17 00:00:00 2001 From: Sergei Bronnikov Date: Thu, 9 Apr 2026 15:50:05 +0100 Subject: [PATCH 3/8] processGPTTranscriptions --- internal/provider/openai/types.go | 16 +++ internal/server/web/proxy/audio.go | 6 + internal/server/web/proxy/audioV2.go | 205 +++++++++++++++++++++++++++ 3 files changed, 227 insertions(+) create mode 100644 internal/server/web/proxy/audioV2.go diff --git a/internal/provider/openai/types.go b/internal/provider/openai/types.go index 299f947..7566bef 100644 --- a/internal/provider/openai/types.go +++ b/internal/provider/openai/types.go @@ -104,3 +104,19 @@ func (v *VideoResponseMetadata) GetSecondsAsFloat() float64 { } return 0 } + +type TranscriptionResponseUsageInputTokenDetails struct { + TextTokens int `json:"text_tokens,omitempty"` + AudioTokens int `json:"audio_tokens,omitempty"` +} +type TranscriptionResponseUsage struct { + Type string `json:"type"` + TotalTokens int `json:"total_tokens,omitempty"` + InputTokens int `json:"input_tokens,omitempty"` + InputTokenDetails TranscriptionResponseUsageInputTokenDetails `json:"input_token_details,omitempty"` + OutputTokens int `json:"output_tokens,omitempty"` +} +type TranscriptionResponse struct { + Text string `json:"text,omitempty"` + Usage TranscriptionResponseUsage `json:"usage,omitempty"` +} diff --git a/internal/server/web/proxy/audio.go b/internal/server/web/proxy/audio.go index af6e2a9..8395ed3 100644 --- a/internal/server/web/proxy/audio.go +++ b/internal/server/web/proxy/audio.go @@ -169,6 +169,12 @@ func getContentType(format string) string { func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc { return func(c *gin.Context) { + model := c.PostForm("model") + if model == "gpt-4o-transcribe" || model == "gpt-4o-transcribe-diarize" || model == "gpt-4o-mini-transcribe" { + processGPTTranscriptions(c, prod, client, e, model) + return + } + log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.requests", nil, 1) diff --git a/internal/server/web/proxy/audioV2.go b/internal/server/web/proxy/audioV2.go new file mode 100644 index 0000000..5b8f6f5 --- /dev/null +++ b/internal/server/web/proxy/audioV2.go @@ -0,0 +1,205 @@ +package proxy + +import ( + "bytes" + "context" + "encoding/json" + "io" + "mime/multipart" + "net/http" + "time" + + "github.com/bricks-cloud/bricksllm/internal/provider/openai" + "github.com/bricks-cloud/bricksllm/internal/telemetry" + "github.com/bricks-cloud/bricksllm/internal/util" + "github.com/gin-gonic/gin" + goopenai "github.com/sashabaranov/go-openai" + "go.uber.org/zap" +) + +const transcriptionsUrl = "https://api.openai.com/v1/audio/transcriptions" + +func processGPTTranscriptions(ginCtx *gin.Context, prod bool, client http.Client, e estimator, model string) { + log := util.GetLogFromCtx(ginCtx) + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.requests", nil, 1) + + if ginCtx == nil || ginCtx.Request == nil { + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] context is empty") + return + } + + ctx, cancel := context.WithTimeout(context.Background(), ginCtx.GetDuration("requestTimeout")) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, ginCtx.Request.Method, transcriptionsUrl, ginCtx.Request.Body) + if err != nil { + logError(log, "error when creating transcriptions openai http request", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to create openai transcriptions http request") + return + } + + copyHttpHeaders(ginCtx.Request, req, ginCtx.GetBool("removeUserAgent")) + + isStreaming := ginCtx.PostForm("stream") == "True" || ginCtx.PostForm("stream") == "true" + + if isStreaming { + req.Header.Set("Accept", "*/*") + req.Header.Set("Cache-Control", "no-cache") + req.Header.Set("Connection", "keep-alive") + } + + if !isStreaming { + modifyGPTTranscriptionsRequest(ginCtx, prod, log, req) + } + + start := time.Now() + res, err := client.Do(req) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.http_client_error", nil, 1) + + logError(log, "error when sending transcriptions request to openai", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to send transcriptions request to openai") + return + } + + defer res.Body.Close() + + for name, values := range res.Header { + for _, value := range values { + ginCtx.Header(name, value) + } + } + + if res.StatusCode == http.StatusOK && !isStreaming { + dur := time.Since(start) + telemetry.Timing("bricksllm.proxy.get_transcriptions_handler.latency", dur, nil, 1) + bytes, err := io.ReadAll(res.Body) + if err != nil { + logError(log, "error when reading openai http transcriptions response body", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body") + return + } + var cost float64 = 0 + resp := &openai.TranscriptionResponse{} + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.success", nil, 1) + telemetry.Timing("bricksllm.proxy.get_transcriptions_handler.success_latency", dur, nil, 1) + + err = json.Unmarshal(bytes, resp) + if err != nil { + logError(log, "error when unmarshalling openai http response api response body", prod, err) + } + + if err == nil { + // estimate + } + + ginCtx.Set("costInUsd", cost) + + contentType := "application/json" + if ginCtx.PostForm("response_format") == "text" { + contentType = "text/plain; charset=utf-8" + } + + ginCtx.Data(res.StatusCode, contentType, bytes) + return + } + + if res.StatusCode != http.StatusOK { + dur := time.Since(start) + telemetry.Timing("bricksllm.proxy.get_transcriptions_handler.error_latency", dur, nil, 1) + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.error_response", nil, 1) + + bytes, err := io.ReadAll(res.Body) + if err != nil { + logError(log, "error when reading openai transcriptions response body", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai transcriptions response body") + return + } + + errorRes := &goopenai.ErrorResponse{} + err = json.Unmarshal(bytes, errorRes) + if err != nil { + logError(log, "error when unmarshalling openai transcriptions error response body", prod, err) + } + + logOpenAiError(log, prod, errorRes) + + ginCtx.Data(res.StatusCode, "application/json", bytes) + return + } +} + +func modifyGPTTranscriptionsRequest(c *gin.Context, prod bool, log *zap.Logger, req *http.Request) { + var b bytes.Buffer + writer := multipart.NewWriter(&b) + defer writer.Close() + + responseFormat := c.PostForm("response_format") + if responseFormat == "text" { + responseFormat = "json" + } + + err := writePostFields(c, writer, map[string]string{ + "response_format": responseFormat, + }) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.write_field_to_buffer_error", nil, 1) + logError(log, "error when writing field to buffer", prod, err) + JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot write field to buffer") + return + } + + var form TransriptionForm + c.ShouldBind(&form) + + if form.File != nil { + fieldWriter, err := writer.CreateFormFile("file", form.File.Filename) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.create_transcription_file_error", nil, 1) + logError(log, "error when creating transcription file", prod, err) + JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot create transcription file") + return + } + + opened, err := form.File.Open() + if err != nil { + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.open_transcription_file_error", nil, 1) + logError(log, "error when openning transcription file", prod, err) + JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot open transcription file") + return + } + + _, err = io.Copy(fieldWriter, opened) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.copy_transcription_file_error", nil, 1) + logError(log, "error when copying transcription file", prod, err) + JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot copy transcription file") + return + } + } + + req.Header.Set("Content-Type", writer.FormDataContentType()) + req.Body = io.NopCloser(&b) +} + +func writePostFields(c *gin.Context, writer *multipart.Writer, overWrites map[string]string) error { + for k, v := range c.Request.PostForm { + if len(v) == 0 { + continue + } + val := v[0] + if len(overWrites) != 0 { + if ow := overWrites[k]; len(ow) != 0 { + val = ow + } + } + if len(val) == 0 { + continue + } + err := writer.WriteField(k, val) + if err != nil { + return err + } + } + return nil +} From b91057770086b4766aa1e6b3d40591617277955d Mon Sep 17 00:00:00 2001 From: Sergei Bronnikov Date: Fri, 10 Apr 2026 09:24:56 +0100 Subject: [PATCH 4/8] estimate --- internal/provider/openai/cost.go | 35 ++++++++++++++++++++++++- internal/server/web/proxy/audio.go | 4 +-- internal/server/web/proxy/audioV2.go | 20 +++++++++----- internal/server/web/proxy/middleware.go | 2 +- 4 files changed, 50 insertions(+), 11 deletions(-) diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index fb9699f..a50a930 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -159,6 +159,16 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "gpt-4o-mini-tts": 0.012, }, + "transcription-input": { + "gpt-4o-transcribe": 0.0025, + "gpt-4o-transcribe-diarize": 0.0025, + "gpt-4o-mini-transcribe": 0.00125, + }, + "transcription-output": { + "gpt-4o-transcribe": 0.01, + "gpt-4o-transcribe-diarize": 0.01, + "gpt-4o-mini-transcribe": 0.005, + }, "video": { // $ per sec "sora-2": 0.1, "sora-2-pro": 0.30, @@ -664,7 +674,30 @@ func prepareGptImageQuality(quality string) (string, error) { return quality, nil } -func (ce *CostEstimator) EstimateTranscriptionCost(secs float64, model string) (float64, error) { +func (ce *CostEstimator) EstimateTranscriptionCost(secs float64, model string, usage *TranscriptionResponseUsage) (float64, error) { + if usage != nil { + inputTokens := usage.InputTokens + costMap, ok := ce.tokenCostMap["transcription-input"] + if !ok { + return 0, errors.New("transcription input token cost map is not provided") + } + inputCost, ok := costMap[model] + if !ok { + return 0, errors.New("model is not present in the transcription input token cost map") + } + + outputTokens := usage.OutputTokens + costMap, ok = ce.tokenCostMap["transcription-output"] + if !ok { + return 0, errors.New("transcription output token cost map is not provided") + } + outputCost, ok := costMap[model] + if !ok { + return 0, errors.New("model is not present in the transcription output token cost map") + } + + return (float64(inputTokens)/1000)*inputCost + (float64(outputTokens)/1000)*outputCost, nil + } costMap, ok := ce.tokenCostMap["audio"] if !ok { return 0, errors.New("audio cost map is not provided") diff --git a/internal/server/web/proxy/audio.go b/internal/server/web/proxy/audio.go index 8395ed3..40bd717 100644 --- a/internal/server/web/proxy/audio.go +++ b/internal/server/web/proxy/audio.go @@ -297,7 +297,7 @@ func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.Ha } if err == nil { - cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model")) + cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model"), nil) if err != nil { telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.estimate_total_cost_error", nil, 1) logError(log, "error when estimating openai cost", prod, err) @@ -457,7 +457,7 @@ func getTranslationsHandler(prod bool, client http.Client, e estimator) gin.Hand } if err == nil { - cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model")) + cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model"), nil) if err != nil { telemetry.Incr("bricksllm.proxy.get_translations_handler.estimate_total_cost_error", nil, 1) logError(log, "error when estimating openai cost", prod, err) diff --git a/internal/server/web/proxy/audioV2.go b/internal/server/web/proxy/audioV2.go index 5b8f6f5..3b6ac33 100644 --- a/internal/server/web/proxy/audioV2.go +++ b/internal/server/web/proxy/audioV2.go @@ -73,7 +73,7 @@ func processGPTTranscriptions(ginCtx *gin.Context, prod bool, client http.Client if res.StatusCode == http.StatusOK && !isStreaming { dur := time.Since(start) telemetry.Timing("bricksllm.proxy.get_transcriptions_handler.latency", dur, nil, 1) - bytes, err := io.ReadAll(res.Body) + readBytes, err := io.ReadAll(res.Body) if err != nil { logError(log, "error when reading openai http transcriptions response body", prod, err) JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body") @@ -84,23 +84,29 @@ func processGPTTranscriptions(ginCtx *gin.Context, prod bool, client http.Client telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.success", nil, 1) telemetry.Timing("bricksllm.proxy.get_transcriptions_handler.success_latency", dur, nil, 1) - err = json.Unmarshal(bytes, resp) + err = json.Unmarshal(readBytes, resp) if err != nil { logError(log, "error when unmarshalling openai http response api response body", prod, err) } if err == nil { - // estimate + cost, err = e.EstimateTranscriptionCost(0, model, &resp.Usage) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.estimate_total_cost_error", nil, 1) + logError(log, "error when estimating openai cost", prod, err) + } } ginCtx.Set("costInUsd", cost) contentType := "application/json" + bytesToSend := readBytes if ginCtx.PostForm("response_format") == "text" { contentType = "text/plain; charset=utf-8" + bytesToSend = []byte(resp.Text + "\n") } - ginCtx.Data(res.StatusCode, contentType, bytes) + ginCtx.Data(res.StatusCode, contentType, bytesToSend) return } @@ -109,7 +115,7 @@ func processGPTTranscriptions(ginCtx *gin.Context, prod bool, client http.Client telemetry.Timing("bricksllm.proxy.get_transcriptions_handler.error_latency", dur, nil, 1) telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.error_response", nil, 1) - bytes, err := io.ReadAll(res.Body) + readBytes, err := io.ReadAll(res.Body) if err != nil { logError(log, "error when reading openai transcriptions response body", prod, err) JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai transcriptions response body") @@ -117,14 +123,14 @@ func processGPTTranscriptions(ginCtx *gin.Context, prod bool, client http.Client } errorRes := &goopenai.ErrorResponse{} - err = json.Unmarshal(bytes, errorRes) + err = json.Unmarshal(readBytes, errorRes) if err != nil { logError(log, "error when unmarshalling openai transcriptions error response body", prod, err) } logOpenAiError(log, prod, errorRes) - ginCtx.Data(res.StatusCode, "application/json", bytes) + ginCtx.Data(res.StatusCode, "application/json", readBytes) return } } diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index ffed4c5..7a47387 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -49,7 +49,7 @@ type keyStorage interface { } type estimator interface { - EstimateTranscriptionCost(secs float64, model string) (float64, error) + EstimateTranscriptionCost(secs float64, model string, usage *openai.TranscriptionResponseUsage) (float64, error) EstimateSpeechCost(input string, model string) (float64, error) EstimateChatCompletionPromptCostWithTokenCounts(r *goopenai.ChatCompletionRequest) (int, float64, error) EstimateEmbeddingsCost(r *goopenai.EmbeddingRequest) (float64, error) From ac9c888b9ac0c52777a1f4a55f211096dfaca8d4 Mon Sep 17 00:00:00 2001 From: Sergei Bronnikov Date: Fri, 10 Apr 2026 11:49:01 +0100 Subject: [PATCH 5/8] stream --- internal/provider/openai/types.go | 26 +++++ .../proxy/{audioV2.go => audio_extended.go} | 105 +++++++++++++++++- 2 files changed, 126 insertions(+), 5 deletions(-) rename internal/server/web/proxy/{audioV2.go => audio_extended.go} (66%) diff --git a/internal/provider/openai/types.go b/internal/provider/openai/types.go index 7566bef..aa24f06 100644 --- a/internal/provider/openai/types.go +++ b/internal/provider/openai/types.go @@ -120,3 +120,29 @@ type TranscriptionResponse struct { Text string `json:"text,omitempty"` Usage TranscriptionResponseUsage `json:"usage,omitempty"` } + +type TranscriptionStreamChunk struct { + Type string `json:"type"` + Delta string `json:"delta,omitempty"` + Text string `json:"text,omitempty"` + Usage TranscriptionResponseUsage `json:"usage,omitempty"` +} + +func (c *TranscriptionStreamChunk) IsDone() bool { + return c.Type == "transcript.text.done" +} + +func (c *TranscriptionStreamChunk) IsDelta() bool { + return c.Type == "transcript.text.delta" +} + +func (c *TranscriptionStreamChunk) IsSegment() bool { + return c.Type == "transcript.text.segment" +} + +func (c *TranscriptionStreamChunk) GetText() string { + if c.IsDelta() { + return c.Delta + } + return c.Text +} diff --git a/internal/server/web/proxy/audioV2.go b/internal/server/web/proxy/audio_extended.go similarity index 66% rename from internal/server/web/proxy/audioV2.go rename to internal/server/web/proxy/audio_extended.go index 3b6ac33..425bfd6 100644 --- a/internal/server/web/proxy/audioV2.go +++ b/internal/server/web/proxy/audio_extended.go @@ -1,9 +1,12 @@ package proxy import ( + "bufio" "bytes" "context" "encoding/json" + "errors" + "fmt" "io" "mime/multipart" "net/http" @@ -17,21 +20,30 @@ import ( "go.uber.org/zap" ) -const transcriptionsUrl = "https://api.openai.com/v1/audio/transcriptions" +const ( + transcriptionsUrl = "https://api.openai.com/v1/audio/transcriptions" + translationsUrl = "https://api.openai.com/v1/audio/translations" +) func processGPTTranscriptions(ginCtx *gin.Context, prod bool, client http.Client, e estimator, model string) { +} + +func processGPTTranslations(ginCtx *gin.Context, prod bool, client http.Client, e estimator, model string) { +} + +func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estimator, model, url, handler string) { log := util.GetLogFromCtx(ginCtx) - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.requests", nil, 1) + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.requests", handler), nil, 1) - if ginCtx == nil || ginCtx.Request == nil { - JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] context is empty") + if ginCtx.Request == nil { + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] request is empty") return } ctx, cancel := context.WithTimeout(context.Background(), ginCtx.GetDuration("requestTimeout")) defer cancel() - req, err := http.NewRequestWithContext(ctx, ginCtx.Request.Method, transcriptionsUrl, ginCtx.Request.Body) + req, err := http.NewRequestWithContext(ctx, ginCtx.Request.Method, url, ginCtx.Request.Body) if err != nil { logError(log, "error when creating transcriptions openai http request", prod, err) JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to create openai transcriptions http request") @@ -133,6 +145,89 @@ func processGPTTranscriptions(ginCtx *gin.Context, prod bool, client http.Client ginCtx.Data(res.StatusCode, "application/json", readBytes) return } + + buffer := bufio.NewReader(res.Body) + content := "" + streamingResponse := [][]byte{} + + streamCost := 0.0 + + defer func() { + ginCtx.Set("content", content) + ginCtx.Set("streaming_response", bytes.Join(streamingResponse, []byte{'\n'})) + + ginCtx.Set("costInUsd", streamCost) + }() + + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.streaming_response", nil, 1) + ginCtx.Stream(func(w io.Writer) bool { + raw, err := buffer.ReadBytes('\n') + if err != nil { + if err == io.EOF { + return false + } + + if errors.Is(err, context.DeadlineExceeded) { + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.context_deadline_exceeded_error", nil, 1) + logError(log, "context deadline exceeded when reading bytes from openai transcriptions response", prod, err) + + return false + } + + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.read_bytes_error", nil, 1) + logError(log, "error when reading bytes from openai transcriptions response", prod, err) + + apiErr := &goopenai.ErrorResponse{ + Error: &goopenai.APIError{ + Type: "bricksllm_error", + Message: err.Error(), + }, + } + + errBytes, err := json.Marshal(apiErr) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.json_marshal_error", nil, 1) + logError(log, "error when marshalling bytes for openai streaming transcriptions error response", prod, err) + return false + } + + ginCtx.SSEvent("", string(errBytes)) + ginCtx.SSEvent("", " [DONE]") + return false + } + streamingResponse = append(streamingResponse, raw) + + noSpaceLine := bytes.TrimSpace(raw) + if !bytes.HasPrefix(noSpaceLine, headerData) { + return true + } + + noPrefixLine := bytes.TrimPrefix(noSpaceLine, headerData) + noPrefixLine = bytes.TrimSpace(noPrefixLine) + ginCtx.SSEvent("", " "+string(noPrefixLine)) + + if string(noPrefixLine) == "[DONE]" { + return false + } + chunk := &openai.TranscriptionStreamChunk{} + err = json.Unmarshal(noPrefixLine, chunk) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.completion_response_unmarshall_error", nil, 1) + logError(log, "error when unmarshalling openai transcriptions stream response", prod, err) + } + if err == nil { + textDelta := chunk.GetText() + if len(textDelta) > 0 { + content += textDelta + } + if chunk.IsDone() { + content = chunk.GetText() + streamCost, err = e.EstimateTranscriptionCost(0, model, &chunk.Usage) + } + } + return true + }) + telemetry.Timing("bricksllm.proxy.get_transcriptions_handler.streaming_latency", time.Since(start), nil, 1) } func modifyGPTTranscriptionsRequest(c *gin.Context, prod bool, log *zap.Logger, req *http.Request) { From 9289ffe0b54ed1cd85eef5d5e5383205716410e9 Mon Sep 17 00:00:00 2001 From: Sergei Bronnikov Date: Fri, 10 Apr 2026 12:09:52 +0100 Subject: [PATCH 6/8] translation --- internal/server/web/proxy/audio.go | 5 ++ internal/server/web/proxy/audio_extended.go | 76 +++++++++++---------- 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/internal/server/web/proxy/audio.go b/internal/server/web/proxy/audio.go index 40bd717..738a59a 100644 --- a/internal/server/web/proxy/audio.go +++ b/internal/server/web/proxy/audio.go @@ -339,6 +339,11 @@ func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.Ha func getTranslationsHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc { return func(c *gin.Context) { + model := c.PostForm("model") + if model == "gpt-4o-transcribe" || model == "gpt-4o-transcribe-diarize" || model == "gpt-4o-mini-transcribe" { + processGPTTranslations(c, prod, client, e, model) + return + } log := util.GetLogFromCtx(c) telemetry.Incr("bricksllm.proxy.get_translations_handler.requests", nil, 1) diff --git a/internal/server/web/proxy/audio_extended.go b/internal/server/web/proxy/audio_extended.go index 425bfd6..70cb970 100644 --- a/internal/server/web/proxy/audio_extended.go +++ b/internal/server/web/proxy/audio_extended.go @@ -26,9 +26,11 @@ const ( ) func processGPTTranscriptions(ginCtx *gin.Context, prod bool, client http.Client, e estimator, model string) { + processGPTAudio(ginCtx, prod, client, e, model, transcriptionsUrl, "transcriptions") } func processGPTTranslations(ginCtx *gin.Context, prod bool, client http.Client, e estimator, model string) { + processGPTAudio(ginCtx, prod, client, e, model, translationsUrl, "translations") } func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estimator, model, url, handler string) { @@ -45,8 +47,8 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim req, err := http.NewRequestWithContext(ctx, ginCtx.Request.Method, url, ginCtx.Request.Body) if err != nil { - logError(log, "error when creating transcriptions openai http request", prod, err) - JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to create openai transcriptions http request") + logError(log, "error when creating transcriptions/translation openai http request", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to create openai transcriptions/translation http request") return } @@ -61,16 +63,16 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim } if !isStreaming { - modifyGPTTranscriptionsRequest(ginCtx, prod, log, req) + modifyGPTTranscriptionsRequest(ginCtx, prod, log, req, handler) } start := time.Now() res, err := client.Do(req) if err != nil { - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.http_client_error", nil, 1) + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.http_client_error", handler), nil, 1) - logError(log, "error when sending transcriptions request to openai", prod, err) - JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to send transcriptions request to openai") + logError(log, "error when sending transcriptions/translation request to openai", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to send transcriptions/translation request to openai") return } @@ -84,17 +86,17 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim if res.StatusCode == http.StatusOK && !isStreaming { dur := time.Since(start) - telemetry.Timing("bricksllm.proxy.get_transcriptions_handler.latency", dur, nil, 1) + telemetry.Timing(fmt.Sprintf("bricksllm.proxy.get_%s_handler.latency", handler), dur, nil, 1) readBytes, err := io.ReadAll(res.Body) if err != nil { - logError(log, "error when reading openai http transcriptions response body", prod, err) + logError(log, "error when reading openai http transcriptions/translation response body", prod, err) JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body") return } var cost float64 = 0 resp := &openai.TranscriptionResponse{} - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.success", nil, 1) - telemetry.Timing("bricksllm.proxy.get_transcriptions_handler.success_latency", dur, nil, 1) + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.success", handler), nil, 1) + telemetry.Timing(fmt.Sprintf("bricksllm.proxy.get_%s_handler.success_latency", handler), dur, nil, 1) err = json.Unmarshal(readBytes, resp) if err != nil { @@ -104,7 +106,7 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim if err == nil { cost, err = e.EstimateTranscriptionCost(0, model, &resp.Usage) if err != nil { - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.estimate_total_cost_error", nil, 1) + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.estimate_total_cost_error", handler), nil, 1) logError(log, "error when estimating openai cost", prod, err) } } @@ -124,20 +126,20 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim if res.StatusCode != http.StatusOK { dur := time.Since(start) - telemetry.Timing("bricksllm.proxy.get_transcriptions_handler.error_latency", dur, nil, 1) - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.error_response", nil, 1) + telemetry.Timing(fmt.Sprintf("bricksllm.proxy.get_%s_handler.error_latency", handler), dur, nil, 1) + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.error_response", handler), nil, 1) readBytes, err := io.ReadAll(res.Body) if err != nil { - logError(log, "error when reading openai transcriptions response body", prod, err) - JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai transcriptions response body") + logError(log, "error when reading openai transcriptions/translation response body", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai transcriptions/translation response body") return } errorRes := &goopenai.ErrorResponse{} err = json.Unmarshal(readBytes, errorRes) if err != nil { - logError(log, "error when unmarshalling openai transcriptions error response body", prod, err) + logError(log, "error when unmarshalling openai transcriptions/translation error response body", prod, err) } logOpenAiError(log, prod, errorRes) @@ -159,7 +161,7 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim ginCtx.Set("costInUsd", streamCost) }() - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.streaming_response", nil, 1) + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.streaming_response", handler), nil, 1) ginCtx.Stream(func(w io.Writer) bool { raw, err := buffer.ReadBytes('\n') if err != nil { @@ -168,14 +170,14 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim } if errors.Is(err, context.DeadlineExceeded) { - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.context_deadline_exceeded_error", nil, 1) - logError(log, "context deadline exceeded when reading bytes from openai transcriptions response", prod, err) + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.context_deadline_exceeded_error", handler), nil, 1) + logError(log, "context deadline exceeded when reading bytes from openai transcriptions/translation response", prod, err) return false } - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.read_bytes_error", nil, 1) - logError(log, "error when reading bytes from openai transcriptions response", prod, err) + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.read_bytes_error", handler), nil, 1) + logError(log, "error when reading bytes from openai transcriptions/translation response", prod, err) apiErr := &goopenai.ErrorResponse{ Error: &goopenai.APIError{ @@ -186,8 +188,8 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim errBytes, err := json.Marshal(apiErr) if err != nil { - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.json_marshal_error", nil, 1) - logError(log, "error when marshalling bytes for openai streaming transcriptions error response", prod, err) + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.json_marshal_error", handler), nil, 1) + logError(log, "error when marshalling bytes for openai streaming transcriptions/translation error response", prod, err) return false } @@ -212,8 +214,8 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim chunk := &openai.TranscriptionStreamChunk{} err = json.Unmarshal(noPrefixLine, chunk) if err != nil { - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.completion_response_unmarshall_error", nil, 1) - logError(log, "error when unmarshalling openai transcriptions stream response", prod, err) + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.completion_response_unmarshall_error", handler), nil, 1) + logError(log, "error when unmarshalling openai transcriptions/translation stream response", prod, err) } if err == nil { textDelta := chunk.GetText() @@ -227,10 +229,10 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim } return true }) - telemetry.Timing("bricksllm.proxy.get_transcriptions_handler.streaming_latency", time.Since(start), nil, 1) + telemetry.Timing(fmt.Sprintf("bricksllm.proxy.get_%s_handler.streaming_latency", handler), time.Since(start), nil, 1) } -func modifyGPTTranscriptionsRequest(c *gin.Context, prod bool, log *zap.Logger, req *http.Request) { +func modifyGPTTranscriptionsRequest(c *gin.Context, prod bool, log *zap.Logger, req *http.Request, handler string) { var b bytes.Buffer writer := multipart.NewWriter(&b) defer writer.Close() @@ -244,7 +246,7 @@ func modifyGPTTranscriptionsRequest(c *gin.Context, prod bool, log *zap.Logger, "response_format": responseFormat, }) if err != nil { - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.write_field_to_buffer_error", nil, 1) + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.write_field_to_buffer_error", handler), nil, 1) logError(log, "error when writing field to buffer", prod, err) JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot write field to buffer") return @@ -256,25 +258,25 @@ func modifyGPTTranscriptionsRequest(c *gin.Context, prod bool, log *zap.Logger, if form.File != nil { fieldWriter, err := writer.CreateFormFile("file", form.File.Filename) if err != nil { - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.create_transcription_file_error", nil, 1) - logError(log, "error when creating transcription file", prod, err) - JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot create transcription file") + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.create_transcription_file_error", handler), nil, 1) + logError(log, "error when creating transcriptions/translation file", prod, err) + JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot create transcriptions/translation file") return } opened, err := form.File.Open() if err != nil { - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.open_transcription_file_error", nil, 1) - logError(log, "error when openning transcription file", prod, err) - JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot open transcription file") + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.open_transcription_file_error", handler), nil, 1) + logError(log, "error when openning transcriptions/translation file", prod, err) + JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot open transcriptions/translation file") return } _, err = io.Copy(fieldWriter, opened) if err != nil { - telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.copy_transcription_file_error", nil, 1) - logError(log, "error when copying transcription file", prod, err) - JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot copy transcription file") + telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.copy_transcription_file_error", handler), nil, 1) + logError(log, "error when copying transcriptions/translation file", prod, err) + JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot copy transcriptions/translation file") return } } From 7ef9da14e67724a5c5258b9e2d60bb79c69c690f Mon Sep 17 00:00:00 2001 From: Sergei Bronnikov Date: Tue, 14 Apr 2026 11:27:55 +0100 Subject: [PATCH 7/8] fixes --- internal/provider/openai/cost.go | 87 +++++++++++++-------- internal/provider/openai/types.go | 14 +++- internal/server/web/proxy/audio_extended.go | 21 ++--- internal/server/web/proxy/middleware.go | 14 ++++ 4 files changed, 88 insertions(+), 48 deletions(-) diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index a50a930..6093a54 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -536,40 +536,44 @@ func (ce *CostEstimator) EstimateImagesCost(model, quality, resolution string, m if err == nil { return mCost, nil } - simpleRes, err := convertResToSimple(resolution) - if err != nil { - return 0, err + + costMap, ok := ce.tokenCostMap["images"] + if !ok { + return 0, errors.New("images cost map is not provided") } + + // Try to prepare normalized model key with resolution/quality + simpleRes, resErr := convertResToSimple(resolution) var normalizedModel string - switch model { - case "dall-e-2": - normalizedModel, err = prepareDallE2Model(simpleRes, model) - if err != nil { - return 0, err + var prepareErr error + + if resErr == nil { + switch model { + case "dall-e-2": + normalizedModel, prepareErr = prepareDallE2Model(simpleRes, model) + case "dall-e-3": + normalizedModel, prepareErr = prepareDallE3Model(quality, simpleRes, model) + case "gpt-image-1", "gpt-image-1.5", "chatgpt-image-latest", "gpt-image-1-mini": + normalizedModel, prepareErr = prepareGptImageModel(quality, simpleRes, model) + default: + // Unknown model, will try model-only fallback + prepareErr = errors.New("unknown model type") } - case "dall-e-3": - normalizedModel, err = prepareDallE3Model(quality, simpleRes, model) - if err != nil { - return 0, err - } - case "gpt-image-1", "gpt-image-1.5", "chatgpt-image-latest", "gpt-image-1-mini": - normalizedModel, err = prepareGptImageModel(quality, simpleRes, model) - if err != nil { - return 0, err + + // If normalization succeeded, try the normalized key first + if prepareErr == nil { + if cost, ok := costMap[normalizedModel]; ok { + return cost, nil + } } - default: - return 0, errors.New("model is not present in the images cost map") } - costMap, ok := ce.tokenCostMap["images"] - if !ok { - return 0, errors.New("images cost map is not provided") - } - cost, ok := costMap[normalizedModel] - if !ok { - return 0, errors.New("model is not present in the images cost map") + // Fall back to model-only lookup + if cost, ok := costMap[model]; ok { + return cost, nil } - return cost, nil + + return 0, errors.New("model (with or without quality/resolution) is not present in the images cost map") } var allowedDallE2Resolutions = []string{"256", "512", "1024"} @@ -826,19 +830,34 @@ func (ce *CostEstimator) EstimateVideoCost(metadata *VideoResponseMetadata) (flo return 0, errors.New("video cost map is not provided") } model := metadata.Model - size, err := normalizedVideoSize(metadata.Size) + + // Validate and get seconds as float + seconds, err := metadata.GetSecondsAsFloat() if err != nil { - return 0, err + return 0, fmt.Errorf("failed to parse seconds field: %w", err) } - costKey := fmt.Sprintf("%s-%s", model, size) - cost, ok := costMap[costKey] - if !ok { - return 0, errors.New("model with provided size is not present in the video cost map") + + // Try model-size lookup first if size is present and can be normalized + size, err := normalizedVideoSize(metadata.Size) + if err == nil && size != "" { + costKey := fmt.Sprintf("%s-%s", model, size) + if cost, ok := costMap[costKey]; ok { + return cost * seconds, nil + } } - return cost * metadata.GetSecondsAsFloat(), nil + + // Fall back to model-only lookup + if cost, ok := costMap[model]; ok { + return cost * seconds, nil + } + + return 0, errors.New("model (with or without size) is not present in the video cost map") } func normalizedVideoSize(size string) (string, error) { + if size == "" { + return "", nil + } switch size { case "720x1280", "1280x720": return "720", nil diff --git a/internal/provider/openai/types.go b/internal/provider/openai/types.go index aa24f06..9377653 100644 --- a/internal/provider/openai/types.go +++ b/internal/provider/openai/types.go @@ -98,11 +98,11 @@ type VideoResponseMetadata struct { Seconds string `json:"seconds,omitempty"` } -func (v *VideoResponseMetadata) GetSecondsAsFloat() float64 { - if secondsFloat, err := strconv.ParseFloat(v.Seconds, 64); err == nil { - return secondsFloat +func (v *VideoResponseMetadata) GetSecondsAsFloat() (float64, error) { + if v.Seconds == "" { + return 0, strconv.ErrSyntax } - return 0 + return strconv.ParseFloat(v.Seconds, 64) } type TranscriptionResponseUsageInputTokenDetails struct { @@ -146,3 +146,9 @@ func (c *TranscriptionStreamChunk) GetText() string { } return c.Text } + +type VideoRequest struct { + Model string `json:"model"` + Prompt string `json:"prompt"` + Size string `json:"size"` +} diff --git a/internal/server/web/proxy/audio_extended.go b/internal/server/web/proxy/audio_extended.go index 70cb970..aa2b721 100644 --- a/internal/server/web/proxy/audio_extended.go +++ b/internal/server/web/proxy/audio_extended.go @@ -63,7 +63,11 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim } if !isStreaming { - modifyGPTTranscriptionsRequest(ginCtx, prod, log, req, handler) + err := modifyGPTTranscriptionsRequest(ginCtx, prod, log, req, handler) + if err != nil { + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] "+err.Error()) + return + } } start := time.Now() @@ -232,7 +236,7 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim telemetry.Timing(fmt.Sprintf("bricksllm.proxy.get_%s_handler.streaming_latency", handler), time.Since(start), nil, 1) } -func modifyGPTTranscriptionsRequest(c *gin.Context, prod bool, log *zap.Logger, req *http.Request, handler string) { +func modifyGPTTranscriptionsRequest(c *gin.Context, prod bool, log *zap.Logger, req *http.Request, handler string) error { var b bytes.Buffer writer := multipart.NewWriter(&b) defer writer.Close() @@ -248,8 +252,7 @@ func modifyGPTTranscriptionsRequest(c *gin.Context, prod bool, log *zap.Logger, if err != nil { telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.write_field_to_buffer_error", handler), nil, 1) logError(log, "error when writing field to buffer", prod, err) - JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot write field to buffer") - return + return fmt.Errorf("cannot write field to buffer: %w", err) } var form TransriptionForm @@ -260,29 +263,27 @@ func modifyGPTTranscriptionsRequest(c *gin.Context, prod bool, log *zap.Logger, if err != nil { telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.create_transcription_file_error", handler), nil, 1) logError(log, "error when creating transcriptions/translation file", prod, err) - JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot create transcriptions/translation file") - return + return fmt.Errorf("cannot create transcriptions/translation file: %w", err) } opened, err := form.File.Open() if err != nil { telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.open_transcription_file_error", handler), nil, 1) logError(log, "error when openning transcriptions/translation file", prod, err) - JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot open transcriptions/translation file") - return + return fmt.Errorf("cannot open transcriptions/translation file: %w", err) } _, err = io.Copy(fieldWriter, opened) if err != nil { telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.copy_transcription_file_error", handler), nil, 1) logError(log, "error when copying transcriptions/translation file", prod, err) - JSON(c, http.StatusInternalServerError, "[BricksLLM] cannot copy transcriptions/translation file") - return + return fmt.Errorf("cannot copy transcriptions/translation file: %w", err) } } req.Header.Set("Content-Type", writer.FormDataContentType()) req.Body = io.NopCloser(&b) + return nil } func writePostFields(c *gin.Context, writer *multipart.Writer, overWrites map[string]string) error { diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index 7a47387..e9b26aa 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -1003,6 +1003,20 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag logCreateTranslationRequest(logWithCid, model, prompt, responseFormat, converted, prod, private) } + if strings.HasPrefix(c.FullPath(), "/api/providers/openai/v1/videos") && c.Request.Method == http.MethodPost { + model := c.PostForm("model") + if model == "" { + vr := &openai.VideoRequest{} + err := json.Unmarshal(body, vr) + if err != nil { + logError(logWithCid, "error when unmarshalling video request", prod, err) + } + enrichedEvent.Request = vr + model = vr.Model + } + c.Set("model", model) + } + if len(kc.AllowedPaths) != 0 && !containsPath(kc.AllowedPaths, c.FullPath(), c.Request.Method) { telemetry.Incr("bricksllm.proxy.get_middleware.path_not_allowed", nil, 1) JSON(c, http.StatusForbidden, "[BricksLLM] path is not allowed") From b53535bef69df9874cbb362dcdd8844946671e1f Mon Sep 17 00:00:00 2001 From: Sergei Bronnikov Date: Tue, 14 Apr 2026 11:33:50 +0100 Subject: [PATCH 8/8] fixes --- internal/server/web/proxy/audio_extended.go | 30 ++++++++++++--------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/internal/server/web/proxy/audio_extended.go b/internal/server/web/proxy/audio_extended.go index aa2b721..00305ea 100644 --- a/internal/server/web/proxy/audio_extended.go +++ b/internal/server/web/proxy/audio_extended.go @@ -62,12 +62,12 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim req.Header.Set("Connection", "keep-alive") } - if !isStreaming { - err := modifyGPTTranscriptionsRequest(ginCtx, prod, log, req, handler) - if err != nil { - JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] "+err.Error()) - return - } + // Always rebuild the multipart body after PostForm drains it + // For non-streaming, we also modify response_format + err = modifyGPTTranscriptionsRequest(ginCtx, prod, log, req, handler, !isStreaming) + if err != nil { + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] "+err.Error()) + return } start := time.Now() @@ -236,19 +236,23 @@ func processGPTAudio(ginCtx *gin.Context, prod bool, client http.Client, e estim telemetry.Timing(fmt.Sprintf("bricksllm.proxy.get_%s_handler.streaming_latency", handler), time.Since(start), nil, 1) } -func modifyGPTTranscriptionsRequest(c *gin.Context, prod bool, log *zap.Logger, req *http.Request, handler string) error { +func modifyGPTTranscriptionsRequest(c *gin.Context, prod bool, log *zap.Logger, req *http.Request, handler string, modifyResponseFormat bool) error { var b bytes.Buffer writer := multipart.NewWriter(&b) defer writer.Close() - responseFormat := c.PostForm("response_format") - if responseFormat == "text" { - responseFormat = "json" + overWrites := map[string]string{} + + // Only modify response_format for non-streaming requests + if modifyResponseFormat { + responseFormat := c.PostForm("response_format") + if responseFormat == "text" { + responseFormat = "json" + } + overWrites["response_format"] = responseFormat } - err := writePostFields(c, writer, map[string]string{ - "response_format": responseFormat, - }) + err := writePostFields(c, writer, overWrites) if err != nil { telemetry.Incr(fmt.Sprintf("bricksllm.proxy.get_%s_handler.write_field_to_buffer_error", handler), nil, 1) logError(log, "error when writing field to buffer", prod, err)