Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 131 additions & 30 deletions internal/provider/openai/cost.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,33 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{
},
"audio": {
"whisper-1": 0.006,
"tts-1": 0.015,
"tts-1-hd": 0.03,

"tts-1": 0.015,
"tts-1-hd": 0.03,

"gpt-4o-transcribe": 0.006,
"gpt-4o-transcribe-diarize": 0.006,
"gpt-4o-mini-transcribe": 0.003,

"gpt-4o-mini-tts": 0.012,
},
"transcription-input": {
"gpt-4o-transcribe": 0.0025,
"gpt-4o-transcribe-diarize": 0.0025,
"gpt-4o-mini-transcribe": 0.00125,
},
"transcription-output": {
"gpt-4o-transcribe": 0.01,
"gpt-4o-transcribe-diarize": 0.01,
"gpt-4o-mini-transcribe": 0.005,
},
"video": { // $ per sec
"sora-2": 0.1,
"sora-2-pro": 0.30,
"sora-2-720": 0.1,
"sora-2-pro-720": 0.30,
"sora-2-pro-1024": 0.5,
"sora-2-pro-1080": 0.7,
},
"completion": {
"gpt-image-1.5": 0.010,
Expand Down Expand Up @@ -511,40 +536,44 @@ func (ce *CostEstimator) EstimateImagesCost(model, quality, resolution string, m
if err == nil {
return mCost, nil
}
simpleRes, err := convertResToSimple(resolution)
if err != nil {
return 0, err

costMap, ok := ce.tokenCostMap["images"]
if !ok {
return 0, errors.New("images cost map is not provided")
}

// Try to prepare normalized model key with resolution/quality
simpleRes, resErr := convertResToSimple(resolution)
var normalizedModel string
switch model {
case "dall-e-2":
normalizedModel, err = prepareDallE2Model(simpleRes, model)
if err != nil {
return 0, err
var prepareErr error

if resErr == nil {
switch model {
case "dall-e-2":
normalizedModel, prepareErr = prepareDallE2Model(simpleRes, model)
case "dall-e-3":
normalizedModel, prepareErr = prepareDallE3Model(quality, simpleRes, model)
case "gpt-image-1", "gpt-image-1.5", "chatgpt-image-latest", "gpt-image-1-mini":
normalizedModel, prepareErr = prepareGptImageModel(quality, simpleRes, model)
default:
// Unknown model, will try model-only fallback
prepareErr = errors.New("unknown model type")
}
case "dall-e-3":
normalizedModel, err = prepareDallE3Model(quality, simpleRes, model)
if err != nil {
return 0, err
}
case "gpt-image-1", "gpt-image-1.5", "chatgpt-image-latest", "gpt-image-1-mini":
normalizedModel, err = prepareGptImageModel(quality, simpleRes, model)
if err != nil {
return 0, err

// If normalization succeeded, try the normalized key first
if prepareErr == nil {
if cost, ok := costMap[normalizedModel]; ok {
return cost, nil
}
}
default:
return 0, errors.New("model is not present in the images cost map")
}

costMap, ok := ce.tokenCostMap["images"]
if !ok {
return 0, errors.New("images cost map is not provided")
}
cost, ok := costMap[normalizedModel]
if !ok {
return 0, errors.New("model is not present in the images cost map")
// Fall back to model-only lookup
if cost, ok := costMap[model]; ok {
return cost, nil
}
return cost, nil

return 0, errors.New("model (with or without quality/resolution) is not present in the images cost map")
}

var allowedDallE2Resolutions = []string{"256", "512", "1024"}
Expand Down Expand Up @@ -649,7 +678,30 @@ func prepareGptImageQuality(quality string) (string, error) {
return quality, nil
}

func (ce *CostEstimator) EstimateTranscriptionCost(secs float64, model string) (float64, error) {
func (ce *CostEstimator) EstimateTranscriptionCost(secs float64, model string, usage *TranscriptionResponseUsage) (float64, error) {
if usage != nil {
inputTokens := usage.InputTokens
costMap, ok := ce.tokenCostMap["transcription-input"]
if !ok {
return 0, errors.New("transcription input token cost map is not provided")
}
inputCost, ok := costMap[model]
if !ok {
return 0, errors.New("model is not present in the transcription input token cost map")
}

outputTokens := usage.OutputTokens
costMap, ok = ce.tokenCostMap["transcription-output"]
if !ok {
return 0, errors.New("transcription output token cost map is not provided")
}
outputCost, ok := costMap[model]
if !ok {
return 0, errors.New("model is not present in the transcription output token cost map")
}

return (float64(inputTokens)/1000)*inputCost + (float64(outputTokens)/1000)*outputCost, nil
}
costMap, ok := ce.tokenCostMap["audio"]
if !ok {
return 0, errors.New("audio cost map is not provided")
Expand Down Expand Up @@ -769,6 +821,55 @@ func (ce *CostEstimator) EstimateResponseApiToolCreateContainerCost(req *Respons
return totalCost, nil
}

// EstimateVideoCost returns the dollar cost of a generated video described
// by metadata, using the per-second rates in the "video" cost map.
//
// Pricing is resolved in two steps: first a model+size key (e.g.
// "sora-2-720") built from the normalized resolution, then the bare model
// name as a fallback. The returned cost is the per-second rate multiplied
// by the clip duration.
func (ce *CostEstimator) EstimateVideoCost(metadata *VideoResponseMetadata) (float64, error) {
	if metadata == nil {
		return 0, errors.New("metadata is nil")
	}

	rates, ok := ce.tokenCostMap["video"]
	if !ok {
		return 0, errors.New("video cost map is not provided")
	}

	// Without a parseable duration no cost can be computed.
	duration, err := metadata.GetSecondsAsFloat()
	if err != nil {
		return 0, fmt.Errorf("failed to parse seconds field: %w", err)
	}

	// Prefer the resolution-specific price when the size normalizes cleanly.
	if dim, sizeErr := normalizedVideoSize(metadata.Size); sizeErr == nil && dim != "" {
		if rate, found := rates[fmt.Sprintf("%s-%s", metadata.Model, dim)]; found {
			return rate * duration, nil
		}
	}

	// Fall back to the model-only price.
	if rate, found := rates[metadata.Model]; found {
		return rate * duration, nil
	}

	return 0, errors.New("model (with or without size) is not present in the video cost map")
}

// normalizedVideoSize maps a WxH resolution string (either orientation) to
// the short size label used in video cost-map keys ("720", "1024", "1080").
// An empty size normalizes to the empty string without error; any other
// unrecognized value is rejected.
func normalizedVideoSize(size string) (string, error) {
	switch size {
	case "":
		return "", nil
	case "720x1280", "1280x720":
		return "720", nil
	case "1024x1792", "1792x1024":
		return "1024", nil
	case "1080x1920", "1920x1080":
		return "1080", nil
	}
	return "", errors.New("size is not valid")
}

var reasoningModelPrefix = []string{"gpt-5", "o1", "o2", "o3"}

func extendedToolType(toolType, model string) string {
Expand Down
63 changes: 63 additions & 0 deletions internal/provider/openai/types.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package openai

import "strconv"

type ResponseRequest struct {
Background *bool `json:"background,omitzero"`
Conversation *any `json:"conversation,omitzero"`
Expand Down Expand Up @@ -89,3 +91,64 @@ type ImageResponseMetadata struct {
Size string `json:"size,omitempty"`
Usage ImageResponseUsage `json:"usage,omitempty"`
}

// VideoResponseMetadata captures the fields of an OpenAI video-generation
// response that are needed for cost estimation.
type VideoResponseMetadata struct {
	Model   string `json:"model,omitempty"`
	Size    string `json:"size,omitempty"`
	Seconds string `json:"seconds,omitempty"`
}

// GetSecondsAsFloat parses the Seconds field (a decimal string) into a
// float64. An empty field is reported as strconv.ErrSyntax, so callers can
// treat missing and malformed durations uniformly via errors.Is.
func (v *VideoResponseMetadata) GetSecondsAsFloat() (float64, error) {
	raw := v.Seconds
	if raw == "" {
		return 0, strconv.ErrSyntax
	}
	return strconv.ParseFloat(raw, 64)
}

// TranscriptionResponseUsageInputTokenDetails breaks down a transcription
// request's input tokens by modality (text prompt vs. audio).
type TranscriptionResponseUsageInputTokenDetails struct {
	TextTokens  int `json:"text_tokens,omitempty"`
	AudioTokens int `json:"audio_tokens,omitempty"`
}

// TranscriptionResponseUsage is the token accounting attached to a
// transcription response or to the final chunk of a transcription stream.
// NOTE(review): omitempty has no effect on struct-typed fields such as
// InputTokenDetails; harmless for decoding, but verify if this type is
// ever marshaled.
type TranscriptionResponseUsage struct {
	Type              string                                      `json:"type"`
	TotalTokens       int                                         `json:"total_tokens,omitempty"`
	InputTokens       int                                         `json:"input_tokens,omitempty"`
	InputTokenDetails TranscriptionResponseUsageInputTokenDetails `json:"input_token_details,omitempty"`
	OutputTokens      int                                         `json:"output_tokens,omitempty"`
}

// TranscriptionResponse is a non-streaming transcription result.
type TranscriptionResponse struct {
	Text  string                     `json:"text,omitempty"`
	Usage TranscriptionResponseUsage `json:"usage,omitempty"`
}

// TranscriptionStreamChunk is one server-sent event of a streaming
// transcription. Type discriminates the event: delta events carry an
// incremental fragment in Delta, while done/segment events carry their
// text in Text.
type TranscriptionStreamChunk struct {
	Type  string                     `json:"type"`
	Delta string                     `json:"delta,omitempty"`
	Text  string                     `json:"text,omitempty"`
	Usage TranscriptionResponseUsage `json:"usage,omitempty"`
}

// IsDone reports whether the chunk marks the end of the transcript stream.
func (c *TranscriptionStreamChunk) IsDone() bool { return c.Type == "transcript.text.done" }

// IsDelta reports whether the chunk carries an incremental text fragment.
func (c *TranscriptionStreamChunk) IsDelta() bool { return c.Type == "transcript.text.delta" }

// IsSegment reports whether the chunk carries a transcript segment.
func (c *TranscriptionStreamChunk) IsSegment() bool { return c.Type == "transcript.text.segment" }

// GetText returns the chunk's textual payload: Delta for delta events and
// Text for everything else.
func (c *TranscriptionStreamChunk) GetText() string {
	if !c.IsDelta() {
		return c.Text
	}
	return c.Delta
}
}

// VideoRequest is the request body forwarded to the OpenAI video
// generation endpoint.
type VideoRequest struct {
	Model  string `json:"model"`  // e.g. "sora-2", "sora-2-pro" (see the "video" cost map)
	Prompt string `json:"prompt"` // text description of the video to generate
	Size   string `json:"size"`   // resolution as "WxH", e.g. "1280x720"
}
15 changes: 13 additions & 2 deletions internal/server/web/proxy/audio.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,12 @@ func getContentType(format string) string {

func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc {
return func(c *gin.Context) {
model := c.PostForm("model")
if model == "gpt-4o-transcribe" || model == "gpt-4o-transcribe-diarize" || model == "gpt-4o-mini-transcribe" {
processGPTTranscriptions(c, prod, client, e, model)
return
}

log := util.GetLogFromCtx(c)
telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.requests", nil, 1)

Expand Down Expand Up @@ -291,7 +297,7 @@ func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.Ha
}

if err == nil {
cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model"))
cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model"), nil)
if err != nil {
telemetry.Incr("bricksllm.proxy.get_transcriptions_handler.estimate_total_cost_error", nil, 1)
logError(log, "error when estimating openai cost", prod, err)
Expand Down Expand Up @@ -333,6 +339,11 @@ func getTranscriptionsHandler(prod bool, client http.Client, e estimator) gin.Ha

func getTranslationsHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc {
return func(c *gin.Context) {
model := c.PostForm("model")
if model == "gpt-4o-transcribe" || model == "gpt-4o-transcribe-diarize" || model == "gpt-4o-mini-transcribe" {
processGPTTranslations(c, prod, client, e, model)
return
}
log := util.GetLogFromCtx(c)
telemetry.Incr("bricksllm.proxy.get_translations_handler.requests", nil, 1)

Expand Down Expand Up @@ -451,7 +462,7 @@ func getTranslationsHandler(prod bool, client http.Client, e estimator) gin.Hand
}

if err == nil {
cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model"))
cost, err := e.EstimateTranscriptionCost(ar.Duration, c.GetString("model"), nil)
if err != nil {
telemetry.Incr("bricksllm.proxy.get_translations_handler.estimate_total_cost_error", nil, 1)
logError(log, "error when estimating openai cost", prod, err)
Expand Down
Loading
Loading