Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion internal/message/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,8 @@ func (h *Handler) decorateEvent(m Message) error {
e.Event.CostInUsd = cost + completionCost

if e.CostMap != nil {
newCost, err := provider.EstimateTotalCostWithCostMaps(e.Event.Model, tks, completiontks, 1000, e.CostMap.PromptCostPerModel, e.CostMap.CompletionCostPerModel)
model := openai.ModelWithContextLength(e.Event.Model, int64(tks+completiontks))
newCost, err := provider.EstimateTotalCostWithCostMaps(model, tks, completiontks, 1000, e.CostMap.PromptCostPerModel, e.CostMap.CompletionCostPerModel)
if err != nil {
h.log.Debug("error when estimating total cost with cost maps", zap.Error(err))
telemetry.Incr("bricksllm.proxy.decorate_event.estimate_total_cost_with_cost_maps_error", nil, 1)
Expand Down
71 changes: 61 additions & 10 deletions internal/provider/openai/cost.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,14 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{
"chatgpt-image-latest": 0.005,
"gpt-image-1-mini": 0.002,

"gpt-5.4": 0.005,
"gpt-5.4-mini": 0.00075,
"gpt-5.4-nano": 0.0002,
"gpt-5.4-pro": 0.06,
"gpt-5.4": 0.005,
"gpt-5.4~long": 0.005,
"gpt-5.4~short": 0.0025,
"gpt-5.4-mini": 0.00075,
"gpt-5.4-nano": 0.0002,
"gpt-5.4-pro": 0.06,
"gpt-5.4-pro~long": 0.06,
"gpt-5.4-pro~short": 0.03,

"gpt-5.3-codex": 0.00175,

Expand Down Expand Up @@ -111,6 +115,12 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{
"babbage-002": 0.000400,
},
"cached-prompt": {
"gpt-5.4": 0.0005,
"gpt-5.4~long": 0.0005,
"gpt-5.4~short": 0.00025,
"gpt-5.4-mini": 0.000075,
"gpt-5.4-nano": 0.00002,

"gpt-image-1.5": 0.00125,
"gpt-image-1": 0.00125,
"chatgpt-image-latest": 0.00125,
Expand Down Expand Up @@ -192,10 +202,14 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{
"gpt-image-1.5": 0.010,
"chatgpt-image-latest": 0.010,

"gpt-5.4": 0.0225,
"gpt-5.4-mini": 0.0045,
"gpt-5.4-nano": 0.00125,
"gpt-5.4-pro": 0.27,
"gpt-5.4": 0.0225,
"gpt-5.4~long": 0.0225,
"gpt-5.4~short": 0.015,
"gpt-5.4-mini": 0.0045,
"gpt-5.4-nano": 0.00125,
"gpt-5.4-pro": 0.27,
"gpt-5.4-pro~long": 0.27,
"gpt-5.4-pro~short": 0.18,

"gpt-5.3-codex": 0.014,

Expand Down Expand Up @@ -346,17 +360,27 @@ var OpenAiCodeInterpreterContainerCost = map[string]float64{
// AllowedTools is the allowlist of tool type identifiers accepted on
// proxied Responses API requests; middleware rejects any request whose
// tool.Type is not present in this list.
// NOTE(review): entries presumably mirror the provider's published tool
// type names — confirm against the OpenAI Responses API tool reference.
var AllowedTools = []string{
	// Web search variants, including the dated preview identifier.
	"web_search",
	"web_search_preview",
	"web_search_preview_2025_03_11",

	"code_interpreter",

	"file_search",
	"function",
	// Computer-use / shell / patching tool families.
	"computer_use_preview",
	"computer",
	"computer_use",
	"exec_command",
	"shell",
	"local_shell",
	"apply_patch",
	"filesystem",
	"patch",
	"namespace",
	"custom",
	"custom_code",
	"mcp",
	"tool_search",
	"image_generation",
	"skills",
}

type tokenCounter interface {
Expand All @@ -376,6 +400,8 @@ func NewCostEstimator(m map[string]map[string]float64, tc tokenCounter) *CostEst
}

func (ce *CostEstimator) EstimateTotalCost(model string, promptTks, completionTks int) (float64, error) {
totalTokens := int64(promptTks + completionTks)
model = ModelWithContextLength(model, totalTokens)
promptCost, err := ce.EstimatePromptCost(model, promptTks)
if err != nil {
return 0, err
Expand Down Expand Up @@ -455,7 +481,8 @@ func (ce *CostEstimator) EstimateChatCompletionPromptCostWithTokenCounts(r *goop
return 0, 0, err
}

cost, err := ce.EstimatePromptCost(r.Model, tks)
model := ModelWithContextLength(r.Model, int64(tks))
cost, err := ce.EstimatePromptCost(model, tks)
if err != nil {
return 0, 0, err
}
Expand All @@ -473,6 +500,7 @@ func (ce *CostEstimator) EstimateChatCompletionStreamCostWithTokenCounts(model s
return 0, 0, err
}

model = ModelWithContextLength(model, int64(tks))
cost, err := ce.EstimateCompletionCost(model, tks)
if err != nil {
return 0, 0, err
Expand Down Expand Up @@ -797,6 +825,9 @@ func (ce *CostEstimator) EstimateResponseApiTotalCost(model string, usage respon
cachedInputTokens := usage.InputTokensDetails.CachedTokens
outputTokens := usage.OutputTokens

totalTokens := inputTokens + cachedInputTokens + outputTokens
model = ModelWithContextLength(model, totalTokens)

cachedInputCost, err := ce.estimateResponseApiTokensCost("cached-prompt", model, cachedInputTokens)
if err != nil {
cachedInputTokens = 0.0
Expand Down Expand Up @@ -1055,3 +1086,23 @@ func countTotalTokens(model string, r *goopenai.ChatCompletionRequest, tc tokenC

return tks + ftks + mtks, err
}

// modelWithLengthCtx lists the models whose per-token pricing depends on
// the request's context length; their cost-map keys carry a "~long" or
// "~short" suffix (see the pricing maps in this package).
var modelWithLengthCtx = []string{
	"gpt-5.4",
	"gpt-5.4-pro",
}

// longContextTokenThreshold is the total token count at or above which a
// request is billed at the long-context rate.
// NOTE(review): 272000 presumably matches the provider's long-context
// pricing boundary — confirm against the published price list.
const longContextTokenThreshold = 272000

// ModelWithContextLength returns the cost-map lookup key for model given
// the request's total token count. For models with context-length-dependent
// pricing it appends "~long" or "~short"; every other model name is
// returned unchanged. Surrounding whitespace is trimmed in all cases.
func ModelWithContextLength(model string, tokens int64) string {
	trimmed := strings.TrimSpace(model)
	if !slices.Contains(modelWithLengthCtx, trimmed) {
		return trimmed
	}
	return trimmed + contextLengthSuffixByTokens(tokens)
}

// contextLengthSuffixByTokens maps a total token count to the pricing-tier
// suffix used in the cost maps.
func contextLengthSuffixByTokens(tokens int64) string {
	if tokens >= longContextTokenThreshold {
		return "~long"
	}
	return "~short"
}
29 changes: 15 additions & 14 deletions internal/server/web/proxy/middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"io"
"net/http"
"slices"
"strconv"
"strings"
"time"
Expand Down Expand Up @@ -823,20 +824,20 @@ func getMiddleware(cpm CustomProvidersManager, rm routeManager, pm PoliciesManag
return
}

//hasNotAllowedTools := false
//for _, tool := range responsesReq.Tools {
// if !slices.Contains(openai.AllowedTools, tool.Type) {
// hasNotAllowedTools = true
// break
// }
//}
//
//if hasNotAllowedTools {
// telemetry.Incr("bricksllm.proxy.get_middleware.tool_not_allowed", nil, 1)
// JSON(c, http.StatusForbidden, "[BricksLLM] one of the tools is not allowed")
// c.Abort()
// return
//}
hasNotAllowedTools := false
for _, tool := range responsesReq.Tools {
if !slices.Contains(openai.AllowedTools, tool.Type) {
hasNotAllowedTools = true
break
}
}

if hasNotAllowedTools {
telemetry.Incr("bricksllm.proxy.get_middleware.tool_not_allowed", nil, 1)
JSON(c, http.StatusForbidden, "[BricksLLM] one of the tools is not allowed")
c.Abort()
return
}

isCreateContainerTool := false
var containerMemLimit string
Expand Down
Loading