// Package billing provides token-based cost estimation for LLM API calls.
// Costs are expressed in USD per 1 000 tokens (blended input+output rate).
// Ollama (local) has no cost. Unknown providers/models return 0.
package billing
|
|
|
|
import "strings"
|
|
|
|
// costPer1kTokens maps a "provider/model" key to its price in USD per
// 1 000 tokens (blended input+output rate).
//
// Lookups try the exact key first; if that fails, the longest registered key
// that is a prefix of the lookup key is used, so a versioned name such as
// "openai/gpt-4o-2024-08-06" resolves to the "openai/gpt-4o" entry.
var costPer1kTokens = map[string]float64{
	"openai/gpt-4o":               0.005000,
	"openai/gpt-4o-mini":          0.000150,
	"openai/gpt-3.5-turbo":        0.000500,
	"anthropic/claude-3-5-sonnet": 0.003000,
	"anthropic/claude-3-opus":     0.015000,
	"anthropic/claude-3-haiku":    0.000250,
	"mistral/mistral-small":       0.000200,
	"mistral/mistral-large":       0.002000,
	// ollama/* is intentionally absent: local inference has no API cost, and
	// a missing entry is reported as 0.
}
|
|
|
|
// CostUSD returns the estimated cost in USD for totalTokens tokens.
|
|
// It first tries an exact match on "provider/model", then a prefix match
|
|
// to handle versioned model names (e.g. "gpt-4o-2024-08-06" → "openai/gpt-4o").
|
|
// Returns 0 for unknown providers/models (e.g. ollama).
|
|
func CostUSD(provider, model string, totalTokens int) float64 {
|
|
if totalTokens <= 0 {
|
|
return 0
|
|
}
|
|
key := provider + "/" + model
|
|
|
|
// Exact match.
|
|
if rate, ok := costPer1kTokens[key]; ok {
|
|
return rate * float64(totalTokens) / 1000.0
|
|
}
|
|
|
|
// Prefix match: find the longest registered key that is a prefix of key.
|
|
var bestRate float64
|
|
var bestLen int
|
|
for k, rate := range costPer1kTokens {
|
|
if strings.HasPrefix(key, k) && len(k) > bestLen {
|
|
bestRate = rate
|
|
bestLen = len(k)
|
|
}
|
|
}
|
|
if bestLen > 0 {
|
|
return bestRate * float64(totalTokens) / 1000.0
|
|
}
|
|
|
|
return 0
|
|
}
|