veylant/internal/billing/billing.go
2026-02-23 13:35:04 +01:00

53 lines
1.7 KiB
Go

// Package billing provides token-based cost estimation for LLM API calls.
// Costs are expressed in USD per 1 000 tokens (blended input+output rate).
// Ollama (local inference) has no entry in the rate table and therefore costs 0;
// any other unknown provider/model pair is likewise priced at 0.
package billing
import "strings"
// costPer1kTokens is the rate table used for cost estimation. Each key is a
// "provider/model" pair and each value is the blended (input+output) price in
// USD per 1 000 tokens.
//
// Keys are base model names: a versioned name such as
// "openai/gpt-4o-2024-08-06" has no entry of its own and is resolved by
// CostUSD through a longest-prefix match, here to "openai/gpt-4o".
var costPer1kTokens = map[string]float64{
	// OpenAI
	"openai/gpt-4o":        0.005,
	"openai/gpt-4o-mini":   0.00015,
	"openai/gpt-3.5-turbo": 0.0005,

	// Anthropic
	"anthropic/claude-3-5-sonnet": 0.003,
	"anthropic/claude-3-opus":     0.015,
	"anthropic/claude-3-haiku":    0.00025,

	// Mistral
	"mistral/mistral-small": 0.0002,
	"mistral/mistral-large": 0.002,

	// ollama/* is intentionally absent → 0 (local inference, no API cost).
}
// CostUSD estimates the USD cost of a call that consumed totalTokens tokens
// on the given provider and model.
//
// The rate is looked up in costPer1kTokens under the key "provider/model".
// If that exact key is not registered, the longest registered key that is a
// prefix of it is used instead, so versioned model names resolve to their
// base model (e.g. "openai/gpt-4o-2024-08-06" is priced as "openai/gpt-4o").
//
// It returns 0 when totalTokens is not positive, and also when neither an
// exact nor a prefix match exists (e.g. ollama, which has no entry).
func CostUSD(provider, model string, totalTokens int) float64 {
	if totalTokens <= 0 {
		return 0
	}

	name := provider + "/" + model
	rate, found := costPer1kTokens[name]
	if !found {
		// No exact entry: fall back to the most specific (longest) registered
		// prefix. Map iteration order is random, but the winner is not: two
		// different prefixes of the same string cannot have equal length.
		longest := 0
		for prefix, r := range costPer1kTokens {
			if len(prefix) <= longest || !strings.HasPrefix(name, prefix) {
				continue
			}
			longest, rate = len(prefix), r
		}
		if longest == 0 {
			return 0
		}
	}
	return rate * float64(totalTokens) / 1000.0
}