// Package billing provides token-based cost estimation for LLM API calls. // Costs are expressed in USD per 1 000 tokens (blended input+output rate). // Ollama (local) has no cost. Unknown providers/models return 0. package billing import "strings" // costPer1kTokens maps "provider/model" to USD per 1 000 tokens (blended rate). // Exact match is tried first; if not found, prefix match handles versioned names // such as "gpt-4o-2024-08-06" matching "openai/gpt-4o". var costPer1kTokens = map[string]float64{ "openai/gpt-4o": 0.005000, "openai/gpt-4o-mini": 0.000150, "openai/gpt-3.5-turbo": 0.000500, "anthropic/claude-3-5-sonnet": 0.003000, "anthropic/claude-3-opus": 0.015000, "anthropic/claude-3-haiku": 0.000250, "mistral/mistral-small": 0.000200, "mistral/mistral-large": 0.002000, // ollama/* absent → 0 (local inference, no API cost) } // CostUSD returns the estimated cost in USD for totalTokens tokens. // It first tries an exact match on "provider/model", then a prefix match // to handle versioned model names (e.g. "gpt-4o-2024-08-06" → "openai/gpt-4o"). // Returns 0 for unknown providers/models (e.g. ollama). func CostUSD(provider, model string, totalTokens int) float64 { if totalTokens <= 0 { return 0 } key := provider + "/" + model // Exact match. if rate, ok := costPer1kTokens[key]; ok { return rate * float64(totalTokens) / 1000.0 } // Prefix match: find the longest registered key that is a prefix of key. var bestRate float64 var bestLen int for k, rate := range costPer1kTokens { if strings.HasPrefix(key, k) && len(k) > bestLen { bestRate = rate bestLen = len(k) } } if bestLen > 0 { return bestRate * float64(totalTokens) / 1000.0 } return 0 }