veylant/internal/billing/billing.go

// Package billing provides token-based cost estimation for LLM API calls.
// Costs are expressed in USD per 1 000 tokens (blended input+output rate).
// Ollama (local) has no cost. Unknown providers/models return 0.
package billing

import "strings"

// costPer1kTokens is the price table used by CostUSD. Each key has the form
// "provider/model" and each value is the blended (input+output) rate in USD
// per 1 000 tokens. Keys are base model names: CostUSD falls back to the
// longest key that is a prefix of the requested one, so a versioned name such
// as "openai/gpt-4o-2024-08-06" is priced as "openai/gpt-4o".
var costPer1kTokens = map[string]float64{
	// OpenAI
	"openai/gpt-4o":        0.005,
	"openai/gpt-4o-mini":   0.00015,
	"openai/gpt-3.5-turbo": 0.0005,

	// Anthropic
	"anthropic/claude-3-5-sonnet": 0.003,
	"anthropic/claude-3-opus":     0.015,
	"anthropic/claude-3-haiku":    0.00025,

	// Mistral
	"mistral/mistral-small": 0.0002,
	"mistral/mistral-large": 0.002,

	// ollama/* is deliberately absent: local inference has no API cost, so
	// the lookup misses and the cost comes out as 0.
}

// CostUSD estimates the USD cost of an LLM call that consumed totalTokens
// tokens on the given provider and model.
//
// The rate is looked up in costPer1kTokens under the key "provider/model".
// If that exact key is not registered, the longest registered key that is a
// prefix of it is used instead, which lets versioned model names (for example
// "openai/gpt-4o-2024-08-06") inherit the rate of their base entry
// ("openai/gpt-4o"). The result is rate * totalTokens / 1000.
//
// It returns 0 when totalTokens is zero or negative, and when no registered
// key matches (e.g. ollama or any other unknown provider/model).
func CostUSD(provider, model string, totalTokens int) float64 {
	if totalTokens <= 0 {
		return 0
	}

	lookup := provider + "/" + model

	perThousand, found := costPer1kTokens[lookup]
	if !found {
		// No exact entry: scan for the longest registered key that prefixes
		// lookup. Map iteration order is random, but a candidate only wins
		// when it is strictly longer than the current best, and two distinct
		// keys of equal length cannot both be prefixes of the same string,
		// so the outcome does not depend on the order.
		matched := ""
		for candidate, r := range costPer1kTokens {
			if len(candidate) <= len(matched) {
				continue
			}
			if strings.HasPrefix(lookup, candidate) {
				matched, perThousand = candidate, r
			}
		}
		if matched == "" {
			return 0
		}
	}

	return perThousand * float64(totalTokens) / 1000.0
}