// Package provider defines the LLM provider adapter interface and the unified
// request/response types used by the Veylant proxy.
//
// Each provider (OpenAI, Anthropic, Azure, Mistral, Ollama) implements Adapter.
// The proxy always speaks the OpenAI wire format towards its clients; adapters
// translate to/from each upstream API internally.
package provider
import (
	"context"
	"net/http"
)
// Message is a single chat message in the OpenAI format.
//
// Role identifies the speaker (e.g. "system", "user", "assistant") and
// Content holds the message text. Role values are not validated here;
// presumably each adapter's Validate checks them — TODO confirm.
type Message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}
// ChatRequest mirrors the OpenAI /v1/chat/completions request body.
// Fields marked omitempty are forwarded as-is to the upstream provider.
type ChatRequest struct {
	Model    string    `json:"model"`
	Messages []Message `json:"messages"`
	Stream   bool      `json:"stream,omitempty"`

	// The tuning knobs below are pointers so that "unset" (nil, omitted by
	// omitempty) is distinguishable from an explicit zero value — e.g. a
	// client-supplied temperature of 0 is still forwarded upstream.
	Temperature *float64 `json:"temperature,omitempty"`
	MaxTokens   *int     `json:"max_tokens,omitempty"`
	TopP        *float64 `json:"top_p,omitempty"`
	N           *int     `json:"n,omitempty"`

	Stop []string `json:"stop,omitempty"`
	User string   `json:"user,omitempty"`
}
// ChatResponse mirrors the OpenAI /v1/chat/completions response body.
// Provider is set by the router after dispatch and is never serialised to clients.
type ChatResponse struct {
	ID      string   `json:"id"`
	Object  string   `json:"object"`
	Created int64    `json:"created"` // Unix seconds, per the OpenAI schema this mirrors
	Model   string   `json:"model"`
	Choices []Choice `json:"choices"`
	Usage   Usage    `json:"usage"`

	// Provider names the upstream that actually served the request; the
	// json:"-" tag keeps it out of the client-facing payload.
	Provider string `json:"-"` // populated by router.sendWithFallback for audit logging
}
// Choice is a single completion choice in the response.
//
// Index is the position of this choice in ChatResponse.Choices; FinishReason
// is forwarded verbatim from the upstream (e.g. "stop", "length" in the
// OpenAI schema — exact values depend on the provider).
type Choice struct {
	Index        int     `json:"index"`
	Message      Message `json:"message"`
	FinishReason string  `json:"finish_reason"`
}
// Usage holds token consumption statistics.
//
// NOTE(review): TotalTokens is carried as its own field rather than computed,
// mirroring the upstream response — no invariant Prompt+Completion==Total is
// enforced here.
type Usage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}
// Adapter is the contract every LLM provider must implement.
//
// All request/response values use the unified OpenAI-shaped types above;
// each implementation translates to and from its upstream API internally.
//
// Sprint 2: OpenAI is the only implementation.
// Sprint 4: Anthropic, Azure, Mistral, Ollama adapters are added.
type Adapter interface {
	// Send performs a non-streaming chat completion request.
	Send(ctx context.Context, req *ChatRequest) (*ChatResponse, error)

	// Stream performs a streaming chat completion request.
	// SSE chunks are written directly to w as they arrive from the upstream.
	// The caller must NOT write to w after Stream returns.
	Stream(ctx context.Context, req *ChatRequest, w http.ResponseWriter) error

	// Validate checks that req is well-formed for this provider.
	// Returns a descriptive error if the request cannot be forwarded.
	Validate(req *ChatRequest) error

	// HealthCheck verifies that the upstream provider is reachable.
	HealthCheck(ctx context.Context) error
}