// Package provider defines the LLM provider adapter interface and the unified // request/response types used by the Veylant proxy. // // Each provider (OpenAI, Anthropic, Azure, Mistral, Ollama) implements Adapter. // The proxy always speaks the OpenAI wire format towards its clients; adapters // translate to/from each upstream API internally. package provider import ( "context" "net/http" ) // Message is a single chat message in the OpenAI format. type Message struct { Role string `json:"role"` Content string `json:"content"` } // ChatRequest mirrors the OpenAI /v1/chat/completions request body. // Fields marked omitempty are forwarded as-is to the upstream provider. type ChatRequest struct { Model string `json:"model"` Messages []Message `json:"messages"` Stream bool `json:"stream,omitempty"` Temperature *float64 `json:"temperature,omitempty"` MaxTokens *int `json:"max_tokens,omitempty"` TopP *float64 `json:"top_p,omitempty"` N *int `json:"n,omitempty"` Stop []string `json:"stop,omitempty"` User string `json:"user,omitempty"` } // ChatResponse mirrors the OpenAI /v1/chat/completions response body. // Provider is set by the router after dispatch and is never serialised to clients. type ChatResponse struct { ID string `json:"id"` Object string `json:"object"` Created int64 `json:"created"` Model string `json:"model"` Choices []Choice `json:"choices"` Usage Usage `json:"usage"` Provider string `json:"-"` // populated by router.sendWithFallback for audit logging } // Choice is a single completion choice in the response. type Choice struct { Index int `json:"index"` Message Message `json:"message"` FinishReason string `json:"finish_reason"` } // Usage holds token consumption statistics. type Usage struct { PromptTokens int `json:"prompt_tokens"` CompletionTokens int `json:"completion_tokens"` TotalTokens int `json:"total_tokens"` } // Adapter is the contract every LLM provider must implement. // Sprint 2: OpenAI is the only implementation. // Sprint 4: Anthropic, Azure, Mistral, Ollama adapters are added. type Adapter interface { // Send performs a non-streaming chat completion request. Send(ctx context.Context, req *ChatRequest) (*ChatResponse, error) // Stream performs a streaming chat completion request. // SSE chunks are written directly to w as they arrive from the upstream. // The caller must NOT write to w after Stream returns. Stream(ctx context.Context, req *ChatRequest, w http.ResponseWriter) error // Validate checks that req is well-formed for this provider. // Returns a descriptive error if the request cannot be forwarded. Validate(req *ChatRequest) error // HealthCheck verifies that the upstream provider is reachable. HealthCheck(ctx context.Context) error }