veylant/internal/provider/provider.go
2026-02-23 13:35:04 +01:00

79 lines
2.8 KiB
Go

// Package provider defines the LLM provider adapter interface and the unified
// request/response types used by the Veylant proxy.
//
// Each provider (OpenAI, Anthropic, Azure, Mistral, Ollama) implements Adapter.
// The proxy always speaks the OpenAI wire format towards its clients; adapters
// translate to/from each upstream API internally.
package provider
import (
"context"
"net/http"
)
// Message is a single chat message in the OpenAI format.
type Message struct {
	// Role identifies the author of the message — presumably "system",
	// "user", or "assistant" per the OpenAI schema; not validated here.
	Role string `json:"role"`
	// Content is the plain-text body of the message.
	Content string `json:"content"`
}
// ChatRequest mirrors the OpenAI /v1/chat/completions request body.
// Fields marked omitempty are forwarded as-is to the upstream provider.
//
// Pointer fields (Temperature, MaxTokens, TopP, N) distinguish
// "client omitted the field" (nil) from "client sent the zero value",
// so upstream defaults are not accidentally overridden.
type ChatRequest struct {
	// Model is the model identifier requested by the client.
	Model string `json:"model"`
	// Messages is the ordered conversation history.
	Messages []Message `json:"messages"`
	// Stream requests server-sent-event streaming when true.
	Stream bool `json:"stream,omitempty"`
	// Temperature is the sampling temperature; nil means provider default.
	Temperature *float64 `json:"temperature,omitempty"`
	// MaxTokens caps the completion length; nil means provider default.
	MaxTokens *int `json:"max_tokens,omitempty"`
	// TopP is the nucleus-sampling parameter; nil means provider default.
	TopP *float64 `json:"top_p,omitempty"`
	// N is the number of completions to generate; nil means provider default.
	N *int `json:"n,omitempty"`
	// Stop lists sequences that terminate generation.
	Stop []string `json:"stop,omitempty"`
	// User is an opaque end-user identifier forwarded to the provider.
	User string `json:"user,omitempty"`
}
// ChatResponse mirrors the OpenAI /v1/chat/completions response body.
// Provider is set by the router after dispatch and is never serialised to clients.
type ChatResponse struct {
	// ID is the upstream completion identifier.
	ID string `json:"id"`
	// Object is the OpenAI object type string (e.g. "chat.completion") —
	// passed through from the upstream; not checked here.
	Object string `json:"object"`
	// Created is the completion creation time as a Unix timestamp (seconds).
	Created int64 `json:"created"`
	// Model is the model that actually produced the completion.
	Model string `json:"model"`
	// Choices holds the generated completion choices.
	Choices []Choice `json:"choices"`
	// Usage reports token consumption for this request.
	Usage Usage `json:"usage"`
	// Provider names the upstream that served the request. The json:"-"
	// tag keeps it out of client responses; it exists for audit logging.
	Provider string `json:"-"` // populated by router.sendWithFallback for audit logging
}
// Choice is a single completion choice in the response.
type Choice struct {
	// Index is the zero-based position of this choice in the response.
	Index int `json:"index"`
	// Message is the assistant message produced for this choice.
	Message Message `json:"message"`
	// FinishReason explains why generation stopped — presumably values
	// like "stop" or "length" per the OpenAI schema; passed through as-is.
	FinishReason string `json:"finish_reason"`
}
// Usage holds token consumption statistics.
type Usage struct {
	// PromptTokens is the number of tokens in the input messages.
	PromptTokens int `json:"prompt_tokens"`
	// CompletionTokens is the number of tokens generated.
	CompletionTokens int `json:"completion_tokens"`
	// TotalTokens is the sum reported by the upstream provider.
	TotalTokens int `json:"total_tokens"`
}
// Adapter is the contract every LLM provider must implement.
// Sprint 2: OpenAI is the only implementation.
// Sprint 4: Anthropic, Azure, Mistral, Ollama adapters are added.
type Adapter interface {
	// Send performs a non-streaming chat completion request.
	// The returned response is in the unified OpenAI-shaped format;
	// adapters translate from the upstream wire format internally.
	Send(ctx context.Context, req *ChatRequest) (*ChatResponse, error)
	// Stream performs a streaming chat completion request.
	// SSE chunks are written directly to w as they arrive from the upstream.
	// The caller must NOT write to w after Stream returns.
	Stream(ctx context.Context, req *ChatRequest, w http.ResponseWriter) error
	// Validate checks that req is well-formed for this provider.
	// Returns a descriptive error if the request cannot be forwarded.
	// NOTE(review): takes no Context — presumably a pure, local check.
	Validate(req *ChatRequest) error
	// HealthCheck verifies that the upstream provider is reachable.
	HealthCheck(ctx context.Context) error
}