// Package provider defines the LLM provider adapter interface and the unified
// request/response types used by the Veylant proxy.
//
// Each provider (OpenAI, Anthropic, Azure, Mistral, Ollama) implements Adapter.
// The proxy always speaks the OpenAI wire format towards its clients; adapters
// translate to/from each upstream API internally.
package provider
import (
	"context"
	"net/http"
)
// Message is a single chat message in the OpenAI format.
//
// Role identifies the speaker (e.g. "system", "user", "assistant") and
// Content holds the message text. Role values are not validated here;
// presumably each adapter's Validate checks them — TODO confirm.
type Message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}
// ChatRequest mirrors the OpenAI /v1/chat/completions request body.
// Fields marked omitempty are forwarded as-is to the upstream provider.
type ChatRequest struct {
	Model    string    `json:"model"`
	Messages []Message `json:"messages"`
	Stream   bool      `json:"stream,omitempty"`

	// The tuning knobs below are pointers so that "unset" (nil, omitted by
	// omitempty) is distinguishable from an explicit zero value — e.g. a
	// client-supplied temperature of 0 is still forwarded upstream.
	Temperature *float64 `json:"temperature,omitempty"`
	MaxTokens   *int     `json:"max_tokens,omitempty"`
	TopP        *float64 `json:"top_p,omitempty"`
	N           *int     `json:"n,omitempty"`

	Stop []string `json:"stop,omitempty"`
	User string   `json:"user,omitempty"`
}
// ChatResponse mirrors the OpenAI /v1/chat/completions response body.
// Provider is set by the router after dispatch and is never serialised to clients.
type ChatResponse struct {
	ID      string   `json:"id"`
	Object  string   `json:"object"`
	Created int64    `json:"created"` // Unix seconds, per the OpenAI schema this mirrors
	Model   string   `json:"model"`
	Choices []Choice `json:"choices"`
	Usage   Usage    `json:"usage"`

	// Provider names the upstream that actually served the request; the
	// json:"-" tag keeps it out of the client-facing payload.
	Provider string `json:"-"` // populated by router.sendWithFallback for audit logging
}
// Choice is a single completion choice in the response.
//
// Index is the position of this choice in ChatResponse.Choices; FinishReason
// is forwarded verbatim from the upstream (e.g. "stop", "length" in the
// OpenAI schema — exact values depend on the provider).
type Choice struct {
	Index        int     `json:"index"`
	Message      Message `json:"message"`
	FinishReason string  `json:"finish_reason"`
}
// Usage holds token consumption statistics.
//
// NOTE(review): TotalTokens is carried as its own field rather than computed,
// mirroring the upstream response — no invariant Prompt+Completion==Total is
// enforced here.
type Usage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}
// Adapter is the contract every LLM provider must implement.
//
// All request/response values use the unified OpenAI-shaped types above;
// each implementation translates to and from its upstream API internally.
//
// Sprint 2: OpenAI is the only implementation.
// Sprint 4: Anthropic, Azure, Mistral, Ollama adapters are added.
type Adapter interface {
	// Send performs a non-streaming chat completion request.
	Send(ctx context.Context, req *ChatRequest) (*ChatResponse, error)

	// Stream performs a streaming chat completion request.
	// SSE chunks are written directly to w as they arrive from the upstream.
	// The caller must NOT write to w after Stream returns.
	Stream(ctx context.Context, req *ChatRequest, w http.ResponseWriter) error

	// Validate checks that req is well-formed for this provider.
	// Returns a descriptive error if the request cannot be forwarded.
	Validate(req *ChatRequest) error

	// HealthCheck verifies that the upstream provider is reachable.
	HealthCheck(ctx context.Context) error
}