122 lines
4.6 KiB
Go
122 lines
4.6 KiB
Go
// Package routing implements the intelligent routing engine for the Veylant proxy.
// Rules are stored in PostgreSQL (JSONB conditions), cached in memory (30s TTL),
// and evaluated in ascending priority order; the first matching rule wins.
package routing
|
|
|
|
import "time"
|
|
|
|
// ─── Sensitivity ─────────────────────────────────────────────────────────────
|
|
|
|
// Sensitivity is the PII risk level assigned to a request.
//
// Levels are ordered: a numerically larger value means more sensitive data,
// so two levels can be compared directly with <, <=, > and >=.
type Sensitivity int

// Sensitivity levels, listed from least to most sensitive. The declaration
// order fixes the numeric values (None = 0 through Critical = 4); inserting a
// level in the middle of the list would renumber every level after it.
const (
	SensitivityNone Sensitivity = iota
	SensitivityLow
	SensitivityMedium
	SensitivityHigh
	SensitivityCritical
)
|
|
|
|
// String returns the canonical lowercase name for the sensitivity level.
|
|
func (s Sensitivity) String() string {
|
|
switch s {
|
|
case SensitivityNone:
|
|
return "none"
|
|
case SensitivityLow:
|
|
return "low"
|
|
case SensitivityMedium:
|
|
return "medium"
|
|
case SensitivityHigh:
|
|
return "high"
|
|
case SensitivityCritical:
|
|
return "critical"
|
|
default:
|
|
return "unknown"
|
|
}
|
|
}
|
|
|
|
// ParseSensitivity converts a string (none/low/medium/high/critical) to a Sensitivity.
|
|
func ParseSensitivity(s string) (Sensitivity, bool) {
|
|
switch s {
|
|
case "none":
|
|
return SensitivityNone, true
|
|
case "low":
|
|
return SensitivityLow, true
|
|
case "medium":
|
|
return SensitivityMedium, true
|
|
case "high":
|
|
return SensitivityHigh, true
|
|
case "critical":
|
|
return SensitivityCritical, true
|
|
default:
|
|
return SensitivityNone, false
|
|
}
|
|
}
|
|
|
|
// ─── Condition ───────────────────────────────────────────────────────────────
|
|
|
|
// Condition is one predicate of a routing rule. A rule matches only when
// every one of its conditions matches (logical AND).
type Condition struct {
	// Field names the request attribute under test. Supported values:
	// user.role, user.department, request.sensitivity, request.model,
	// request.use_case, request.token_estimate.
	Field string `json:"field"`

	// Operator selects the comparison applied to the field. Supported values:
	// eq, neq, in, nin, gte, lte, contains, matches.
	Operator string `json:"operator"`

	// Value is the operand of the comparison. Its expected type follows the
	// operator:
	//
	//	eq, neq, contains, matches, gte, lte -> string or number
	//	in, nin                              -> []interface{} (JSON array of strings)
	Value interface{} `json:"value"`
}
|
|
|
|
// ─── Action ──────────────────────────────────────────────────────────────────
|
|
|
|
// Action describes the routing decision produced when a rule matches.
type Action struct {
	// Provider names the primary upstream adapter. Valid values: openai,
	// anthropic, azure, mistral, ollama.
	Provider string `json:"provider"`

	// Model, when non-empty, replaces the model name sent upstream. When it
	// is empty the model from the original ChatRequest is forwarded unchanged.
	Model string `json:"model,omitempty"`

	// FallbackProviders lists the providers to try, in order, if the primary
	// provider fails.
	FallbackProviders []string `json:"fallback_providers,omitempty"`
}
|
|
|
|
// ─── RoutingRule ─────────────────────────────────────────────────────────────
|
|
|
|
// RoutingRule is a single DB-backed policy entry.
|
|
type RoutingRule struct {
|
|
ID string
|
|
TenantID string
|
|
Name string
|
|
Description string
|
|
Conditions []Condition // ALL must match (AND). Empty slice = catch-all (matches everything).
|
|
Action Action
|
|
Priority int // lower value = evaluated first
|
|
IsEnabled bool
|
|
CreatedAt time.Time
|
|
UpdatedAt time.Time
|
|
}
|
|
|
|
// ─── RoutingContext ───────────────────────────────────────────────────────────
|
|
|
|
// RoutingContext carries the per-request attributes used by condition evaluation.
|
|
type RoutingContext struct {
|
|
TenantID string
|
|
UserRole string // primary role from JWT (e.g. "admin", "user")
|
|
Department string // user department from JWT claim
|
|
Sensitivity Sensitivity // scored from PII detection results
|
|
Model string // model name from the ChatRequest
|
|
UseCase string // optional use-case tag (e.g. "summarization")
|
|
TokenEstimate int // estimated prompt token count
|
|
}
|