veylant/internal/health/playground_analyze.go
2026-02-23 13:35:04 +01:00

188 lines
5.6 KiB
Go

package health
import (
"encoding/json"
"net"
"net/http"
"strings"
"sync"
"time"
"go.uber.org/zap"
"golang.org/x/time/rate"
"github.com/veylant/ia-gateway/internal/apierror"
"github.com/veylant/ia-gateway/internal/pii"
)
// PlaygroundAnalyzeRequest is the JSON payload accepted by
// POST /playground/analyze.
type PlaygroundAnalyzeRequest struct {
	// Text is the raw input to scan for PII. It is read from the "text" key.
	Text string `json:"text"`
}
// PlaygroundAnalyzeResponse is the JSON document returned by
// POST /playground/analyze.
type PlaygroundAnalyzeResponse struct {
	// Entities lists the PII entities found in the submitted text.
	Entities []PlaygroundEntity `json:"entities"`

	// AnonymizedText is the submitted text after anonymization.
	AnonymizedText string `json:"anonymized_text"`
}
// PlaygroundEntity describes one PII entity reported by the playground
// endpoint.
type PlaygroundEntity struct {
	// Type is the entity category, e.g. "PERSON" or "EMAIL_ADDRESS".
	Type string `json:"type"`

	// Value is the original matched text (shown in the UI badge). It is left
	// out of the JSON when empty.
	Value string `json:"value,omitempty"`

	// Pseudonym is the placeholder that stands in for Value, e.g. "[PERSON_1]".
	// It is left out of the JSON when empty.
	Pseudonym string `json:"pseudonym,omitempty"`

	// Start is the offset in the submitted text where the entity begins.
	Start int `json:"start"`

	// End is the offset in the submitted text where the entity ends.
	End int `json:"end"`

	// Confidence is the detection score attached to the entity.
	Confidence float64 `json:"confidence"`

	// Layer names the detection layer that produced the entity.
	Layer string `json:"layer"`
}
// playgroundIPLimiter keeps one token-bucket rate limiter per client IP.
// Entries that stay unused for more than 5 minutes are dropped by get.
type playgroundIPLimiter struct {
	// mu serializes every access to limiters.
	mu sync.Mutex

	// limiters maps a client IP string to its limiter and last-use time.
	limiters map[string]*ipEntry
}
// ipEntry is the per-IP state stored by playgroundIPLimiter.
type ipEntry struct {
	// limiter is the token bucket applied to requests from this IP.
	limiter *rate.Limiter

	// lastSeen is when this IP last asked for its limiter; it drives eviction.
	lastSeen time.Time
}
// globalPlaygroundLimiter is the package-level limiter set consulted by
// PlaygroundAnalyzeHandler. It starts with no tracked IPs.
var globalPlaygroundLimiter = &playgroundIPLimiter{
	limiters: map[string]*ipEntry{},
}
// get returns the rate limiter for ip, creating it on first use, and records
// the access time so the entry is not treated as idle.
//
// A new limiter refills one token every 3 seconds (20 requests/minute) and
// allows a burst of 5. Every call also sweeps the map and deletes entries that
// have not been used for more than 5 minutes, which keeps memory bounded. The
// sweep visits every tracked IP and runs while p.mu is held.
func (p *playgroundIPLimiter) get(ip string) *rate.Limiter {
	const idleTimeout = 5 * time.Minute

	p.mu.Lock()
	defer p.mu.Unlock()

	// Tolerate a zero-value playgroundIPLimiter: writing to a nil map panics.
	if p.limiters == nil {
		p.limiters = make(map[string]*ipEntry)
	}

	// Read the clock once so every entry is judged against the same instant
	// and the sweep does not re-read it per entry while holding the lock.
	now := time.Now()

	entry, ok := p.limiters[ip]
	if !ok {
		// 20 req/min per IP, burst of 5.
		entry = &ipEntry{limiter: rate.NewLimiter(rate.Every(3*time.Second), 5)}
		p.limiters[ip] = entry
	}
	entry.lastSeen = now

	// Evict stale entries to prevent unbounded memory growth. The entry just
	// touched has lastSeen == now, so it always survives.
	for key, e := range p.limiters {
		if now.Sub(e.lastSeen) > idleTimeout {
			delete(p.limiters, key)
		}
	}
	return entry.limiter
}
// PlaygroundAnalyzeHandler returns the HTTP handler for POST /playground/analyze.
// The endpoint is public (no JWT required), so the handler defends itself:
//
//   - callers are rate-limited per client IP through globalPlaygroundLimiter
//     (20 requests/minute, burst of 5) and receive a rate-limit error once the
//     budget is spent;
//   - the request body is capped at 1 MiB and must decode as a
//     PlaygroundAnalyzeRequest, otherwise a bad-request error is returned;
//   - an empty text is answered with an empty entity list;
//   - any other text is sent to piiClient, and the detected entities plus the
//     anonymized text are returned.
//
// If piiClient is nil, or detection fails (the failure is logged as a warning
// on logger), the handler serves simulatedResponse instead so the playground
// page remains usable when the PII service is not running.
func PlaygroundAnalyzeHandler(piiClient *pii.Client, logger *zap.Logger) http.HandlerFunc {
	// maxBodyBytes bounds how much of the request body is read.
	const maxBodyBytes = 1 << 20 // 1 MiB

	return func(w http.ResponseWriter, r *http.Request) {
		// IP-based rate limiting.
		if !globalPlaygroundLimiter.get(clientIP(r)).Allow() {
			apierror.WriteError(w, apierror.NewRateLimitError(
				"playground rate limit exceeded — max 20 requests/minute per IP",
			))
			return
		}

		// The body comes from unauthenticated callers: cap it before decoding
		// so one request cannot make the server buffer an arbitrarily large
		// JSON string. An oversized body surfaces as a decode error below.
		r.Body = http.MaxBytesReader(w, r.Body, maxBodyBytes)

		// Parse request body.
		var req PlaygroundAnalyzeRequest
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			apierror.WriteError(w, apierror.NewBadRequestError("invalid JSON: "+err.Error()))
			return
		}

		// Nothing to analyze: reply with an explicit empty list (encoded as
		// [] rather than null).
		if req.Text == "" {
			writeJSON(w, PlaygroundAnalyzeResponse{Entities: []PlaygroundEntity{}, AnonymizedText: ""})
			return
		}

		// Delegate to PII service if available.
		if piiClient != nil {
			result, err := piiClient.Detect(r.Context(), req.Text, "playground", "public-playground", true, false)
			if err == nil {
				entities := make([]PlaygroundEntity, 0, len(result.Entities))
				for _, e := range result.Entities {
					// Keep both bounds inside req.Text and force end >= start:
					// a slice expression with start > end panics, and the
					// offsets come from another service.
					// NOTE(review): the offsets are used as byte indices; if
					// the PII service reports character offsets, Value will be
					// misaligned for non-ASCII text — confirm.
					start := clamp(int(e.Start), 0, len(req.Text))
					end := clamp(int(e.End), start, len(req.Text))

					entities = append(entities, PlaygroundEntity{
						Type:       e.EntityType,
						Value:      req.Text[start:end],
						Pseudonym:  e.Pseudonym,
						Start:      int(e.Start),
						End:        int(e.End),
						Confidence: float64(e.Confidence),
						Layer:      e.DetectionLayer,
					})
				}
				writeJSON(w, PlaygroundAnalyzeResponse{
					Entities:       entities,
					AnonymizedText: result.AnonymizedText,
				})
				return
			}
			logger.Warn("playground PII detection failed — using simulated response", zap.Error(err))
		}

		// PII service unavailable: return a deterministic simulated response so
		// the playground page still demonstrates the feature.
		writeJSON(w, simulatedResponse(req.Text))
	}
}
// simulatedResponse builds the placeholder result served when PII detection is
// not available. It always lists the same two example entities (a PERSON and
// an EMAIL_ADDRESS, both at offset 0 and tagged with the "simulated" layer) and
// echoes text, trimmed of surrounding whitespace, followed by an offline
// notice.
func simulatedResponse(text string) PlaygroundAnalyzeResponse {
	const offlineNotice = "\n\n[PII service offline — showing example output]"

	examples := []PlaygroundEntity{
		{
			Type:       "PERSON",
			Value:      "(example)",
			Pseudonym:  "[PERSON_1]",
			Start:      0,
			End:        0,
			Confidence: 0.99,
			Layer:      "simulated",
		},
		{
			Type:       "EMAIL_ADDRESS",
			Value:      "(example)",
			Pseudonym:  "[EMAIL_1]",
			Start:      0,
			End:        0,
			Confidence: 0.99,
			Layer:      "simulated",
		},
	}

	return PlaygroundAnalyzeResponse{
		Entities:       examples,
		AnonymizedText: strings.TrimSpace(text) + offlineNotice,
	}
}
// clientIP picks the address used to identify the caller of r, honouring the
// X-Real-IP and X-Forwarded-For headers set by Traefik / Nginx.
//
// Candidates are tried in this order and the first usable one wins:
//  1. the X-Real-IP header, if it parses as an IP;
//  2. the first comma-separated element of X-Forwarded-For, if it parses as
//     an IP;
//  3. the host part of r.RemoteAddr, or RemoteAddr unchanged when it is not in
//     host:port form.
//
// Header candidates are trimmed of surrounding whitespace and returned in the
// textual form produced by net.IP.String.
//
// NOTE(review): both headers are taken at face value, so the result is only
// trustworthy if a reverse proxy in front of this service sets or strips
// them — confirm against the deployment.
func clientIP(r *http.Request) string {
	// parse reports the normalized form of candidate when it is a valid IP.
	parse := func(candidate string) (string, bool) {
		ip := net.ParseIP(strings.TrimSpace(candidate))
		if ip == nil {
			return "", false
		}
		return ip.String(), true
	}

	if addr, ok := parse(r.Header.Get("X-Real-IP")); ok {
		return addr
	}

	// X-Forwarded-For may be a comma-separated list; only the first (client)
	// element is considered.
	first, _, _ := strings.Cut(r.Header.Get("X-Forwarded-For"), ",")
	if addr, ok := parse(first); ok {
		return addr
	}

	host, _, err := net.SplitHostPort(r.RemoteAddr)
	if err == nil {
		return host
	}
	return r.RemoteAddr
}
// clamp limits v to the range [min, max]: values below min yield min, values
// above max yield max, and anything else is returned unchanged. The lower
// bound is checked first.
func clamp(v, min, max int) int {
	switch {
	case v < min:
		return min
	case v > max:
		return max
	default:
		return v
	}
}
// writeJSON sends v to the client as a JSON document with status 200 and
// Content-Type application/json. The body ends with a newline, matching what
// json.Encoder produces.
//
// v is encoded before anything is written to w. A value that cannot be encoded
// (for example a NaN float or a channel) therefore results in a 500 response
// instead of a 200 with an empty body.
func writeJSON(w http.ResponseWriter, v any) {
	body, err := json.Marshal(v)
	if err != nil {
		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	// A failed write means the client went away; the status line is already
	// sent, so there is nothing left to report the error on.
	_, _ = w.Write(append(body, '\n'))
}