package pii import ( "encoding/json" "fmt" "net/http" "regexp" "sort" "strings" "go.uber.org/zap" "github.com/veylant/ia-gateway/internal/apierror" ) // AnalyzeRequest is the JSON body accepted by POST /v1/pii/analyze. type AnalyzeRequest struct { Text string `json:"text"` } // AnalyzeEntity is a single PII entity returned by the analyze endpoint. type AnalyzeEntity struct { Type string `json:"type"` Start int `json:"start"` End int `json:"end"` Confidence float64 `json:"confidence"` Layer string `json:"layer"` } // AnalyzeResponse is the JSON response of POST /v1/pii/analyze. type AnalyzeResponse struct { Anonymized string `json:"anonymized"` Entities []AnalyzeEntity `json:"entities"` } // AnalyzeHandler wraps a pii.Client as an HTTP handler for the playground. // It is safe to call when client is nil: falls back to regex detection. type AnalyzeHandler struct { client *Client logger *zap.Logger } // NewAnalyzeHandler creates a new AnalyzeHandler. // client may be nil (PII service disabled) — the handler falls back to regex. func NewAnalyzeHandler(client *Client, logger *zap.Logger) *AnalyzeHandler { return &AnalyzeHandler{client: client, logger: logger} } func (h *AnalyzeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { var req AnalyzeRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { apierror.WriteError(w, apierror.NewBadRequestError("invalid JSON: "+err.Error())) return } if req.Text == "" { w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) _ = json.NewEncoder(w).Encode(AnalyzeResponse{Anonymized: "", Entities: []AnalyzeEntity{}}) return } // Attempt real PII detection if service is available. // NOTE: when fail_open=true, Detect() returns (result, nil) even on RPC // failure, but sets result.Entities to nil to signal the degraded path. // A real successful response always has a non-nil Entities slice. if h.client != nil { resp, err := h.client.Detect(r.Context(), req.Text, "playground", "playground-analyze", true, false) if err == nil && resp.Entities != nil { // Real PII service response (may be empty if no PII detected). entities := make([]AnalyzeEntity, 0, len(resp.Entities)) for _, e := range resp.Entities { entities = append(entities, AnalyzeEntity{ Type: e.EntityType, Start: int(e.Start), End: int(e.End), Confidence: float64(e.Confidence), Layer: e.DetectionLayer, }) } w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) _ = json.NewEncoder(w).Encode(AnalyzeResponse{ Anonymized: resp.AnonymizedText, Entities: entities, }) return } if err != nil { h.logger.Warn("PII service error — falling back to regex detection", zap.Error(err)) } else { h.logger.Debug("PII service unavailable (fail-open) — falling back to regex detection") } } // Fallback: local regex detection so the playground stays useful when the // PII sidecar is not running (dev mode, demo environments). result := regexDetect(req.Text) w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) _ = json.NewEncoder(w).Encode(result) } // ── Regex-based local detection ─────────────────────────────────────────────── // Package-level compiled regexes (compiled once at startup). var ( rePiiIBAN = regexp.MustCompile(`(?i)FR\d{2}[\s]?\d{4}[\s]?\d{4}[\s]?\d{4}[\s]?\d{4}[\s]?\d{4}[\s]?\d{3}`) rePiiEmail = regexp.MustCompile(`[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}`) rePiiPhone = regexp.MustCompile(`(?:\+33|0033|0)[\s]?[1-9](?:[\s.\-]?\d{2}){4}`) rePiiCC = regexp.MustCompile(`(?:4\d{3}|5[1-5]\d{2}|3[47]\d{2})(?:[ \-]?\d{4}){3}`) rePiiSSN = regexp.MustCompile(`\b[12][\s]?\d{2}[\s]?\d{2}[\s]?\d{2}[\s]?\d{3}[\s]?\d{3}[\s]?\d{2}\b`) ) type regexPattern struct { re *regexp.Regexp typ string } var piiPatterns = []regexPattern{ {rePiiIBAN, "IBAN_CODE"}, {rePiiEmail, "EMAIL_ADDRESS"}, {rePiiPhone, "PHONE_NUMBER"}, {rePiiCC, "CREDIT_CARD"}, {rePiiSSN, "FR_SSN"}, } type rawMatch struct { typ string start int end int } // regexDetect runs a set of compiled regexes over text and returns matched // entities with their byte offsets, plus an anonymized version of the text. func regexDetect(text string) AnalyzeResponse { var raw []rawMatch for _, p := range piiPatterns { for _, loc := range p.re.FindAllStringIndex(text, -1) { raw = append(raw, rawMatch{typ: p.typ, start: loc[0], end: loc[1]}) } } // Sort by start position. sort.Slice(raw, func(i, j int) bool { return raw[i].start < raw[j].start }) // Remove overlapping matches (keep the first / longest-starting one). filtered := raw[:0] cursor := 0 for _, m := range raw { if m.start >= cursor { filtered = append(filtered, m) cursor = m.end } } if len(filtered) == 0 { return AnalyzeResponse{Anonymized: text, Entities: []AnalyzeEntity{}} } // Build anonymized text and entity list simultaneously. var sb strings.Builder counters := map[string]int{} entities := make([]AnalyzeEntity, 0, len(filtered)) cursor = 0 for _, m := range filtered { sb.WriteString(text[cursor:m.start]) counters[m.typ]++ sb.WriteString(fmt.Sprintf("[%s_%d]", m.typ, counters[m.typ])) entities = append(entities, AnalyzeEntity{ Type: m.typ, Start: m.start, End: m.end, Confidence: 0.95, Layer: "regex-local", }) cursor = m.end } sb.WriteString(text[cursor:]) return AnalyzeResponse{ Anonymized: sb.String(), Entities: entities, } }