veylant/internal/pii/client.go
2026-03-06 18:38:04 +01:00

139 lines
3.7 KiB
Go

// Package pii provides a gRPC client for the Python PII detection service.
// It exposes a thin wrapper with graceful degradation: if the service is
// unreachable and fail_open is true, the original text is returned unchanged.
package pii
import (
"context"
"fmt"
"time"
"go.uber.org/zap"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
piiv1 "github.com/veylant/ia-gateway/gen/pii/v1"
)
// Entity describes a single PII occurrence that the Python service detected
// and replaced with a pseudonym.
type Entity struct {
	// EntityType is the category label reported by the service.
	EntityType string
	// OriginalValue is the value reported by the service as the original
	// (pre-pseudonymization) text of the entity.
	OriginalValue string
	// Pseudonym is the replacement token, of the form [PII:TYPE:UUID].
	Pseudonym string
	// Start and End are the entity bounds as reported by the service.
	// NOTE(review): the unit (bytes vs. runes), the text they index into and
	// whether End is inclusive are decided by the Python service — confirm
	// before slicing with them.
	Start, End int32
	// Confidence is the detection score reported by the service.
	Confidence float32
	// DetectionLayer is the detection-layer label reported by the service.
	DetectionLayer string
}
// DetectResult is what Detect hands back: the anonymized prompt together with
// the entities needed to de-pseudonymize the LLM response later on.
type DetectResult struct {
	// AnonymizedText is the text returned by the service, or the unmodified
	// input when Detect failed open.
	AnonymizedText string
	// Entities lists the detected entities; it is left nil when Detect failed open.
	Entities []Entity
	// ProcessingTimeMs is the processing time reported by the service.
	ProcessingTimeMs int64
}
// Client is a handle on the PII detection service. It owns the gRPC
// connection and carries the per-call policy (timeout and fail-open mode).
type Client struct {
	conn     *grpc.ClientConn       // underlying connection; released by Close
	stub     piiv1.PiiServiceClient // generated service stub bound to conn
	timeout  time.Duration          // deadline applied to each Detect RPC
	failOpen bool                   // true: Detect returns the input text instead of an error when the RPC fails
	logger   *zap.Logger            // receives the warning emitted on the fail-open path
}
// Config carries the settings New uses to build a Client.
type Config struct {
	// Address is the PII service endpoint, e.g. "localhost:50051".
	Address string
	// Timeout bounds each individual RPC. A zero value makes New fall back
	// to 100ms.
	Timeout time.Duration
	// FailOpen selects the failure policy: when true, a failed call yields the
	// original text rather than an error.
	FailOpen bool
}
// New builds a Client for the PII service at cfg.Address.
//
// No connection is established here: grpc.NewClient only prepares the
// channel, and the first RPC triggers the actual dial. An error is returned
// only when the gRPC client itself cannot be created.
//
// A non-positive cfg.Timeout is replaced by a 100ms default, and a nil logger
// is replaced by a no-op logger so that the fail-open path in Detect can
// always log without panicking.
//
// NOTE(review): the channel uses insecure (plaintext) transport credentials
// while carrying un-redacted text to the PII service — confirm this is only
// ever used on a trusted local/loopback network.
func New(cfg Config, logger *zap.Logger) (*Client, error) {
	const defaultTimeout = 100 * time.Millisecond

	// context.WithTimeout with a zero or negative duration yields a context
	// that is already expired, so every Detect call would fail immediately.
	if cfg.Timeout <= 0 {
		cfg.Timeout = defaultTimeout
	}
	if logger == nil {
		logger = zap.NewNop()
	}

	conn, err := grpc.NewClient(
		cfg.Address,
		grpc.WithTransportCredentials(insecure.NewCredentials()),
	)
	if err != nil {
		return nil, fmt.Errorf("pii: dial %s: %w", cfg.Address, err)
	}

	return &Client{
		conn:     conn,
		stub:     piiv1.NewPiiServiceClient(conn),
		timeout:  cfg.Timeout,
		failOpen: cfg.FailOpen,
		logger:   logger,
	}, nil
}
// Close releases the gRPC connection held by the client and returns the
// error reported by the connection, if any.
//
// It is a no-op returning nil when called on a nil Client or on a Client that
// holds no connection (for example a zero value that was not built by New),
// so it is always safe to defer.
func (c *Client) Close() error {
	if c == nil || c.conn == nil {
		return nil
	}
	return c.conn.Close()
}
// Detect submits text to the PII service for detection and pseudonymization
// and converts the reply into a DetectResult.
//
// The RPC runs under ctx bounded by the client's configured timeout. tenantID
// and requestID are forwarded in the request and attached to the fail-open
// log entry. enableNER is passed through as the EnableNer option. zeroRetention
// instructs the Python service not to persist the mapping to Redis — the
// pseudonymization tokens are then ephemeral and valid only for this request
// (E4-12).
//
// When the RPC fails and the client is in fail-open mode, a warning is logged
// and the original text is returned with no entities and a nil error.
// Otherwise the RPC error is returned wrapped.
func (c *Client) Detect(
	ctx context.Context,
	text, tenantID, requestID string,
	enableNER bool,
	zeroRetention bool,
) (*DetectResult, error) {
	// Threshold sent with every request.
	const confidenceThreshold = 0.65

	req := &piiv1.PiiRequest{
		Text:      text,
		TenantId:  tenantID,
		RequestId: requestID,
		Options: &piiv1.PiiOptions{
			EnableNer:           enableNER,
			ConfidenceThreshold: confidenceThreshold,
			ZeroRetention:       zeroRetention,
		},
	}

	rpcCtx, release := context.WithTimeout(ctx, c.timeout)
	defer release()

	reply, rpcErr := c.stub.Detect(rpcCtx, req)
	if rpcErr != nil {
		if !c.failOpen {
			return nil, fmt.Errorf("pii: detect RPC: %w", rpcErr)
		}
		// Fail open: let the request proceed with the text untouched.
		c.logger.Warn("pii service unavailable — fail open, returning original text",
			zap.String("tenant_id", tenantID),
			zap.String("request_id", requestID),
			zap.Error(rpcErr),
		)
		return &DetectResult{AnonymizedText: text}, nil
	}

	// Copy the wire entities into the package's own type, one-to-one and in order.
	found := make([]Entity, len(reply.Entities))
	for i, src := range reply.Entities {
		found[i] = Entity{
			EntityType:     src.EntityType,
			OriginalValue:  src.OriginalValue,
			Pseudonym:      src.Pseudonym,
			Start:          src.Start,
			End:            src.End,
			Confidence:     src.Confidence,
			DetectionLayer: src.DetectionLayer,
		}
	}

	result := &DetectResult{
		AnonymizedText:   reply.AnonymizedText,
		Entities:         found,
		ProcessingTimeMs: reply.ProcessingTimeMs,
	}
	return result, nil
}