// Package pii provides a gRPC client for the Python PII detection service.
// It exposes a thin wrapper with graceful degradation: if the service is
// unreachable and fail_open is true, the original text is returned unchanged.
package pii
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"go.uber.org/zap"
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/grpc/credentials/insecure"
|
|
|
|
piiv1 "github.com/veylant/ia-gateway/gen/pii/v1"
|
|
)
|
|
|
|
// Entity describes one piece of PII that the Python service detected and
// replaced with a pseudonym.
type Entity struct {
	// EntityType is the category label reported by the service.
	EntityType string
	// OriginalValue is the text that was replaced.
	OriginalValue string
	// Pseudonym is the replacement token, of the form [PII:TYPE:UUID].
	Pseudonym string
	// Start and End are the entity's position as reported by the service.
	// NOTE(review): presumably offsets into the submitted text; whether they
	// count bytes or runes, and whether End is exclusive, is not visible
	// here — confirm against the service.
	Start, End int32
	// Confidence is the detection score reported by the service.
	Confidence float32
	// DetectionLayer identifies which detection layer of the service
	// produced this entity, as reported by the service.
	DetectionLayer string
}
|
|
|
|
// DetectResult holds the anonymized prompt and the entity list for
|
|
// later de-pseudonymization of the LLM response.
|
|
type DetectResult struct {
|
|
AnonymizedText string
|
|
Entities []Entity
|
|
ProcessingTimeMs int64
|
|
}
|
|
|
|
// Client wraps a gRPC connection to the PII detection service.
|
|
type Client struct {
|
|
conn *grpc.ClientConn
|
|
stub piiv1.PiiServiceClient
|
|
timeout time.Duration
|
|
failOpen bool
|
|
logger *zap.Logger
|
|
}
|
|
|
|
// Config carries the settings New needs to build a PII gRPC Client.
type Config struct {
	// Address is the target of the PII service, e.g. "localhost:50051".
	Address string

	// Timeout is the deadline applied to each RPC call. When left at zero,
	// New substitutes a default of 100ms.
	Timeout time.Duration

	// FailOpen selects the failure policy: when true, a failed call yields
	// the original text instead of an error.
	FailOpen bool
}
|
|
|
|
// New creates a Client and establishes a gRPC connection.
|
|
// The connection is lazy — the first RPC will trigger the actual dial.
|
|
func New(cfg Config, logger *zap.Logger) (*Client, error) {
|
|
if cfg.Timeout == 0 {
|
|
cfg.Timeout = 100 * time.Millisecond
|
|
}
|
|
|
|
conn, err := grpc.NewClient(
|
|
cfg.Address,
|
|
grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("pii: dial %s: %w", cfg.Address, err)
|
|
}
|
|
|
|
return &Client{
|
|
conn: conn,
|
|
stub: piiv1.NewPiiServiceClient(conn),
|
|
timeout: cfg.Timeout,
|
|
failOpen: cfg.FailOpen,
|
|
logger: logger,
|
|
}, nil
|
|
}
|
|
|
|
// Close releases the gRPC connection.
|
|
func (c *Client) Close() error {
|
|
return c.conn.Close()
|
|
}
|
|
|
|
// Detect sends text to the PII service for detection and pseudonymization.
|
|
// If the call fails and FailOpen is true, it returns the original text with no entities.
|
|
// zeroRetention instructs the Python service not to persist the mapping to Redis —
|
|
// the pseudonymization tokens are ephemeral and valid only for this request (E4-12).
|
|
func (c *Client) Detect(
|
|
ctx context.Context,
|
|
text, tenantID, requestID string,
|
|
enableNER bool,
|
|
zeroRetention bool,
|
|
) (*DetectResult, error) {
|
|
callCtx, cancel := context.WithTimeout(ctx, c.timeout)
|
|
defer cancel()
|
|
|
|
resp, err := c.stub.Detect(callCtx, &piiv1.PiiRequest{
|
|
Text: text,
|
|
TenantId: tenantID,
|
|
RequestId: requestID,
|
|
Options: &piiv1.PiiOptions{
|
|
EnableNer: enableNER,
|
|
ConfidenceThreshold: 0.85,
|
|
ZeroRetention: zeroRetention,
|
|
},
|
|
})
|
|
if err != nil {
|
|
if c.failOpen {
|
|
c.logger.Warn("pii service unavailable — fail open, returning original text",
|
|
zap.String("tenant_id", tenantID),
|
|
zap.String("request_id", requestID),
|
|
zap.Error(err),
|
|
)
|
|
return &DetectResult{AnonymizedText: text}, nil
|
|
}
|
|
return nil, fmt.Errorf("pii: detect RPC: %w", err)
|
|
}
|
|
|
|
entities := make([]Entity, 0, len(resp.Entities))
|
|
for _, e := range resp.Entities {
|
|
entities = append(entities, Entity{
|
|
EntityType: e.EntityType,
|
|
OriginalValue: e.OriginalValue,
|
|
Pseudonym: e.Pseudonym,
|
|
Start: e.Start,
|
|
End: e.End,
|
|
Confidence: e.Confidence,
|
|
DetectionLayer: e.DetectionLayer,
|
|
})
|
|
}
|
|
|
|
return &DetectResult{
|
|
AnonymizedText: resp.AnonymizedText,
|
|
Entities: entities,
|
|
ProcessingTimeMs: resp.ProcessingTimeMs,
|
|
}, nil
|
|
}
|