veylant/proto/pii/v1/pii.proto
2026-02-23 13:35:04 +01:00

105 lines
3.5 KiB
Protocol Buffer
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Wire contract for the PII detection service, written in proto3 syntax.
syntax = "proto3";
// Versioned package: all messages and services below live in pii.v1.
package pii.v1;
// Go code is generated under the import path before the ';' and uses the
// Go package name after it (piiv1).
option go_package = "github.com/veylant/ia-gateway/gen/pii/v1;piiv1";
// PiiService finds personally identifiable information (PII) in user prompts
// and pseudonymizes it before the prompts are forwarded to LLM providers.
//
// Latency contract: p99 < 50ms for prompts up to 500 tokens.
service PiiService {
  // Detect scans the request text for PII entities and returns an anonymized
  // copy. Each detected entity is replaced by a [PII:TYPE:UUID] token, and
  // the mapping is stored in Redis (AES-256-GCM encrypted) so the value can
  // be de-pseudonymized later.
  rpc Detect(PiiRequest) returns (PiiResponse);

  // Health reports the readiness status of the service.
  rpc Health(HealthRequest) returns (HealthResponse);
}
// PiiRequest is what the Go proxy sends to the PII service before a prompt
// is forwarded to an LLM.
message PiiRequest {
  // The user prompt, as raw text.
  string text = 1;

  // Identifier of the tenant. Pseudonymization mappings in Redis are scoped
  // by this value.
  string tenant_id = 2;

  // Unique ID of the request (UUID v7), used for tracing and for correlating
  // log entries.
  string request_id = 3;

  // Per-request detection options.
  PiiOptions options = 4;
}
// PiiOptions tunes the behaviour of the detection pipeline for one request.
message PiiOptions {
  // When true, Layer 2 (Presidio + spaCy NER) runs in addition to the regex
  // layer. Leave false for low-sensitivity requests so the call stays within
  // the 50ms budget.
  bool enable_ner = 1;

  // Entities whose confidence score is below this value are filtered out.
  // The Presidio default is 0.85; lowering it catches more entities at the
  // cost of more false positives.
  // NOTE(review): this field has implicit presence, so an unset threshold is
  // received as 0.0 — confirm how the service treats a zero value.
  float confidence_threshold = 2;

  // When true, the Python PII service does not persist the pseudonymization
  // mapping to Redis; mappings are kept in memory only for the duration of
  // this request. Activated per tenant via the "zero_retention" feature
  // flag (E4-12).
  bool zero_retention = 3;
}
// PiiResponse is the PII service's reply to the Go proxy.
message PiiResponse {
  // The input text with every PII value replaced by a token of the form
  // [PII:EMAIL:3a7f2b1c-...].
  string anonymized_text = 1;

  // Every PII entity that was detected, together with its pseudonym.
  repeated PiiEntity entities = 2;

  // Total time spent in the PII pipeline, in milliseconds.
  int64 processing_time_ms = 3;
}
// PiiEntity describes one detected PII value and the pseudonym that replaced
// it.
message PiiEntity {
  // Type of the entity, as reported by the pipeline.
  // Known values: EMAIL, PHONE_NUMBER, IBAN_CODE, FR_SSN, CREDIT_CARD,
  // PERSON, LOCATION, ORGANIZATION.
  string entity_type = 1;

  // The original PII value found in the text (never logged in production).
  string original_value = 2;

  // Token that stands in for this entity in anonymized_text.
  // Format: [PII:<TYPE>:<UUID>]
  string pseudonym = 3;

  // Character offset in the original text where the entity starts.
  int32 start = 4;

  // Character offset in the original text where the entity ends.
  // NOTE(review): whether this offset is inclusive or exclusive is not
  // stated in this file — confirm against the service implementation.
  int32 end = 5;

  // Detection confidence, from 0.0 to 1.0. Regex matches report 1.0; NER
  // matches carry the score produced by the model.
  float confidence = 6;

  // Which detection layer found this entity: "regex" or "ner".
  string detection_layer = 7;
}
// HealthRequest carries no fields; it is sent by service readiness probes.
message HealthRequest {
}
// HealthResponse carries the service status plus information about the
// loaded model.
message HealthResponse {
  // Set to "ok" once the service is ready to handle requests.
  string status = 1;

  // True when the spaCy NER model is loaded and ready (warm).
  bool ner_model_loaded = 2;

  // Name of the spaCy model that is loaded, e.g. "fr_core_news_lg".
  string spacy_model = 3;
}