// Protocol Buffer definitions for the PII detection service (pii.v1).
syntax = "proto3";

package pii.v1;

option go_package = "github.com/veylant/ia-gateway/gen/pii/v1;piiv1";

// PiiService detects and pseudonymizes personally identifiable information
// (PII) in user prompts before they are forwarded to LLM providers.
//
// Latency contract: p99 < 50ms for prompts up to 500 tokens.
service PiiService {
  // Detect scans text for PII entities and returns an anonymized version.
  // Detected entities are pseudonymized with [PII:TYPE:UUID] tokens and
  // stored in Redis (AES-256-GCM encrypted) for later de-pseudonymization.
  // When PiiOptions.zero_retention is true the mapping is not persisted to
  // Redis; see that field for details.
  rpc Detect(PiiRequest) returns (PiiResponse);

  // Health returns the service readiness status.
  rpc Health(HealthRequest) returns (HealthResponse);
}

// PiiRequest is sent by the Go proxy before forwarding a prompt to an LLM.
message PiiRequest {
  // Raw text of the user prompt.
  string text = 1;

  // Tenant identifier — used for scoped pseudonymization mappings in Redis.
  string tenant_id = 2;

  // Unique request ID (UUID v7) for tracing and log correlation.
  string request_id = 3;

  // Detection options for this request.
  // NOTE(review): behaviour when this message is left unset is not specified
  // in this file — confirm which defaults the service applies.
  PiiOptions options = 4;
}

// PiiOptions controls the detection pipeline behaviour per request.
message PiiOptions {
  // enable_ner activates Layer 2 (Presidio + spaCy NER) in addition to regex.
  // Set to false for low-sensitivity requests to stay within the 50ms budget.
  // As a proto3 scalar without `optional`, an unset value reads as false.
  bool enable_ner = 1;

  // confidence_threshold filters out entities below this confidence score.
  // Presidio default is 0.85 — lower to catch more (at the cost of false
  // positives).
  // NOTE(review): this field has implicit presence, so "unset" and 0.0 are
  // indistinguishable on the wire — confirm how the service treats 0.0.
  float confidence_threshold = 2;

  // zero_retention: if true, the Python PII service skips persisting the
  // pseudonymization mapping to Redis. Mappings are held in-memory only for
  // the duration of this request. Activated per-tenant via the
  // "zero_retention" feature flag (E4-12).
  bool zero_retention = 3;
}

// PiiResponse is returned by the PII service to the Go proxy.
message PiiResponse {
  // Anonymized version of the input text.
  // PII values are replaced by tokens of the form [PII:EMAIL:3a7f2b1c-...].
  string anonymized_text = 1;

  // List of all detected PII entities with their pseudonyms. Each entry
  // carries the token that replaced it in anonymized_text.
  repeated PiiEntity entities = 2;

  // Total time spent in the PII pipeline, in milliseconds.
  int64 processing_time_ms = 3;
}

// PiiEntity represents a single detected PII value and its pseudonym.
message PiiEntity {
  // Entity type as detected by the pipeline.
  // Known values: EMAIL, PHONE_NUMBER, IBAN_CODE, FR_SSN, CREDIT_CARD,
  // PERSON, LOCATION, ORGANIZATION.
  string entity_type = 1;

  // The original PII value found in the text (never logged in production).
  string original_value = 2;

  // The pseudonymization token that replaced this entity in anonymized_text.
  // Format: [PII:<TYPE>:<UUID>]
  string pseudonym = 3;

  // Character offsets in the original text.
  // NOTE(review): the offset unit (bytes vs. code points) and whether `end`
  // is exclusive are not specified in this file — confirm with the service.
  int32 start = 4;
  int32 end = 5;

  // Detection confidence (0.0–1.0). 1.0 for regex matches, model-scored for
  // NER.
  float confidence = 6;

  // Detection layer that found this entity: "regex", "ner".
  string detection_layer = 7;
}

// HealthRequest is empty — used for service readiness probes.
message HealthRequest {}

// HealthResponse reports service status and loaded model information.
message HealthResponse {
  // "ok" when the service is ready to handle requests.
  string status = 1;

  // Whether spaCy NER model is loaded and ready (warm).
  bool ner_model_loaded = 2;

  // Name of the loaded spaCy model, e.g. "fr_core_news_lg".
  string spacy_model = 3;
}