syntax = "proto3";

package pii.v1;

option go_package = "github.com/veylant/ia-gateway/gen/pii/v1;piiv1";

// PiiService detects and pseudonymizes personally identifiable information
// in user prompts before they are forwarded to LLM providers.
//
// Latency contract: p99 < 50ms for prompts up to 500 tokens.
service PiiService {
  // Detect scans text for PII entities and returns an anonymized version.
  // Detected entities are pseudonymized with [PII:TYPE:UUID] tokens and
  // stored in Redis (AES-256-GCM encrypted) for later de-pseudonymization.
  rpc Detect(PiiRequest) returns (PiiResponse);

  // Health returns the service readiness status.
  rpc Health(HealthRequest) returns (HealthResponse);
}

// PiiRequest is sent by the Go proxy before forwarding a prompt to an LLM.
message PiiRequest {
  // Raw text of the user prompt.
  string text = 1;

  // Tenant identifier — used for scoped pseudonymization mappings in Redis.
  string tenant_id = 2;

  // Unique request ID (UUID v7) for tracing and log correlation.
  string request_id = 3;

  // Detection options for this request.
  PiiOptions options = 4;
}

// PiiOptions controls the detection pipeline behaviour per request.
message PiiOptions {
  // enable_ner activates Layer 2 (Presidio + spaCy NER) in addition to regex.
  // Set to false for low-sensitivity requests to stay within the 50ms budget.
  bool enable_ner = 1;

  // confidence_threshold filters out entities below this confidence score.
  // Presidio default is 0.85 — lower to catch more (at the cost of false
  // positives).
  //
  // NOTE(review): this field has implicit presence, so an unset threshold is
  // indistinguishable from an explicit 0.0 on the wire. Confirm that the
  // service treats 0.0 as "use the default" rather than "accept everything".
  float confidence_threshold = 2;

  // zero_retention: if true, the Python PII service skips persisting the
  // pseudonymization mapping to Redis. Mappings are held in-memory only for
  // the duration of this request. Activated per-tenant via the
  // "zero_retention" feature flag (E4-12).
  bool zero_retention = 3;
}

// PiiResponse is returned by the PII service to the Go proxy.
message PiiResponse {
  // Anonymized version of the input text.
  // PII values are replaced by tokens of the form [PII:EMAIL:3a7f2b1c-...].
  string anonymized_text = 1;

  // List of all detected PII entities with their pseudonyms.
  repeated PiiEntity entities = 2;

  // Total time spent in the PII pipeline, in milliseconds.
  int64 processing_time_ms = 3;
}

// PiiEntity represents a single detected PII value and its pseudonym.
message PiiEntity {
  // Entity type as detected by the pipeline.
  // Known values: EMAIL, PHONE_NUMBER, IBAN_CODE, FR_SSN, CREDIT_CARD,
  // PERSON, LOCATION, ORGANIZATION.
  string entity_type = 1;

  // The original PII value found in the text (never logged in production).
  string original_value = 2;

  // The pseudonymization token that replaced this entity in anonymized_text.
  // Format: [PII:TYPE:UUID], e.g. [PII:EMAIL:3a7f2b1c-...].
  string pseudonym = 3;

  // Character offset in the original text at which the entity starts.
  int32 start = 4;

  // Character offset in the original text at which the entity ends.
  int32 end = 5;

  // Detection confidence (0.0–1.0). 1.0 for regex matches, model-scored for
  // NER.
  float confidence = 6;

  // Detection layer that found this entity: "regex", "ner".
  string detection_layer = 7;
}

// HealthRequest is empty — used for service readiness probes.
message HealthRequest {}

// HealthResponse reports service status and loaded model information.
message HealthResponse {
  // "ok" when the service is ready to handle requests.
  string status = 1;

  // Whether spaCy NER model is loaded and ready (warm).
  bool ner_model_loaded = 2;

  // Name of the loaded spaCy model, e.g. "fr_core_news_lg".
  string spacy_model = 3;
}