+ All notable changes to Veylant IA are documented here. Versioning follows{" "}
+
+ Semantic Versioning
+
+ .
+
+
+
v1.0.0 — February 25, 2026
+
+ First production release. Pentest passed (0 Critical, 0 High). 2 pilot clients migrated.
+
+
+
Features
+
+ {v100Changes.features.map((f) => (
+
+ {f}
+
+ ))}
+
+
+
Bug Fixes
+
+ {v100Changes.bugfixes.map((b) => (
+
+ {b}
+
+ ))}
+
+
+
v1.1.0 — Planned (Q2 2026)
+
+ Priorities sourced from 2 pilot client sessions (MoSCoW method). See{" "}
+ docs/feedback-backlog.md for the full backlog.
+
+
+
+
+
+
+
Priority
+
Feature
+
+
+
+ {v110Roadmap.map((item) => (
+
+
+
+ {item.priority}
+
+
+
{item.item}
+
+ ))}
+
+
+
+
+
Migration & Compatibility
+
+ V1.0.0 has no breaking changes from the beta releases used by pilot clients. The{" "}
+ /v1/chat/completions API is fully backward-compatible with the OpenAI API
+ format. Any client using the OpenAI SDK will continue to work without changes.
+
+
+ See the Installation guide for upgrade
+ instructions from beta.
+
+ Everything you need to build, configure, and operate your enterprise AI governance
+ platform. Prevent Shadow AI, enforce PII anonymization, ensure GDPR compliance, and
+ control costs across all LLM usage.
+
+
+
+ {/* What's new */}
+
+ Pentest passed (0 Critical, 0 High), 2 pilot clients migrated, blue/green deployment with
+ Istio, HPA autoscaling (3→15 replicas), 7 Prometheus alerts, SLO dashboard (99.5%), and 6
+ operational runbooks.{" "}
+
+ Read the full changelog →
+
+
+
+ {/* Cards grid */}
+
+ );
+}
diff --git a/web/src/pages/docs/api-reference/AdminCompliancePage.tsx b/web/src/pages/docs/api-reference/AdminCompliancePage.tsx
new file mode 100644
index 0000000..d5225c5
--- /dev/null
+++ b/web/src/pages/docs/api-reference/AdminCompliancePage.tsx
@@ -0,0 +1,143 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+import { ApiEndpoint } from "../components/ApiEndpoint";
+import { ParamTable } from "../components/ParamTable";
+
+export function AdminCompliancePage() {
+ return (
+
+
Admin — Compliance
+
+ GDPR Article 30 processing registry, EU AI Act risk classification, and data subject
+ rights (access and erasure).
+
+
+
+ Compliance endpoints require the admin role. Auditors can read but not modify.
+
+
+
GDPR Article 30 — Processing Registry
+
+
+
+
+
+
+
+
+
+
+
EU AI Act Classification
+
+
+
+
+
Risk level mapping:
+
+
+
+
+
Score
+
Level
+
Description
+
+
+
+ {[
+ { score: "5", level: "Forbidden", desc: "System must not be deployed. Example: social scoring, real-time biometric surveillance in public spaces.", color: "text-red-600" },
+ { score: "3–4", level: "High", desc: "Strict conformity assessment required before deployment. DPIA mandatory.", color: "text-orange-600" },
+ { score: "1–2", level: "Limited", desc: "Transparency obligations: users must be informed they interact with AI.", color: "text-amber-600" },
+ { score: "0", level: "Minimal", desc: "Minimal risk. Voluntary code of conduct recommended.", color: "text-green-600" },
+ ].map((row) => (
+
+
{row.score}
+
{row.level}
+
{row.desc}
+
+ ))}
+
+
+
+
+
GDPR Subject Rights
+
+
Article 15 — Right of Access
+
+
+
+
Article 17 — Right to Erasure
+
+
+
+ ClickHouse audit logs cannot be deleted. The erasure endpoint scrubs PII from prompt
+ content and pseudonymizes user identifiers, but the request metadata (token counts, cost,
+ timestamps) is retained for compliance reporting.
+
+
+ );
+}
diff --git a/web/src/pages/docs/api-reference/AdminFlagsPage.tsx b/web/src/pages/docs/api-reference/AdminFlagsPage.tsx
new file mode 100644
index 0000000..a907071
--- /dev/null
+++ b/web/src/pages/docs/api-reference/AdminFlagsPage.tsx
@@ -0,0 +1,98 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+import { ApiEndpoint } from "../components/ApiEndpoint";
+
+export function AdminFlagsPage() {
+ return (
+
+
Admin — Feature Flags
+
+ Feature flags control behavior at runtime without redeployment. Flags are stored in
+ PostgreSQL with an in-memory cache (updated every 30 seconds). If PostgreSQL is
+ unavailable, the in-memory defaults are used.
+
+
+
+ Feature flag management requires the admin role.
+
+
+
List All Flags
+
+
+
+
Get a Flag
+
+
+
Set a Flag
+
+
+
+
Delete a Flag
+
+
+
Default Flag Values
+
+
+
+
+
Flag
+
Default
+
Effect when disabled
+
+
+
+ {[
+ { flag: "pii_detection", def: "true", effect: "Prompts forwarded without PII scanning" },
+ { flag: "pii_pseudonymization", def: "true", effect: "PII detected but not stored in Redis" },
+ { flag: "audit_logging", def: "true", effect: "Requests not written to ClickHouse" },
+ { flag: "playground", def: "true", effect: "POST /playground/analyze returns 404" },
+ { flag: "streaming", def: "true", effect: "SSE requests return 400" },
+ { flag: "cost_tracking", def: "true", effect: "Token cost not computed or stored" },
+ { flag: "circuit_breaker", def: "true", effect: "Failures not counted, no fallback" },
+ ].map((row) => (
+
+
{row.flag}
+
+
+ {row.def}
+
+
+
{row.effect}
+
+ ))}
+
+
+
+
+
+ Disabling audit_logging or pii_detection in production may
+ violate GDPR obligations and internal SLA requirements. Always coordinate with your DPO
+ before making changes.
+
+
+ );
+}
diff --git a/web/src/pages/docs/api-reference/AdminLogsPage.tsx b/web/src/pages/docs/api-reference/AdminLogsPage.tsx
new file mode 100644
index 0000000..0849ced
--- /dev/null
+++ b/web/src/pages/docs/api-reference/AdminLogsPage.tsx
@@ -0,0 +1,133 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+import { ApiEndpoint } from "../components/ApiEndpoint";
+import { ParamTable } from "../components/ParamTable";
+
+export function AdminLogsPage() {
+ return (
+
+
Admin — Audit Logs & Costs
+
+ Query the immutable audit trail and cost breakdown for AI requests. All data is stored in
+ ClickHouse (append-only — no DELETE operations).
+
+
+
Audit Logs
+
+
+
+
+
+
+
+ All accesses to audit logs are themselves logged. This satisfies the "audit-of-the-audit"
+ requirement for sensitive compliance use cases.
+
+
+
Cost Breakdown
+
+
+
+
+
+
+
Rate Limit Overrides
+
+
+
+
+
+ );
+}
diff --git a/web/src/pages/docs/api-reference/AdminPoliciesPage.tsx b/web/src/pages/docs/api-reference/AdminPoliciesPage.tsx
new file mode 100644
index 0000000..5820ca1
--- /dev/null
+++ b/web/src/pages/docs/api-reference/AdminPoliciesPage.tsx
@@ -0,0 +1,124 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+import { ApiEndpoint } from "../components/ApiEndpoint";
+import { ParamTable } from "../components/ParamTable";
+
+export function AdminPoliciesPage() {
+ return (
+
+
Admin — Routing Policies
+
+ Routing policies (rules) define how AI requests are dispatched to providers. Rules are
+ evaluated in ascending priority order; the first matching rule wins.
+
+
+
+ All /v1/admin/policies endpoints require the admin or{" "}
+ manager role.
+
+
+
rbac — Rules per RBAC role with model restrictions
+
department — Rules per department (legal, hr, engineering, finance)
+
cost-optimized — Routes to cheaper models for simple queries
+
+
+ );
+}
diff --git a/web/src/pages/docs/api-reference/AdminUsersPage.tsx b/web/src/pages/docs/api-reference/AdminUsersPage.tsx
new file mode 100644
index 0000000..87f457d
--- /dev/null
+++ b/web/src/pages/docs/api-reference/AdminUsersPage.tsx
@@ -0,0 +1,91 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+import { ApiEndpoint } from "../components/ApiEndpoint";
+import { ParamTable } from "../components/ParamTable";
+
+export function AdminUsersPage() {
+ return (
+
+
Admin — Users
+
+ Manage users within a tenant. Users are synchronized from Keycloak but can have additional
+ metadata (department, cost overrides) managed through the admin API.
+
+
+
+ Requires the admin role. Managers can read users but cannot create or delete them.
+
+
+
+ );
+}
diff --git a/web/src/pages/docs/api-reference/AuthenticationPage.tsx b/web/src/pages/docs/api-reference/AuthenticationPage.tsx
new file mode 100644
index 0000000..6836d3b
--- /dev/null
+++ b/web/src/pages/docs/api-reference/AuthenticationPage.tsx
@@ -0,0 +1,133 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+
+export function AuthenticationPage() {
+ return (
+
+
Authentication
+
+ All /v1/* endpoints require a Bearer JWT in the{" "}
+ Authorization header. Veylant IA validates the token against Keycloak (OIDC)
+ or uses a mock verifier in development mode.
+
+
+ When server.env=development and Keycloak is unreachable, the proxy uses a{" "}
+ MockVerifier. Any non-empty Bearer token is accepted. The authenticated user
+ is injected as admin@veylant.dev with admin role and tenant ID{" "}
+ dev-tenant.
+
+
+
+
Production: Keycloak OIDC Flow
+
In production, clients obtain a token via the standard OIDC Authorization Code flow:
+
+
Redirect user to Keycloak login page
+
User authenticates; Keycloak redirects back with an authorization code
+
Exchange code for tokens at the token endpoint
+
Use the access_token as the Bearer token
+
+
+
+
JWT Claims
+
The proxy extracts the following claims from the JWT:
+ );
+}
diff --git a/web/src/pages/docs/api-reference/ChatCompletionsPage.tsx b/web/src/pages/docs/api-reference/ChatCompletionsPage.tsx
new file mode 100644
index 0000000..f100505
--- /dev/null
+++ b/web/src/pages/docs/api-reference/ChatCompletionsPage.tsx
@@ -0,0 +1,194 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+import { ApiEndpoint } from "../components/ApiEndpoint";
+import { ParamTable } from "../components/ParamTable";
+
+export function ChatCompletionsPage() {
+ return (
+
+
Chat Completions
+
+ The primary inference endpoint. Fully compatible with the OpenAI Chat Completions API —
+ switch your base_url to Veylant IA and all existing SDK calls work unchanged.
+
+
+
+
+
+ This endpoint is a superset of the OpenAI Chat Completions API. All standard OpenAI
+ parameters are supported and forwarded to the upstream provider. Veylant IA adds governance
+ on top without changing the request/response schema.
+
+
+
Request Body
+
+
+
Example Request
+
+
+
PII Anonymization in Action
+
+ Before the request is forwarded to the LLM, the PII service scans all message content.
+ Detected entities are anonymized in the prompt. The audit log records what was found.
+
+
+
+
Response
+
Responses are identical to the OpenAI API format:
+
+
+
Streaming (SSE)
+
+ Set "stream": true to receive chunks via Server-Sent Events. PII
+ anonymization applies to the request before it's sent upstream — not to
+ the streamed response. This keeps streaming latency minimal.
+
+
+
+
+
+
Error Responses
+
+
+
How Routing Affects the Model
+
+ The routing engine evaluates rules against the request context. A matched rule can override
+ the requested model or select a different provider. The model in the response
+ reflects the model actually used (which may differ from what was requested).
+
+
+ Use the GET /v1/admin/logs endpoint to see model_requested vs{" "}
+ model_used in the audit trail.
+
+
+ );
+}
diff --git a/web/src/pages/docs/api-reference/PiiAnalysisPage.tsx b/web/src/pages/docs/api-reference/PiiAnalysisPage.tsx
new file mode 100644
index 0000000..2e178c0
--- /dev/null
+++ b/web/src/pages/docs/api-reference/PiiAnalysisPage.tsx
@@ -0,0 +1,136 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+import { ApiEndpoint } from "../components/ApiEndpoint";
+import { ParamTable } from "../components/ParamTable";
+
+export function PiiAnalysisPage() {
+ return (
+
+
PII Analysis
+
+ Analyze text for PII on demand. This endpoint exposes the same 3-layer detection pipeline
+ used internally for prompt anonymization — useful for testing policies, building pre-flight
+ checks, or auditing content.
+
+ Veylant IA supports zero-downtime deployments via blue/green traffic switching with Istio.
+ Two Helm releases (veylant-proxy-blue and veylant-proxy-green)
+ run simultaneously; traffic is switched via an Istio VirtualService patch.
+
+
+
+ Traffic rollback takes under 5 seconds — just patch the VirtualService weight.
+ No pod restarts required.
+
+
+
Architecture
+
+
{`Internet
+ │
+Istio Gateway
+ │
+VirtualService (veylant-proxy)
+ ├── weight: 100 → Service: veylant-proxy-blue (ACTIVE)
+ └── weight: 0 → Service: veylant-proxy-green (STANDBY)
+
+Helm releases:
+ veylant-proxy-blue: 3 replicas, image: 1.0.0 ← current production
+ veylant-proxy-green: 3 replicas, image: 1.1.0 ← new version (deployed but no traffic)`}
+
+
+
Deploying a New Version
+
+
+
Rollback
+
+
+
Make Commands
+
+
+
+
+
Command
+
Action
+
+
+
+ {[
+ { cmd: "make deploy-blue IMAGE_TAG=X.Y.Z", action: "Deploy IMAGE_TAG to blue slot (Helm upgrade)" },
+ { cmd: "make deploy-green IMAGE_TAG=X.Y.Z", action: "Deploy IMAGE_TAG to green slot (Helm upgrade)" },
+ { cmd: "make deploy-rollback ACTIVE_SLOT=blue", action: "Switch 100% traffic to blue slot" },
+ { cmd: "make deploy-rollback ACTIVE_SLOT=green", action: "Switch 100% traffic to green slot" },
+ { cmd: "make helm-dry-run", action: "Render Helm templates without deploying" },
+ { cmd: "make helm-deploy IMAGE_TAG=X.Y.Z", action: "Deploy to staging (requires KUBECONFIG)" },
+ ].map((row) => (
+
+
{row.cmd}
+
{row.action}
+
+ ))}
+
+
+
+
+
Release Pipeline
+
+ Tagging a version (v*) triggers the GitHub Actions release pipeline:
+
+
+
Multi-arch Docker build (amd64/arm64) → pushed to GHCR
+
Helm chart packaged as OCI artifact → pushed to GHCR
+
GitHub Release created with CHANGELOG.md notes extracted automatically
+
Trivy image scan (CRITICAL/HIGH blocking)
+
gitleaks secret detection
+
+
+ );
+}
diff --git a/web/src/pages/docs/deployment/DockerPage.tsx b/web/src/pages/docs/deployment/DockerPage.tsx
new file mode 100644
index 0000000..2194cd9
--- /dev/null
+++ b/web/src/pages/docs/deployment/DockerPage.tsx
@@ -0,0 +1,71 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+
+export function DockerPage() {
+ return (
+
+
Docker Compose Deployment
+
+ For small to medium deployments (single server, staging), Docker Compose is the recommended
+ approach. The production configuration uses the same services as local development with
+ hardened settings.
+
+
+
Production Configuration
+
+ Ensure you have set: server.env=production, a strong crypto.key,
+ TLS certificates for all services, PostgreSQL with TLS, and proper secrets management
+ (HashiCorp Vault recommended).
+
+
+
+
+
+ );
+}
diff --git a/web/src/pages/docs/deployment/KubernetesPage.tsx b/web/src/pages/docs/deployment/KubernetesPage.tsx
new file mode 100644
index 0000000..875950c
--- /dev/null
+++ b/web/src/pages/docs/deployment/KubernetesPage.tsx
@@ -0,0 +1,117 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+
+export function KubernetesPage() {
+ return (
+
+
Kubernetes Deployment (Helm)
+
+ Veylant IA ships a production-grade Helm chart at{" "}
+ deploy/helm/veylant-proxy/. The chart includes Deployment, Service, HPA,
+ PodDisruptionBudget, and ServiceMonitor resources.
+
+
+
Prerequisites
+
+
Kubernetes 1.28+ (EKS v1.31 tested)
+
Helm 3.12+
+
Metrics Server (for HPA)
+
Prometheus Operator (optional, for ServiceMonitor)
+
Istio 1.20+ (for blue/green deployment)
+
+
+
Install the Chart
+
+
+
Production Values
+
+
+
Horizontal Pod Autoscaler
+
+ The HPA scales from 3 to 15 replicas based on CPU (70%) and memory (80%) utilization.
+ Scale-up is fast (30s stabilization); scale-down is conservative (5 minutes) to avoid
+ thrashing.
+
+
+
+
Terraform (AWS EKS)
+
+ Infrastructure as Code for a production EKS cluster is in{" "}
+ deploy/terraform/:
+
+
+
EKS v1.31, 3-AZ node groups (t3.large)
+
S3 bucket for PostgreSQL backups (7-day retention)
+
IRSA for pod-level AWS permissions
+
VPC, subnets, security groups
+
+
+
+
+ Configure remote state (S3 + DynamoDB locking) before running in production. The default
+ local state is not suitable for team use.
+
+
+ );
+}
diff --git a/web/src/pages/docs/getting-started/KeyConceptsPage.tsx b/web/src/pages/docs/getting-started/KeyConceptsPage.tsx
new file mode 100644
index 0000000..7442943
--- /dev/null
+++ b/web/src/pages/docs/getting-started/KeyConceptsPage.tsx
@@ -0,0 +1,127 @@
+import { Callout } from "../components/Callout";
+
+const concepts = [ // Glossary entries ({ term, definition }); KeyConceptsPage iterates this array with concepts.map to render each term and its definition.
+ {
+ term: "Tenant",
+ definition:
+ "A logical unit of isolation — typically a company or business unit. All data in PostgreSQL is isolated by tenant_id via Row-Level Security. A tenant can have multiple users, multiple routing rules, and separate cost quotas.",
+ },
+ {
+ term: "Routing Rule",
+ definition:
+ "A policy that matches incoming AI requests based on conditions (user role, department, model, token estimate, sensitivity) and routes them to a specific provider with optional fallback. Rules are sorted by priority (lower = evaluated first). First match wins.",
+ },
+ {
+ term: "PII (Personally Identifiable Information)",
+ definition:
+ "Data that can identify a person: names, email addresses, phone numbers, IBANs, SSNs, credit card numbers, etc. Veylant IA detects and anonymizes PII in prompts before they leave your network.",
+ },
+ {
+ term: "Pseudonymization",
+ definition:
+ "A reversible PII replacement technique. Detected PII tokens are replaced with synthetic identifiers (e.g., PERSON_001) and the original→synthetic mapping is stored in Redis (AES-256-GCM encrypted, TTL-based). The LLM works with the synthetic data; the response can optionally be de-pseudonymized.",
+ },
+ {
+ term: "Audit Log",
+ definition:
+ "An immutable record of every AI request: tenant, user, model, provider, token counts, cost, PII entities detected, policy matched, latency, and response status. Stored in ClickHouse (append-only). Retention via TTL policies — no DELETE operations.",
+ },
+ {
+ term: "Provider Adapter",
+ definition:
+ "A Go interface (Send, Stream, Validate, HealthCheck) implemented for each LLM provider. The routing engine selects the adapter; all adapters return OpenAI-format responses regardless of the upstream API.",
+ },
+ {
+ term: "Circuit Breaker",
+ definition:
+ "A per-provider failure counter. When failures exceed a threshold (default: 5), the breaker opens and the provider is bypassed for a TTL period (default: 60s). The fallback chain in the routing rule is used instead.",
+ },
+ {
+ term: "RBAC",
+ definition:
+ "Role-Based Access Control. Four roles: admin (full access), manager (read-write policies and users), user (inference only, restricted models), auditor (read-only logs and compliance, no inference). Roles are embedded in the Keycloak JWT.",
+ },
+ {
+ term: "Feature Flag",
+ definition:
+ "A boolean or string flag stored in PostgreSQL with an in-memory cache. Used to gate features without redeployment. Falls back to in-memory defaults if the database is unavailable.",
+ },
+ {
+ term: "GDPR Article 30",
+ definition:
+ "The GDPR requirement to maintain a Record of Processing Activities (ROPA). Veylant IA provides a built-in registry with fields for use case, legal basis, data categories, retention period, recipients, and processors.",
+ },
+ {
+ term: "EU AI Act",
+ definition:
+ "EU regulation classifying AI systems by risk level: forbidden, high, limited, or minimal. Veylant IA's compliance module helps you classify each use case through a structured questionnaire and generates PDF reports.",
+ },
+ {
+ term: "SLO (Service Level Objective)",
+ definition:
+ "Veylant IA targets 99.5% availability and p95 latency < 500ms. These are tracked in the production Grafana dashboard with an error budget that updates in real time.",
+ },
+];
+
+export function KeyConceptsPage() {
+ return (
+
+
Key Concepts
+
+ This glossary explains the core abstractions you'll encounter when working with Veylant IA.
+
+
+
+ If you're new to Veylant IA, read{" "}
+ What is Veylant IA? first, then come back here before
+ diving into the API reference or guides.
+
+
+
Glossary
+
+
+ {concepts.map((c) => (
+
+
{c.term}
+
{c.definition}
+
+ ))}
+
+
+
Request Lifecycle
+
What happens when a client sends a request to POST /v1/chat/completions:
+
+
+
{`1. Request arrives at Go proxy (:8090)
+2. RequestID middleware → generate X-Request-ID
+3. SecurityHeaders middleware → set CSP, HSTS, COOP headers
+4. CORS middleware → validate Origin header
+5. Auth middleware → validate Bearer JWT (Keycloak or mock)
+ → extract tenant_id, user_id, role, department from claims
+6. RateLimit middleware → check per-tenant token bucket (Redis)
+ → if exceeded: 429 with Retry-After header
+7. RBAC check → validate role has access to requested model
+8. Routing engine → evaluate rules (priority ASC, first match)
+ → select provider + fallback chain
+9. PII detection → gRPC call to PII service (<50ms budget)
+ → anonymize/pseudonymize prompt
+10. Circuit breaker check → skip if provider is open
+11. Provider adapter → forward to LLM (stream or batch)
+12. Audit logger → async ClickHouse write (non-blocking)
+13. Response returned to client`}
+
+
+
Multi-tenancy Model
+
+ Veylant IA uses logical isolation via PostgreSQL Row-Level Security (RLS).
+ The application connects as the veylant_app role and sets{" "}
+ app.tenant_id per session via middleware. All queries automatically filter
+ by tenant without requiring explicit WHERE clauses in application code.
+
+
+ Physical isolation (separate database instances per tenant) is a V2 feature. See the
+ feedback backlog.
+
+
+ );
+}
diff --git a/web/src/pages/docs/getting-started/QuickStartPage.tsx b/web/src/pages/docs/getting-started/QuickStartPage.tsx
new file mode 100644
index 0000000..dbd9191
--- /dev/null
+++ b/web/src/pages/docs/getting-started/QuickStartPage.tsx
@@ -0,0 +1,167 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+
+export function QuickStartPage() {
+ return (
+
+
Quick Start
+
+ This guide gets you from zero to a running Veylant IA instance in under 5 minutes using
+ Docker Compose.
+
+
+
+ You need Docker 24+ and Docker Compose v2 installed.
+ Clone the repository and ensure ports 8090, 8080, 5432, 6379, 8123, 3000, and 3001 are
+ free.
+
+
+
Step 1 — Clone the repository
+
+
+
Step 2 — Configure environment
+
+ Copy the sample environment file and add at least one LLM provider API key. For a minimal
+ setup, OpenAI is enough.
+
+
+
+
+ When server.env=development (the default), all external services degrade
+ gracefully. Keycloak is bypassed (mock JWT), PostgreSQL failures disable routing, ClickHouse
+ failures disable audit logs. This means you can start the proxy even if some services
+ haven't fully initialized yet.
+
+
+
Step 3 — Start the stack
+
+
+ This starts 9 services: PostgreSQL, Redis, ClickHouse, Keycloak, the Go proxy, PII
+ detection service, Prometheus, Grafana, and the React dashboard.
+
+
+ Wait for the proxy to print server listening on :8090. First startup takes
+ ~2 minutes while Keycloak initializes and database migrations run.
+
+ In development mode, the proxy uses a mock JWT verifier. Pass any Bearer token and the
+ request will be authenticated as admin@veylant.dev.
+
+
+
+
Or use the OpenAI Python SDK with a changed base URL:
+
+
+
Step 6 — Explore the dashboard
+
+ Open http://localhost:3000 to see the React dashboard. In development mode,
+ you're automatically logged in as Dev Admin. You'll see:
+
+
+
+ Overview — request counts, costs, and tokens consumed
+
+
+ Playground IA — test prompts with live PII detection visualization
+
+
+ Policies — create and manage routing rules
+
+
+ Compliance — GDPR Article 30 registry and AI Act questionnaire
+
+
+
+
+ Try creating a routing rule in the dashboard that sends all requests from the{" "}
+ legal department to Anthropic instead of OpenAI. See{" "}
+ Routing Rules Engine for the full guide.
+
+
+
Stop the stack
+
+
+ );
+}
diff --git a/web/src/pages/docs/getting-started/WhatIsVeylantPage.tsx b/web/src/pages/docs/getting-started/WhatIsVeylantPage.tsx
new file mode 100644
index 0000000..3bdf927
--- /dev/null
+++ b/web/src/pages/docs/getting-started/WhatIsVeylantPage.tsx
@@ -0,0 +1,172 @@
+import { Callout } from "../components/Callout";
+import { Link } from "react-router-dom";
+
+export function WhatIsVeylantPage() {
+ return (
+
+
What is Veylant IA?
+
+ Veylant IA is a B2B SaaS platform that acts as an intelligent proxy and
+ gateway for enterprise AI consumption. It sits between your organization's applications and
+ LLM providers (OpenAI, Anthropic, Azure, Mistral, Ollama), enforcing governance policies on
+ every request.
+
+
+
+ Veylant IA implements the OpenAI API format — specifically{" "}
+ /v1/chat/completions. Your existing OpenAI SDK clients work without
+ modification; just change the base URL.
+
+
+
Core Value Proposition
+
Four problems Veylant IA solves for enterprise IT and compliance teams:
+
+
+ {[
+ {
+ title: "Shadow AI Prevention",
+ desc: "Every AI call flows through the proxy — no direct provider access possible. Full audit trail of who sent what to which model.",
+ },
+ {
+ title: "PII Anonymization",
+ desc: "3-layer detection (regex + spaCy NER + LLM validation) anonymizes sensitive data before it leaves your perimeter. <50ms latency.",
+ },
+ {
+ title: "GDPR & EU AI Act Compliance",
+ desc: "Built-in Article 30 processing registry, AI Act risk classification, DPIA templates, and subject access/erasure rights.",
+ },
+ {
+ title: "Cost Control",
+ desc: "Per-tenant and per-user token budgets, circuit breakers per provider, cost breakdown by department and model.",
+ },
+ ].map((card) => (
+
+
{card.title}
+
{card.desc}
+
+ ))}
+
+
+
Architecture Overview
+
+ Veylant IA is a modular monolith (not microservices) with two distinct
+ runtimes:
+
+ The core of Veylant IA. Written in Go 1.24, it handles all incoming AI requests, applies
+ governance policies, and routes them to the appropriate LLM provider. It exposes:
+
+ A Python FastAPI + gRPC service running 3 detection layers in under 50ms. Anonymizes or
+ pseudonymizes PII in prompts before they reach the upstream LLM. Pseudonymized mappings are
+ stored in Redis (AES-256-GCM encrypted, TTL-based).
+
+
+
Routing Engine
+
+ Rules stored in PostgreSQL (JSONB conditions), cached in memory. Routes requests to
+ providers based on user role, department, model requested, sensitivity score, token
+ estimate, and more. First-match wins; lower priority number = evaluated first.
+
PostgreSQL unreachable → routing disabled, feature flags use in-memory defaults
+
ClickHouse unreachable → audit logging disabled
+
PII service unreachable → PII skipped if fail_open=true
+
+ In production mode, any of the above causes a fatal startup error.
+
+
+ );
+}
diff --git a/web/src/pages/docs/guides/ComplianceGuide.tsx b/web/src/pages/docs/guides/ComplianceGuide.tsx
new file mode 100644
index 0000000..b58d802
--- /dev/null
+++ b/web/src/pages/docs/guides/ComplianceGuide.tsx
@@ -0,0 +1,154 @@
+import { Callout } from "../components/Callout";
+import { CodeBlock } from "../components/CodeBlock";
+import { Link } from "react-router-dom";
+
+export function ComplianceGuide() {
+ return (
+
+
GDPR & EU AI Act Compliance
+
+ Veylant IA includes a built-in compliance module for GDPR Article 30 record-keeping and EU
+ AI Act risk classification. It is designed to serve as your primary compliance tool for AI
+ deployments.
+
+
+
GDPR Article 30 — Record of Processing Activities
+
+ Article 30 requires organizations to maintain a written record of all data processing
+ activities. For AI systems, this means documenting each use case where personal data may be
+ processed.
+
+ A DPIA is mandatory under GDPR Art. 35 for high-risk processing activities. High-risk AI
+ systems under the AI Act also trigger DPIA requirements. Veylant IA generates DPIA template
+ documents from the Admin → Compliance → Reports tab.
+
+
+
Compliance Reports
+
Available report formats via the API:
+
+
+
+ All accesses to compliance reports and audit logs are themselves logged. This satisfies
+ data protection authority requirements for meta-logging of sensitive data access.
+
+
+
Working with Compliance
+
+ See the Admin — Compliance API for full
+ endpoint documentation, or navigate to{" "}
+ Dashboard → Compliance to use the visual interface.
+
+
+ );
+}
diff --git a/web/src/pages/docs/guides/MonitoringGuide.tsx b/web/src/pages/docs/guides/MonitoringGuide.tsx
new file mode 100644
index 0000000..5d3b8e5
--- /dev/null
+++ b/web/src/pages/docs/guides/MonitoringGuide.tsx
@@ -0,0 +1,162 @@
+import { Callout } from "../components/Callout";
+import { CodeBlock } from "../components/CodeBlock";
+
+export function MonitoringGuide() {
+ return (
+
+
Monitoring & Alerting
+
+ Veylant IA exposes Prometheus metrics and ships pre-built Grafana dashboards and
+ Alertmanager rules for production operations.
+
+
+
Prometheus Metrics
+
+ The proxy exposes metrics at GET /metrics (Prometheus text format, scraped
+ every 15 seconds).
+
+ );
+}
diff --git a/web/src/pages/docs/guides/PiiGuide.tsx b/web/src/pages/docs/guides/PiiGuide.tsx
new file mode 100644
index 0000000..4b651d6
--- /dev/null
+++ b/web/src/pages/docs/guides/PiiGuide.tsx
@@ -0,0 +1,147 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+
+export function PiiGuide() {
+ return (
+
+
PII Detection & Anonymization
+
+ Veylant IA intercepts all AI prompts and runs a 3-layer PII detection pipeline before
+ forwarding to the LLM. The entire pipeline must complete in under 50ms.
+
+
+
Detection Pipeline
+
+
{`Incoming prompt text
+ │
+ ▼
+Layer 1: Regex (< 1ms)
+ ─ IBAN: /[A-Z]{2}\\d{2}[A-Z0-9]{11,30}/
+ ─ Email: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}/
+ ─ Phone: /(?:\\+33|0)[1-9](?:[\\s.-]?\\d{2}){4}/
+ ─ SSN: /\\d{1}\\s?\\d{2}\\s?\\d{2}\\s?\\d{2}\\s?\\d{3}\\s?\\d{3}\\s?\\d{2}/
+ ─ Credit card: /\\d{4}[\\s-]?\\d{4}[\\s-]?\\d{4}[\\s-]?\\d{4}/
+ │
+ ▼
+Layer 2: Presidio + spaCy NER (15–40ms)
+ ─ PERSON: "John Smith", "Marie Dupont"
+ ─ LOCATION: "Paris", "75001", "rue de Rivoli"
+ ─ ORGANIZATION: "Acme Corp", "Banque de France"
+ ─ DATE_TIME: "12/03/1985"
+ ─ NRP: nationality, religion, political group
+ │
+ ▼
+Layer 3: LLM validation (optional, V1.1)
+ ─ For ambiguous cases scored 0.5–0.8
+ ─ Sends short validation prompts to a fast LLM
+ ─ Corrects false positives and false negatives
+ │
+ ▼
+Anonymized prompt → forwarded to LLM provider`}
+
+
+
Anonymization vs Pseudonymization
+
Veylant IA supports two modes:
+
+
+
+
Anonymization (default)
+
+ PII is replaced with a type label. No mapping stored. Irreversible.
+
+
+
+
+
Pseudonymization
+
+ PII is replaced with a synthetic token. Mapping stored in Redis (encrypted, TTL-based).
+ Reversible.
+
+
+
+
+
+
Redis Pseudonymization Mapping
+
+ When pseudonymization is enabled, the PII service stores mappings in Redis using the
+ following key structure:
+
+
+
+ If the PII service is unreachable and pii.fail_open=true (default in
+ development), requests are forwarded without anonymization and a warning is logged. In
+ production, set pii.fail_open=false to return 503 instead.
+
+
+
Disabling PII per Request
+
+ There is no per-request PII bypass. PII is controlled globally via the{" "}
+ pii_detection feature flag or the pii.enabled config key.
+ Fine-grained per-rule PII control is planned for V1.1.
+
+
+
Testing PII Detection
+
+
+
+
+ );
+}
diff --git a/web/src/pages/docs/guides/RbacGuide.tsx b/web/src/pages/docs/guides/RbacGuide.tsx
new file mode 100644
index 0000000..a559ba7
--- /dev/null
+++ b/web/src/pages/docs/guides/RbacGuide.tsx
@@ -0,0 +1,126 @@
+import { Callout } from "../components/Callout";
+import { CodeBlock } from "../components/CodeBlock";
+
+export function RbacGuide() {
+ return (
+
+
RBAC & Permissions
+
+ Veylant IA enforces Role-Based Access Control on every request. Roles are embedded in the
+ Keycloak JWT and cannot be elevated at runtime.
+
+
+
Roles
+
+ {[
+ {
+ role: "admin",
+ color: "bg-red-100 dark:bg-red-900/40 text-red-700 dark:text-red-300",
+ description: "Full access. Can manage policies, users, providers, feature flags, and read all compliance/audit data. Has unrestricted model access.",
+ },
+ {
+ role: "manager",
+ color: "bg-amber-100 dark:bg-amber-900/40 text-amber-700 dark:text-amber-300",
+ description: "Read-write access to routing policies and users. Can run AI inference with any model. Cannot manage feature flags or access compliance reports.",
+ },
+ {
+ role: "user",
+ color: "bg-blue-100 dark:bg-blue-900/40 text-blue-700 dark:text-blue-300",
+ description: "Inference only. Restricted to the model list in rbac.user_allowed_models (default: gpt-4o-mini, mistral-medium). No admin API access.",
+ },
+ {
+ role: "auditor",
+ color: "bg-purple-100 dark:bg-purple-900/40 text-purple-700 dark:text-purple-300",
+ description: "Read-only access to audit logs and compliance data. Cannot call /v1/chat/completions. Intended for compliance officers and DPOs.",
+ },
+ ].map((item) => (
+
+ Users with the user role can only access models listed in{" "}
+ rbac.user_allowed_models. Requests to other models are rejected with 403:
+
+
+
+
+
+ The admin and manager roles have unrestricted model access —{" "}
+ user_allowed_models does not apply to them.
+
+
+
Setting Up Roles in Keycloak
+
Assign roles to users in Keycloak:
+
+
Log in to Keycloak Admin Console (http://localhost:8080, admin/admin)
+
Go to Realm: veylant → Users
+
Select a user → Role Mappings → Realm Roles
+
Assign one of: admin, manager, user, auditor
+
+
+ );
+}
diff --git a/web/src/pages/docs/guides/RoutingGuide.tsx b/web/src/pages/docs/guides/RoutingGuide.tsx
new file mode 100644
index 0000000..bf30ee6
--- /dev/null
+++ b/web/src/pages/docs/guides/RoutingGuide.tsx
@@ -0,0 +1,146 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+
+export function RoutingGuide() {
+ return (
+
+
Routing Rules Engine
+
+ The routing engine matches incoming AI requests against a prioritized list of rules and
+ dispatches them to the appropriate LLM provider. Rules are stored in PostgreSQL (JSONB
+ conditions) and cached in memory.
+
+
+
Evaluation Order
+
+
Rules are sorted ascending by priority (lower number = evaluated first)
+
First match wins — once a rule matches, evaluation stops
+
All conditions within a rule are AND-joined
+
An empty conditions array is a catch-all that matches everything
+
+
+
+ Use gaps between priorities (10, 20, 30, ...) so you can insert rules later without
+ renumbering. The catch-all rule should have the highest priority number (e.g. 999).
+
+
+
+ The fallback_providers array defines ordered fallbacks. The routing engine
+ tries providers in order:
+
+
+
Check whether the primary provider's circuit breaker is open → skip the provider if it is
+
Try primary provider → if error, try next fallback
+
Continue through the fallback chain
+
If all providers fail → return 503
+
+
+
Rule Caching
+
+ Rules are loaded from PostgreSQL and cached in memory. The cache is invalidated when a
+ rule is created, updated, or deleted via the admin API. You can also force a reload:
+
+
+
+
+ The in-memory rule cache refreshes every 60 seconds from PostgreSQL. Admin API changes
+ (create/update/delete) invalidate the cache immediately via a shared channel.
+
+
+ );
+}
diff --git a/web/src/pages/docs/installation/ConfigurationPage.tsx b/web/src/pages/docs/installation/ConfigurationPage.tsx
new file mode 100644
index 0000000..5846eee
--- /dev/null
+++ b/web/src/pages/docs/installation/ConfigurationPage.tsx
@@ -0,0 +1,152 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+import { ParamTable } from "../components/ParamTable";
+
+export function ConfigurationPage() {
+ return (
+
+
Configuration Reference
+
+ Veylant IA is configured via config.yaml at the repository root. Any key can
+ be overridden via an environment variable using the VEYLANT_ prefix and
+ replacing . with _.
+
+ );
+}
diff --git a/web/src/pages/docs/installation/DockerComposePage.tsx b/web/src/pages/docs/installation/DockerComposePage.tsx
new file mode 100644
index 0000000..1fc5ba4
--- /dev/null
+++ b/web/src/pages/docs/installation/DockerComposePage.tsx
@@ -0,0 +1,133 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+
+export function DockerComposePage() {
+ return (
+
+
Docker Compose Setup
+
+ The recommended way to run Veylant IA locally or in a single-server staging environment is
+ Docker Compose. The full stack is defined in docker-compose.yml at the
+ repository root.
+
Go proxy (waits for PostgreSQL, uses service_started for others)
+
React web (waits for proxy)
+
Prometheus → Grafana (monitoring)
+
+
+
+ The proxy Docker image uses distroless/static — no shell, no{" "}
+ wget. Services that depend on the proxy use{" "}
+ condition: service_started rather than a health check command.
+
+
+
First Run: Database Migrations
+
+ On first start, the proxy automatically applies PostgreSQL migrations (9 migration files)
+ and ClickHouse DDL. You can also run migrations manually:
+
+
+
+
Protocol Buffer Generation
+
+ If the gen/ or services/pii/gen/ directories are missing (e.g.,
+ fresh clone), regenerate the gRPC stubs before starting:
+
+
+
+
+ The PII service starts but rejects all gRPC requests if services/pii/gen/ is
+ missing. Run make proto first.
+
+
+
Viewing Logs
+
+
+ );
+}
diff --git a/web/src/pages/docs/installation/ProvidersPage.tsx b/web/src/pages/docs/installation/ProvidersPage.tsx
new file mode 100644
index 0000000..91bfce1
--- /dev/null
+++ b/web/src/pages/docs/installation/ProvidersPage.tsx
@@ -0,0 +1,116 @@
+import { CodeBlock } from "../components/CodeBlock";
+import { Callout } from "../components/Callout";
+
+export function ProvidersPage() {
+ return (
+
+
Provider Setup
+
+ Veylant IA supports 5 LLM providers out of the box. Each provider implements the{" "}
+ provider.Adapter interface (Send, Stream,{" "}
+ Validate, HealthCheck).
+
+
+
+ Implement the provider.Adapter interface in{" "}
+ internal/provider/<name>/ and register it in{" "}
+ cmd/proxy/main.go. No other changes needed.
+
+
+
+ Azure requires a deployment name instead of a model name. The proxy maps OpenAI model IDs
+ to Azure deployment names via the routing rule's target_model field.
+
+ Ollama requires no API key. Ensure the Ollama server is reachable from the proxy container.
+
+
+
+
+ Ollama automatically uses GPU if available. For Docker, add{" "}
+ --gpus all to the container runtime command or use the{" "}
+ deploy.resources.reservations.devices key in docker-compose.yml.
+
+
+
Check Provider Status
+
+ The admin API exposes circuit breaker state for all providers:
+
+ Veylant IA never stores plain-text API keys. Provider keys (OpenAI, Anthropic, etc.) are
+ stored in HashiCorp Vault and rotated on a 90-day cycle. User-facing API keys use a
+ prefix+hash scheme.
+
+
+
API Key Format
+
+ Veylant IA user API keys follow the format sk-vyl_{"{prefix}"}{"{hash}"}:
+
+
+
+
The display prefix (ab12cd34) is stored in plaintext for key identification in the dashboard
+
The full key is SHA-256 hashed; only the hash is stored in PostgreSQL
+
If a key is compromised, the stored prefix identifies which key to revoke without exposing the key value
+
+
+
Provider API Keys
+
+ Provider API keys (OpenAI, Anthropic, etc.) must never be committed to the repository.
+ Use environment variables or HashiCorp Vault.
+
+
+
+
+
Key Rotation
+
Provider API keys are rotated on a 90-day cycle via Vault:
+
+
Generate new API key from the provider portal
+
Write new key to Vault: vault kv patch secret/veylant/providers openai_api_key=sk-new...
+
Vault agent syncs the new value to running pods automatically (no restart needed)
+
Revoke the old key from the provider portal
+
+
+
Secret Detection in CI
+
+ Every commit is scanned by gitleaks for accidentally committed secrets.
+ Any string matching common API key patterns (starting with sk-,{" "}
+ sk-ant-, etc.) blocks the CI pipeline.
+
+
+
+
Key Usage Auditing
+
+ Every AI request records which API key (by prefix) was used in the ClickHouse audit log.
+ This allows you to trace the source of any request to a specific service or developer.
+
+
+
+ );
+}
diff --git a/web/src/pages/docs/security/SecurityModelPage.tsx b/web/src/pages/docs/security/SecurityModelPage.tsx
new file mode 100644
index 0000000..2d4bcf2
--- /dev/null
+++ b/web/src/pages/docs/security/SecurityModelPage.tsx
@@ -0,0 +1,125 @@
+import { Callout } from "../components/Callout";
+
+export function SecurityModelPage() {
+ return (
+
+
Security Model
+
+ Veylant IA is designed with a Zero Trust security model. Every component assumes the
+ network is hostile and authenticates and authorizes each request independently.
+
+
+
Zero Trust Architecture
+
+ {[
+ {
+ title: "mTLS Between Services",
+ desc: "All internal service-to-service communication uses mutual TLS. The proxy, PII service, PostgreSQL, Redis, and ClickHouse all authenticate each other via certificates.",
+ },
+ {
+ title: "TLS 1.3 Externally",
+ desc: "External traffic uses TLS 1.3 minimum. TLS 1.2 and earlier are disabled at the Traefik/nginx gateway level.",
+ },
+ {
+ title: "JWT Validation",
+ desc: "Every API request carries a signed JWT. The proxy validates the signature on every request using the signing keys from Keycloak's JWKS endpoint (keys are cached with a TTL).",
+ },
+ {
+ title: "Network Policies",
+ desc: "Kubernetes NetworkPolicies restrict pod-to-pod communication. Only the proxy can reach the PII service; only Prometheus can scrape /metrics.",
+ },
+ ].map((item) => (
+
+
{item.title}
+
{item.desc}
+
+ ))}
+
+
+
Encryption at Rest
+
+
+ Prompt storage — Encrypted with AES-256-GCM using the{" "}
+ crypto.key config value (application-level, independent of disk encryption)
+
+
+ PII pseudonymization mappings — Encrypted in Redis with AES-256-GCM per
+ mapping entry
+
+
+ API keys — Stored as SHA-256 hashes only. The prefix (e.g.{" "}
+ sk-vyl_ab12cd34) is kept for display, but the full key is never stored
+
+
+ HashiCorp Vault — Provider API keys and the crypto key are stored in
+ Vault; 90-day rotation cycle
+
+
+
+
Audit-of-the-Audit
+
+ All accesses to audit logs and compliance reports are themselves logged. This two-level
+ audit trail satisfies requirements for sensitive data access monitoring.
+
+
+
Custom Security Rules (SAST)
+
+ CI enforces custom Semgrep rules (.semgrep.yml) that catch common security
+ issues specific to this codebase:
+
+
+
+
+
+
Rule
+
What it catches
+
+
+
+ {[
+ { rule: "context-background-in-handler", catches: "context.Background() in HTTP handlers — use r.Context() to propagate cancellation" },
+ { rule: "sql-string-concat", catches: "SQL string concatenation — use parameterized queries ($1, $2, ...)" },
+ { rule: "sensitive-field-in-log", catches: "Logging password, api_key, token, secret, Authorization, email, prompt" },
+ { rule: "hardcoded-api-key", catches: "String literals starting with sk- hardcoded in source" },
+ { rule: "request-body-without-limit", catches: "json.NewDecoder(r.Body) without http.MaxBytesReader" },
+ { rule: "python-eval-exec", catches: "eval() or exec() on variables in the PII service" },
+ ].map((row) => (
+
+
{row.rule}
+
{row.catches}
+
+ ))}
+
+
+
+
+
Penetration Test Results (v1.0.0)
+
+ Grey-box penetration test completed June 9–20, 2026. Results:
+
+
+ Critical: 0
+
+
+ High: 0
+
+
+ Medium: 2 (remediated before launch)
+
+
+ Low: 3 (accepted risk, backlog)
+
+
+ Full report available to enterprise customers under NDA.
+
+
+