257 lines
6.8 KiB
JSON
{
|
|
"title": "Veylant — Production SLO & Error Budget",
|
|
"uid": "veylant-production-slo",
|
|
"schemaVersion": 38,
|
|
"version": 1,
|
|
"refresh": "1m",
|
|
"time": { "from": "now-30d", "to": "now" },
|
|
"tags": ["slo", "production", "veylant"],
|
|
"panels": [
|
|
{
|
|
"id": 1,
|
|
"title": "Uptime SLO — 30-day rolling (target: 99.5%)",
|
|
"type": "gauge",
|
|
"gridPos": { "h": 8, "w": 6, "x": 0, "y": 0 },
|
|
"options": {
|
|
"reduceOptions": { "calcs": ["lastNotNull"] },
|
|
"orientation": "auto",
|
|
"showThresholdLabels": true,
|
|
"showThresholdMarkers": true
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percentunit",
|
|
"min": 0.99,
|
|
"max": 1,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "red", "value": null },
|
|
{ "color": "yellow", "value": 0.995 },
|
|
{ "color": "green", "value": 0.999 }
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "1 - (sum(increase(veylant_request_errors_total[30d])) / sum(increase(veylant_requests_total[30d])))",
|
|
"legendFormat": "Uptime SLO"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 2,
|
|
"title": "Error Budget Remaining (minutes)",
|
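|
"description": "SLO target: 99.5% uptime over 30 days = 216 min allowed downtime (0.5% of 43,200 min). Remaining budget is derived from the 30-day request error ratio; a negative value means the budget is overspent.",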
|
|
"type": "stat",
|
|
"gridPos": { "h": 8, "w": 6, "x": 6, "y": 0 },
|
|
"options": {
|
|
"reduceOptions": { "calcs": ["lastNotNull"] },
|
|
"colorMode": "background"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "m",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "red", "value": null },
|
|
{ "color": "yellow", "value": 43 },
|
|
{ "color": "green", "value": 108 }
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "(0.005 * 30 * 24 * 60) - (sum(increase(veylant_request_errors_total[30d])) / sum(increase(veylant_requests_total[30d])) * 30 * 24 * 60)",
|
|
"legendFormat": "Budget remaining (min)"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 3,
|
|
"title": "p99 Latency SLO (target: < 500ms)",
|
|
"type": "gauge",
|
|
"gridPos": { "h": 8, "w": 6, "x": 12, "y": 0 },
|
|
"options": {
|
|
"reduceOptions": { "calcs": ["lastNotNull"] },
|
|
"orientation": "auto",
|
|
"showThresholdMarkers": true
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "s",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": null },
|
|
{ "color": "yellow", "value": 0.3 },
|
|
{ "color": "red", "value": 0.5 }
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "histogram_quantile(0.99, sum by (le) (rate(veylant_request_duration_seconds_bucket[5m])))",
|
|
"legendFormat": "p99 latency"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 4,
|
|
"title": "Active Alerts",
|
|
"type": "stat",
|
|
"gridPos": { "h": 8, "w": 6, "x": 18, "y": 0 },
|
|
"options": {
|
|
"reduceOptions": { "calcs": ["lastNotNull"] },
|
|
"colorMode": "background"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": null },
|
|
{ "color": "yellow", "value": 1 },
|
|
{ "color": "red", "value": 3 }
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(ALERTS{alertstate=\"firing\",job=~\"veylant.*\"})",
|
|
"legendFormat": "Firing alerts"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 5,
|
|
"title": "PII Entities Detected — Rate by Type (per min)",
|
|
"type": "timeseries",
|
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
|
|
"targets": [
|
|
{
|
|
"expr": "sum by (entity_type) (rate(veylant_pii_entities_detected_total[1m])) * 60",
|
|
"legendFormat": "{{ entity_type }}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "short",
|
|
"custom": { "lineWidth": 2 }
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 6,
|
|
"title": "PostgreSQL Connections (Active / Idle)",
|
|
"type": "timeseries",
|
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
|
|
"targets": [
|
|
{
|
|
"expr": "veylant_db_connections_active",
|
|
"legendFormat": "Active connections"
|
|
},
|
|
{
|
|
"expr": "veylant_db_connections_idle",
|
|
"legendFormat": "Idle connections"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "short",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": null },
|
|
{ "color": "yellow", "value": 15 },
|
|
{ "color": "red", "value": 20 }
|
|
]
|
|
}
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 7,
|
|
"title": "Provider RPS Breakdown",
|
|
"type": "piechart",
|
|
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 16 },
|
|
"options": {
|
|
"pieType": "donut",
|
|
"displayLabels": ["name", "percent"]
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum by (provider) (rate(veylant_requests_total[5m]))",
|
|
"legendFormat": "{{ provider }}"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 8,
|
|
"title": "Provider RPS — Time Series",
|
|
"type": "timeseries",
|
|
"gridPos": { "h": 8, "w": 16, "x": 8, "y": 16 },
|
|
"targets": [
|
|
{
|
|
"expr": "sum by (provider) (rate(veylant_requests_total[1m]))",
|
|
"legendFormat": "{{ provider }}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "reqps",
|
|
"custom": { "lineWidth": 2 }
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 9,
|
|
"title": "Redis Memory Usage %",
|
|
"type": "timeseries",
|
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 },
|
|
"targets": [
|
|
{
|
|
"expr": "redis_memory_used_bytes / redis_memory_max_bytes * 100",
|
|
"legendFormat": "Redis memory %"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": null },
|
|
{ "color": "yellow", "value": 70 },
|
|
{ "color": "red", "value": 90 }
|
|
]
|
|
}
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 10,
|
|
"title": "Error Rate by Provider (5m avg)",
|
|
"type": "timeseries",
|
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 },
|
|
"targets": [
|
|
{
|
|
"expr": "veylant:error_rate:5m * 100",
|
|
"legendFormat": "{{ provider }} error %"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent",
|
|
"custom": { "lineWidth": 2 }
|
|
}
|
|
}
|
|
}
|
|
]
|
|
}
|