From 40d917e160dc6fd5bff8163eb42d8c08d486a75b Mon Sep 17 00:00:00 2001 From: David Date: Mon, 6 Apr 2026 20:13:17 +0200 Subject: [PATCH] chore(ci): remove smoke tests from preprod and prod pipelines Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/cd-main.yml | 42 +- .github/workflows/cd-preprod.yml | 37 +- .../controllers/health.controller.ts | 2 + .../dashboards/xpeditis-logs.json | 670 ++++++++---------- .../grafana/provisioning/datasources/loki.yml | 2 +- infra/logging/promtail/promtail-config.yml | 2 +- 6 files changed, 293 insertions(+), 462 deletions(-) diff --git a/.github/workflows/cd-main.yml b/.github/workflows/cd-main.yml index f8f5236..5633e39 100644 --- a/.github/workflows/cd-main.yml +++ b/.github/workflows/cd-main.yml @@ -10,7 +10,7 @@ name: CD Production # If someone merges to main without going through preprod, # this step fails and the deployment is blocked. # -# Flow: quality-gate → verify-image → promote → deploy → smoke-tests → notify +# Flow: quality-gate → verify-image → promote → deploy → notify # # Secrets required: # REGISTRY_TOKEN — Scaleway registry (read/write) @@ -231,47 +231,11 @@ jobs: kubectl rollout status deployment/xpeditis-frontend -n ${{ env.K8S_NAMESPACE }} --timeout=120s echo "Rollback complete. Previous version is live." - # ── 5. Smoke Tests ─────────────────────────────────────────────────── - # kubectl rollout status already verified pod readiness. - # These smoke tests validate the full network path: - # Cloudflare → Hetzner LB → Traefik → pod. - smoke-tests: - name: Smoke Tests - runs-on: ubuntu-latest - needs: deploy - steps: - - name: Wait for LB propagation - run: sleep 30 - - - name: Health — Backend - run: | - for i in {1..12}; do - STATUS=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \ - "${{ secrets.PROD_BACKEND_URL }}/api/v1/health" 2>/dev/null || echo "000") - echo " Attempt $i: HTTP $STATUS" - if [ "$STATUS" = "200" ]; then echo "Backend OK."; exit 0; fi - sleep 15 - done - echo "CRITICAL: Backend unreachable after 12 attempts." - exit 1 - - - name: Health — Frontend - run: | - for i in {1..12}; do - STATUS=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \ - "${{ secrets.PROD_FRONTEND_URL }}" 2>/dev/null || echo "000") - echo " Attempt $i: HTTP $STATUS" - if [ "$STATUS" = "200" ]; then echo "Frontend OK."; exit 0; fi - sleep 15 - done - echo "CRITICAL: Frontend unreachable after 12 attempts." - exit 1 - # ── Notifications ──────────────────────────────────────────────────── notify-success: name: Notify Success runs-on: ubuntu-latest - needs: [verify-image, smoke-tests] + needs: [verify-image, deploy] if: success() steps: - run: | @@ -292,7 +256,7 @@ jobs: notify-failure: name: Notify Failure runs-on: ubuntu-latest - needs: [backend-quality, frontend-quality, backend-tests, frontend-tests, verify-image, promote-images, deploy, smoke-tests] + needs: [backend-quality, frontend-quality, backend-tests, frontend-tests, verify-image, promote-images, deploy] if: failure() steps: - run: | diff --git a/.github/workflows/cd-preprod.yml b/.github/workflows/cd-preprod.yml index 1b8887e..9da16bf 100644 --- a/.github/workflows/cd-preprod.yml +++ b/.github/workflows/cd-preprod.yml @@ -1,7 +1,7 @@ name: CD Preprod # Full pipeline triggered on every push to preprod. -# Flow: lint → unit tests → integration tests → docker build → deploy → smoke tests → notify +# Flow: lint → unit tests → integration tests → docker build → deploy → notify # # Secrets required: # REGISTRY_TOKEN — Scaleway registry (read/write) @@ -274,42 +274,11 @@ jobs: fi echo "Frontend webhook triggered." - # ── 6. Smoke Tests ─────────────────────────────────────────────────── - smoke-tests: - name: Smoke Tests - runs-on: ubuntu-latest - needs: deploy - steps: - - name: Wait for services - run: sleep 40 - - name: Health — Backend - run: | - for i in {1..12}; do - STATUS=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \ - "${{ secrets.PREPROD_BACKEND_URL }}/api/v1/health" 2>/dev/null || echo "000") - echo " Attempt $i: HTTP $STATUS" - if [ "$STATUS" = "200" ]; then echo "Backend OK."; exit 0; fi - sleep 15 - done - echo "Backend unreachable after 12 attempts." - exit 1 - - name: Health — Frontend - run: | - for i in {1..12}; do - STATUS=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \ - "${{ secrets.PREPROD_FRONTEND_URL }}" 2>/dev/null || echo "000") - echo " Attempt $i: HTTP $STATUS" - if [ "$STATUS" = "200" ]; then echo "Frontend OK."; exit 0; fi - sleep 15 - done - echo "Frontend unreachable after 12 attempts." - exit 1 - # ── Notifications ──────────────────────────────────────────────────── notify-success: name: Notify Success runs-on: ubuntu-latest - needs: [build-backend, build-frontend, smoke-tests] + needs: [build-backend, build-frontend, deploy] if: success() steps: - run: | @@ -329,7 +298,7 @@ jobs: notify-failure: name: Notify Failure runs-on: ubuntu-latest - needs: [backend-quality, frontend-quality, backend-tests, frontend-tests, integration-tests, build-backend, build-frontend, deploy, smoke-tests] + needs: [backend-quality, frontend-quality, backend-tests, frontend-tests, integration-tests, build-backend, build-frontend, deploy] if: failure() steps: - run: | diff --git a/apps/backend/src/application/controllers/health.controller.ts b/apps/backend/src/application/controllers/health.controller.ts index 1952eec..67991b5 100644 --- a/apps/backend/src/application/controllers/health.controller.ts +++ b/apps/backend/src/application/controllers/health.controller.ts @@ -1,6 +1,8 @@ import { Controller, Get } from '@nestjs/common'; import { ApiTags, ApiOperation, ApiResponse } from '@nestjs/swagger'; +import { Public } from '../decorators/public.decorator'; +@Public() @ApiTags('health') @Controller('health') export class HealthController { diff --git a/infra/logging/grafana/provisioning/dashboards/xpeditis-logs.json b/infra/logging/grafana/provisioning/dashboards/xpeditis-logs.json index 96e624f..c930170 100644 --- a/infra/logging/grafana/provisioning/dashboards/xpeditis-logs.json +++ b/infra/logging/grafana/provisioning/dashboards/xpeditis-logs.json @@ -1,32 +1,37 @@ { - "title": "Xpeditis — Logs & Monitoring", - "uid": "xpeditis-logs", - "description": "Dashboard complet — logs backend/frontend, métriques HTTP, erreurs", - "tags": ["xpeditis", "logs", "backend", "frontend"], - "timezone": "browser", + "__inputs": [ + { + "name": "DS_LOKI", + "label": "Loki", + "description": "Loki datasource", + "type": "datasource", + "pluginId": "loki", + "pluginName": "Loki" + } + ], + "__requires": [ + { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "11.0.0" }, + { "type": "datasource", "id": "loki", "name": "Loki", "version": "1.0.0" }, + { "type": "panel", "id": "stat", "name": "Stat", "version": "" }, + { "type": "panel", "id": "timeseries", "name": "Time series", "version": "" }, + { "type": "panel", "id": "piechart", "name": "Pie chart", "version": "" }, + { "type": "panel", "id": "bargauge", "name": "Bar gauge", "version": "" }, + { "type": "panel", "id": "logs", "name": "Logs", "version": "" } + ], + "title": "Xpeditis — Logs & KPIs", + "uid": "xpeditis-logs-kpis", + "description": "Logs applicatifs, KPIs HTTP, temps de réponse et erreurs — Backend & Frontend", + "tags": ["xpeditis", "logs", "monitoring", "backend"], + "timezone": "Europe/Paris", "refresh": "30s", - "schemaVersion": 38, + "schemaVersion": 39, "time": { "from": "now-1h", "to": "now" }, "timepicker": {}, - "fiscalYearStartMonth": 0, "graphTooltip": 1, "editable": true, "version": 1, - "weekStart": "", "links": [], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { "type": "grafana", "uid": "-- Grafana --" }, - "enable": true, - "hide": true, - "iconColor": "rgba(0,211,255,1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, + "annotations": { "list": [] }, "templating": { "list": [ @@ -34,119 +39,99 @@ "name": "service", "label": "Service", "type": "query", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "query": "label_values(service)", "refresh": 2, - "sort": 1, "includeAll": true, "allValue": ".+", - "multi": false, - "hide": 0, + "multi": true, "current": {}, - "options": [] + "hide": 0, + "sort": 1 }, { "name": "level", "label": "Niveau", - "type": "custom", - "query": "All : .+, error : error, fatal : fatal, warn : warn, info : info, debug : debug", - "includeAll": false, - "multi": false, + "type": "query", + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, + "query": "label_values(level)", + "refresh": 2, + "includeAll": true, + "allValue": ".+", + "multi": true, + "current": {}, "hide": 0, - "current": { "text": "All", "value": ".+" }, - "options": [ - { "text": "All", "value": ".+", "selected": true }, - { "text": "error", "value": "error", "selected": false }, - { "text": "fatal", "value": "fatal", "selected": false }, - { "text": "warn", "value": "warn", "selected": false }, - { "text": "info", "value": "info", "selected": false }, - { "text": "debug", "value": "debug", "selected": false } - ] - }, - { - "name": "search", - "label": "Recherche", - "type": "textbox", - "query": "", - "hide": 0, - "current": { "text": "", "value": "" }, - "options": [{ "selected": true, "text": "", "value": "" }] + "sort": 1 } ] }, "panels": [ - { - "id": 100, - "type": "row", - "title": "Vue d'ensemble", - "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 } - }, - { "id": 1, - "title": "Total logs", + "title": "Requêtes totales", "type": "stat", - "gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "gridPos": { "x": 0, "y": 0, "w": 6, "h": 4 }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "options": { - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { "calcs": ["sum"], "fields": "", "values": false }, "orientation": "auto", "textMode": "auto", "colorMode": "background", - "graphMode": "area", + "graphMode": "none", "justifyMode": "center" }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, - "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, - "mappings": [] + "color": { "mode": "fixed", "fixedColor": "#10183A" }, + "unit": "short", + "thresholds": { "mode": "absolute", "steps": [{ "color": "#10183A", "value": null }] } }, "overrides": [] }, "targets": [ { - "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "sum(count_over_time({service=~\"$service\"} [$__range]))", - "legendFormat": "Total", - "instant": true + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, + "expr": "sum(count_over_time({service=~\"$service\"} | json | req_method != \"\" [$__range]))", + "legendFormat": "Requêtes", + "instant": true, + "range": false, + "refId": "A" } ] }, { "id": 2, - "title": "Erreurs & Fatal", + "title": "Erreurs (error + fatal)", "type": "stat", - "gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "gridPos": { "x": 6, "y": 0, "w": 6, "h": 4 }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "options": { - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { "calcs": ["sum"], "fields": "", "values": false }, "orientation": "auto", "textMode": "auto", "colorMode": "background", - "graphMode": "area", + "graphMode": "none", "justifyMode": "center" }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, - "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, - "mappings": [] + "color": { "mode": "fixed", "fixedColor": "red" }, + "unit": "short", + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }] } }, "overrides": [] }, "targets": [ { - "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "expr": "sum(count_over_time({service=~\"$service\", level=~\"error|fatal\"} [$__range]))", "legendFormat": "Erreurs", - "instant": true + "instant": true, + "range": false, + "refId": "A" } ] }, @@ -155,342 +140,342 @@ "id": 3, "title": "Warnings", "type": "stat", - "gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "gridPos": { "x": 12, "y": 0, "w": 6, "h": 4 }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "options": { - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { "calcs": ["sum"], "fields": "", "values": false }, "orientation": "auto", "textMode": "auto", "colorMode": "background", - "graphMode": "area", + "graphMode": "none", "justifyMode": "center" }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, - "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "orange", "value": 1 }] }, - "mappings": [] + "color": { "mode": "fixed", "fixedColor": "orange" }, + "unit": "short", + "thresholds": { "mode": "absolute", "steps": [{ "color": "orange", "value": null }] } }, "overrides": [] }, "targets": [ { - "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "expr": "sum(count_over_time({service=~\"$service\", level=\"warn\"} [$__range]))", "legendFormat": "Warnings", - "instant": true + "instant": true, + "range": false, + "refId": "A" } ] }, { "id": 4, - "title": "Info", + "title": "Taux d'erreur", "type": "stat", - "gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "gridPos": { "x": 18, "y": 0, "w": 6, "h": 4 }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "orientation": "auto", "textMode": "auto", "colorMode": "background", - "graphMode": "area", + "graphMode": "none", "justifyMode": "center" }, "fieldConfig": { "defaults": { - "color": { "fixedColor": "blue", "mode": "fixed" }, - "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, - "mappings": [] + "unit": "percentunit", + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "orange", "value": 0.01 }, + { "color": "red", "value": 0.05 } + ] + }, + "color": { "mode": "thresholds" } }, "overrides": [] }, "targets": [ { - "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "sum(count_over_time({service=~\"$service\", level=\"info\"} [$__range]))", - "legendFormat": "Info", - "instant": true + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, + "expr": "sum(rate({service=~\"$service\", level=~\"error|fatal\"} [$__interval])) / sum(rate({service=~\"$service\"} [$__interval]))", + "legendFormat": "Taux d'erreur", + "instant": false, + "range": true, + "refId": "A" } ] }, { "id": 5, - "title": "Requêtes HTTP 5xx", - "type": "stat", - "gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "title": "Trafic par service (req/s)", + "type": "timeseries", + "gridPos": { "x": 0, "y": 4, "w": 12, "h": 8 }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "options": { - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "orientation": "auto", - "textMode": "auto", - "colorMode": "background", - "graphMode": "area", - "justifyMode": "center" + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom" } }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, - "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, - "mappings": [] + "unit": "reqps", + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, + "fillOpacity": 10, + "gradientMode": "opacity", + "spanNulls": false + } }, "overrides": [] }, "targets": [ { - "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "sum(count_over_time({service=\"backend\"} | json | res_statusCode >= 500 [$__range]))", - "legendFormat": "5xx", - "instant": true + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, + "expr": "sum by(service) (rate({service=~\"$service\"} | json | req_method != \"\" [$__interval]))", + "legendFormat": "{{service}}", + "instant": false, + "range": true, + "refId": "A" } ] }, { "id": 6, - "title": "Temps réponse moyen (ms)", - "type": "stat", - "gridPos": { "h": 4, "w": 4, "x": 20, "y": 1 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "title": "Erreurs & Warnings dans le temps", + "type": "timeseries", + "gridPos": { "x": 12, "y": 4, "w": 12, "h": 8 }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "options": { - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "orientation": "auto", - "textMode": "auto", - "colorMode": "background", - "graphMode": "area", - "justifyMode": "center" + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom" } }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, - "unit": "ms", - "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "orange", "value": 500 }, { "color": "red", "value": 2000 }] }, - "mappings": [] + "unit": "short", + "color": { "mode": "palette-classic" }, + "custom": { + "lineWidth": 2, + "fillOpacity": 15, + "gradientMode": "opacity" + } }, - "overrides": [] + "overrides": [ + { + "matcher": { "id": "byName", "options": "error" }, + "properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "red" } }] + }, + { + "matcher": { "id": "byName", "options": "fatal" }, + "properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "dark-red" } }] + }, + { + "matcher": { "id": "byName", "options": "warn" }, + "properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "orange" } }] + } + ] }, "targets": [ { - "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "avg(avg_over_time({service=\"backend\"} | json | unwrap responseTime [$__range]))", - "legendFormat": "Avg", - "instant": true + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, + "expr": "sum by(level) (rate({service=~\"$service\", level=~\"error|fatal|warn\"} [$__interval]))", + "legendFormat": "{{level}}", + "instant": false, + "range": true, + "refId": "A" } ] }, - { - "id": 200, - "type": "row", - "title": "Volume des logs", - "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 } - }, - { "id": 7, - "title": "Volume par niveau", + "title": "Temps de réponse Backend", "type": "timeseries", - "gridPos": { "h": 8, "w": 14, "x": 0, "y": 6 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "gridPos": { "x": 0, "y": 12, "w": 16, "h": 8 }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "options": { - "legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "multi", "sort": "desc" } + "tooltip": { "mode": "multi", "sort": "desc" }, + "legend": { "displayMode": "list", "placement": "bottom" } }, "fieldConfig": { "defaults": { + "unit": "ms", "color": { "mode": "palette-classic" }, "custom": { - "drawStyle": "bars", - "fillOpacity": 80, - "stacking": { "group": "A", "mode": "normal" }, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false + "lineWidth": 2, + "fillOpacity": 8, + "gradientMode": "opacity" }, - "unit": "short", - "mappings": [], - "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "orange", "value": 500 }, + { "color": "red", "value": 1000 } + ] + } }, "overrides": [ - { "matcher": { "id": "byName", "options": "error" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, - { "matcher": { "id": "byName", "options": "fatal" }, "properties": [{ "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } }] }, - { "matcher": { "id": "byName", "options": "warn" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] }, - { "matcher": { "id": "byName", "options": "info" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] }, - { "matcher": { "id": "byName", "options": "debug" }, "properties": [{ "id": "color", "value": { "fixedColor": "gray", "mode": "fixed" } }] } + { + "matcher": { "id": "byName", "options": "Pire cas (1% des requêtes)" }, + "properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "red" } }] + }, + { + "matcher": { "id": "byName", "options": "Lent (5% des requêtes)" }, + "properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "orange" } }] + }, + { + "matcher": { "id": "byName", "options": "Temps médian (requête typique)" }, + "properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "#34CCCD" } }] + } ] }, "targets": [ { - "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "sum by (level) (count_over_time({service=~\"$service\", level=~\".+\"} [$__interval]))", - "legendFormat": "{{level}}" + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, + "expr": "quantile_over_time(0.50, {service=\"backend\"} | json | responseTime > 0 | unwrap responseTime [$__interval])", + "legendFormat": "Temps médian (requête typique)", + "instant": false, + "range": true, + "refId": "A" + }, + { + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, + "expr": "quantile_over_time(0.95, {service=\"backend\"} | json | responseTime > 0 | unwrap responseTime [$__interval])", + "legendFormat": "Lent (5% des requêtes)", + "instant": false, + "range": true, + "refId": "B" + }, + { + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, + "expr": "quantile_over_time(0.99, {service=\"backend\"} | json | responseTime > 0 | unwrap responseTime [$__interval])", + "legendFormat": "Pire cas (1% des requêtes)", + "instant": false, + "range": true, + "refId": "C" } ] }, { "id": 8, - "title": "Volume par service", - "type": "timeseries", - "gridPos": { "h": 8, "w": 10, "x": 14, "y": 6 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "title": "Répartition par niveau de log", + "type": "piechart", + "gridPos": { "x": 16, "y": 12, "w": 8, "h": 8 }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "options": { - "legend": { "calcs": ["sum"], "displayMode": "list", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "multi", "sort": "desc" } + "pieType": "donut", + "tooltip": { "mode": "single" }, + "legend": { "displayMode": "list", "placement": "bottom", "values": ["percent"] } }, "fieldConfig": { - "defaults": { - "color": { "mode": "palette-classic" }, - "custom": { - "drawStyle": "bars", - "fillOpacity": 60, - "stacking": { "group": "A", "mode": "normal" }, - "lineWidth": 1, - "showPoints": "never", - "spanNulls": false - }, - "unit": "short", - "mappings": [], - "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } - }, - "overrides": [] + "defaults": { "unit": "short", "color": { "mode": "palette-classic" } }, + "overrides": [ + { "matcher": { "id": "byName", "options": "error" }, "properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "red" } }] }, + { "matcher": { "id": "byName", "options": "fatal" }, "properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "dark-red" } }] }, + { "matcher": { "id": "byName", "options": "warn" }, "properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "orange" } }] }, + { "matcher": { "id": "byName", "options": "info" }, "properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "#34CCCD" } }] }, + { "matcher": { "id": "byName", "options": "debug" }, "properties": [{ "id": "color", "value": { "mode": "fixed", "fixedColor": "blue" } }] } + ] }, "targets": [ { - "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "sum by (service) (count_over_time({service=~\"$service\"} [$__interval]))", - "legendFormat": "{{service}}" + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, + "expr": "sum by(level) (count_over_time({service=~\"$service\", level=~\"$level\"} [$__range]))", + "legendFormat": "{{level}}", + "instant": true, + "range": false, + "refId": "A" } ] }, - { - "id": 300, - "type": "row", - "title": "HTTP — Backend", - "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 } - }, - { "id": 9, - "title": "Taux d'erreur HTTP", - "type": "timeseries", - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "title": "Codes HTTP (5m)", + "type": "bargauge", + "gridPos": { "x": 0, "y": 20, "w": 12, "h": 8 }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "options": { - "legend": { "calcs": ["max", "mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "multi", "sort": "desc" } + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "displayMode": "gradient", + "valueMode": "color", + "showUnfilled": true, + "minVizWidth": 10, + "minVizHeight": 10 }, "fieldConfig": { "defaults": { - "color": { "mode": "palette-classic" }, - "custom": { - "drawStyle": "line", - "fillOpacity": 20, - "lineWidth": 2, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false - }, "unit": "short", - "mappings": [], - "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } + "color": { "mode": "palette-classic" }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "orange", "value": 1 } + ] + } }, - "overrides": [ - { "matcher": { "id": "byName", "options": "5xx" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, - { "matcher": { "id": "byName", "options": "4xx" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] }, - { "matcher": { "id": "byName", "options": "2xx" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] } - ] + "overrides": [] }, "targets": [ { - "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "sum(count_over_time({service=\"backend\"} | json | res_statusCode >= 500 [$__interval]))", - "legendFormat": "5xx" - }, - { - "refId": "B", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "sum(count_over_time({service=\"backend\"} | json | res_statusCode >= 400 < 500 [$__interval]))", - "legendFormat": "4xx" - }, - { - "refId": "C", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "sum(count_over_time({service=\"backend\"} | json | res_statusCode >= 200 < 300 [$__interval]))", - "legendFormat": "2xx" + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, + "expr": "sum by(status_code) (count_over_time({service=\"backend\"} | json | res_statusCode != \"\" | label_format status_code=\"{{res_statusCode}}\" [$__range]))", + "legendFormat": "HTTP {{status_code}}", + "instant": true, + "range": false, + "refId": "A" } ] }, { "id": 10, - "title": "Temps de réponse (ms)", - "type": "timeseries", - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "title": "Top erreurs par contexte NestJS", + "type": "bargauge", + "gridPos": { "x": 12, "y": 20, "w": 12, "h": 8 }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "options": { - "legend": { "calcs": ["max", "mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "multi", "sort": "desc" } + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "displayMode": "gradient", + "showUnfilled": true }, "fieldConfig": { "defaults": { - "color": { "mode": "palette-classic" }, - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 2, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false - }, - "unit": "ms", - "mappings": [], - "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "orange", "value": 500 }, { "color": "red", "value": 2000 }] } + "unit": "short", + "color": { "mode": "fixed", "fixedColor": "red" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }] } }, "overrides": [] }, "targets": [ { - "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "avg(avg_over_time({service=\"backend\"} | json | unwrap responseTime [$__interval]))", - "legendFormat": "Moy" - }, - { - "refId": "B", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "max(max_over_time({service=\"backend\"} | json | unwrap responseTime [$__interval]))", - "legendFormat": "Max" + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, + "expr": "topk(10, sum by(context) (count_over_time({service=\"backend\", level=~\"error|fatal\"} | json | context != \"\" [$__range]) ))", + "legendFormat": "{{context}}", + "instant": true, + "range": false, + "refId": "A" } ] }, - { - "id": 400, - "type": "row", - "title": "Logs — Flux en direct", - "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 } - }, - { "id": 11, - "title": "Backend — Logs", + "title": "Logs — Backend", "type": "logs", - "gridPos": { "h": 14, "w": 12, "x": 0, "y": 24 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "gridPos": { "x": 0, "y": 28, "w": 24, "h": 12 }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "options": { "dedupStrategy": "none", "enableLogDetails": true, @@ -503,24 +488,27 @@ }, "targets": [ { + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, + "expr": "{service=\"backend\", level=~\"$level\"}", + "legendFormat": "", + "instant": false, + "range": true, "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "{service=\"backend\", level=~\"$level\"} |= \"$search\"", - "legendFormat": "" + "maxLines": 500 } ] }, { "id": 12, - "title": "Frontend — Logs", + "title": "Logs — Frontend", "type": "logs", - "gridPos": { "h": 14, "w": 12, "x": 12, "y": 24 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, + "gridPos": { "x": 0, "y": 40, "w": 24, "h": 10 }, + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, "options": { "dedupStrategy": "none", "enableLogDetails": true, - "prettifyLogMessage": true, + "prettifyLogMessage": false, "showCommonLabels": false, "showLabels": false, "showTime": true, @@ -529,105 +517,13 @@ }, "targets": [ { + "datasource": { "type": "loki", "uid": "${DS_LOKI}" }, + "expr": "{service=\"frontend\"}", + "legendFormat": "", + "instant": false, + "range": true, "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "{service=\"frontend\", level=~\"$level\"} |= \"$search\"", - "legendFormat": "" - } - ] - }, - - { - "id": 500, - "type": "row", - "title": "Tous les logs filtrés", - "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 38 } - }, - - { - "id": 13, - "title": "Flux filtré — $service / $level", - "description": "Utilisez les variables en haut pour filtrer par service, niveau ou mot-clé", - "type": "logs", - "gridPos": { "h": 14, "w": 24, "x": 0, "y": 39 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "options": { - "dedupStrategy": "none", - "enableLogDetails": true, - "prettifyLogMessage": true, - "showCommonLabels": false, - "showLabels": true, - "showTime": true, - "sortOrder": "Descending", - "wrapLogMessage": true - }, - "targets": [ - { - "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "{service=~\"$service\", level=~\"$level\"} |= \"$search\"", - "legendFormat": "" - } - ] - }, - - { - "id": 600, - "type": "row", - "title": "Erreurs & Exceptions", - "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 53 } - }, - - { - "id": 14, - "title": "Erreurs — Backend", - "type": "logs", - "gridPos": { "h": 10, "w": 12, "x": 0, "y": 54 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "options": { - "dedupStrategy": "signature", - "enableLogDetails": true, - "prettifyLogMessage": true, - "showCommonLabels": false, - "showLabels": false, - "showTime": true, - "sortOrder": "Descending", - "wrapLogMessage": true - }, - "targets": [ - { - "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "{service=\"backend\", level=~\"error|fatal\"}", - "legendFormat": "" - } - ] - }, - - { - "id": 15, - "title": "Erreurs — Frontend", - "type": "logs", - "gridPos": { "h": 10, "w": 12, "x": 12, "y": 54 }, - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "options": { - "dedupStrategy": "signature", - "enableLogDetails": true, - "prettifyLogMessage": true, - "showCommonLabels": false, - "showLabels": false, - "showTime": true, - "sortOrder": "Descending", - "wrapLogMessage": true - }, - "targets": [ - { - "refId": "A", - "datasource": { "type": "loki", "uid": "loki-xpeditis" }, - "expr": "{service=\"frontend\", level=~\"error|fatal\"}", - "legendFormat": "" + "maxLines": 200 } ] } diff --git a/infra/logging/grafana/provisioning/datasources/loki.yml b/infra/logging/grafana/provisioning/datasources/loki.yml index b3102e9..3d48bde 100644 --- a/infra/logging/grafana/provisioning/datasources/loki.yml +++ b/infra/logging/grafana/provisioning/datasources/loki.yml @@ -5,7 +5,7 @@ datasources: uid: loki-xpeditis type: loki access: proxy - url: http://loki:3100 + url: http://xpeditis-loki:3100 isDefault: true version: 1 editable: false diff --git a/infra/logging/promtail/promtail-config.yml b/infra/logging/promtail/promtail-config.yml index 67c5b6b..df36f4a 100644 --- a/infra/logging/promtail/promtail-config.yml +++ b/infra/logging/promtail/promtail-config.yml @@ -21,7 +21,7 @@ scrape_configs: values: ['logging=promtail'] relabel_configs: - - source_labels: ['__meta_docker_container_label_com_docker_compose_service'] + - source_labels: ['__meta_docker_container_label_logging_service'] target_label: service - source_labels: ['__meta_docker_container_name'] regex: '/?(.*)'