veylant/deploy/alertmanager/alertmanager.yml
2026-02-23 13:35:04 +01:00

133 lines
5.6 KiB
YAML

global:
  # How long Alertmanager waits before treating an alert as resolved when the
  # alert carries no end time and has not been updated. This is not a
  # receiver or notification timeout.
  resolve_timeout: 5m
  # Default Slack webhook for every slack_configs entry that does not set its
  # own api_url. The value below is a placeholder and must be replaced with a
  # real webhook at deploy time.
  slack_api_url: 'https://hooks.slack.com/services/PLACEHOLDER'
# Glob of files that hold custom notification template definitions.
templates:
- '/etc/alertmanager/templates/*.tmpl'
# ──────────────────────────────────────────────────────────────────────────────
# Routing tree
# ──────────────────────────────────────────────────────────────────────────────
route:
  # Fallback receiver: anything no child route claims lands in the default
  # Slack channel.
  receiver: slack-default
  # One notification group per (alertname, provider) pair, which keeps alert
  # spam down.
  group_by:
  - alertname
  - provider
  # Hold the first notification of a new group for 30s so related alerts can
  # be batched into it.
  group_wait: 30s
  # Wait 5m before notifying about new alerts that join a group which has
  # already been notified.
  group_interval: 5m
  # Re-notify every 4h for as long as the group keeps firing.
  repeat_interval: 4h
  routes:
  # severity=critical → PagerDuty (on-call escalation).
  - receiver: pagerduty
    match:
      severity: critical
    # Shorter delays than the parent route: the first page goes out after 10s
    # (not instantly) and is repeated every 1h while still firing.
    group_wait: 10s
    repeat_interval: 1h
    # Stop at this route once it matches; later siblings are not evaluated.
    continue: false
  # severity=warning → dedicated Slack channel.
  - receiver: slack-warnings
    match:
      severity: warning
    continue: false
# ──────────────────────────────────────────────────────────────────────────────
# Inhibition rules
# ──────────────────────────────────────────────────────────────────────────────
inhibit_rules:
# A critical alert for a provider mutes warnings for that same provider, so a
# fully-down provider (circuit breaker + latency firing together) does not
# also flood the warning channel.
- source_match:
    severity: critical
  # Require the source alert to actually carry a provider label. Alertmanager
  # treats a label that is missing on both sides as equal, so without this
  # guard any provider-less critical alert would mute every provider-less
  # warning, which is not "the same provider".
  source_match_re:
    provider: '.+'
  target_match:
    severity: warning
  equal:
  - provider
# While VeylantProxyDown is firing, mute every other alert: the proxy is the
# root cause. The empty `equal` list means no labels have to agree between
# source and target. Alertmanager does not let an alert that matches both
# sides of a rule inhibit itself, so VeylantProxyDown is still delivered.
- source_match:
    alertname: VeylantProxyDown
  target_match_re:
    alertname: '.+'
  equal: []
# ──────────────────────────────────────────────────────────────────────────────
# Receivers
# ──────────────────────────────────────────────────────────────────────────────
receivers:
# Default Slack channel — catch-all for alerts that no child route claims.
- name: slack-default
  slack_configs:
  - channel: '#veylant-alerts'
    send_resolved: true
    username: 'Veylant Alertmanager'
    icon_emoji: ':warning:'
    # Folded (>-) on purpose: the two lines are joined into a one-line title.
    title: >-
      {{ if eq .Status "firing" }}🔴{{ else }}✅{{ end }}
      [{{ .Status | toUpper }}] {{ .CommonLabels.alertname }}
    # Literal (|-) so every field lands on its own line in Slack; a folded
    # scalar would join them all with spaces. The `-}}` after range drops the
    # newline that would otherwise precede each alert.
    # Fallbacks use the built-in `or`: Alertmanager templates have no
    # `default` function, and using it makes the template fail to parse.
    text: |-
      {{ range .Alerts -}}
      *Alert:* {{ .Annotations.summary }}
      *Description:* {{ .Annotations.description }}
      *Provider:* {{ or .Labels.provider "N/A" }}
      *Severity:* {{ .Labels.severity }}
      *Runbook:* {{ or .Annotations.runbook "N/A" }}
      {{ end }}
# Warning channel — operational warnings, lower urgency.
- name: slack-warnings
  slack_configs:
  - channel: '#veylant-warnings'
    send_resolved: true
    username: 'Veylant Alertmanager'
    icon_emoji: ':yellow_circle:'
    # Folded (>-) on purpose: the two lines are joined into a one-line title.
    title: >-
      {{ if eq .Status "firing" }}🟡{{ else }}✅{{ end }}
      [{{ .Status | toUpper }}] {{ .CommonLabels.alertname }}
    # Literal (|-) so every field lands on its own line in Slack; a folded
    # scalar would join them all with spaces. The `-}}` after range drops the
    # newline that would otherwise precede each alert.
    # The runbook fallback uses the built-in `or`: Alertmanager templates have
    # no `default` function, and using it makes the template fail to parse.
    text: |-
      {{ range .Alerts -}}
      *Alert:* {{ .Annotations.summary }}
      *Description:* {{ .Annotations.description }}
      *Runbook:* {{ or .Annotations.runbook "N/A" }}
      {{ end }}
# PagerDuty — critical on-call escalation.
- name: pagerduty
  pagerduty_configs:
  # NOTE(review): Alertmanager does not expand ${VAR} references in its config
  # file, so this placeholder presumably gets substituted by the deploy
  # tooling before Alertmanager reads the file — confirm.
  - routing_key: '${PAGERDUTY_INTEGRATION_KEY}'
    # Maps the group's shared severity label onto a PagerDuty severity;
    # anything that is not "critical" is reported as "warning".
    severity: >-
      {{ if eq .CommonLabels.severity "critical" }}critical{{ else }}warning{{ end }}
    description: '{{ .CommonAnnotations.summary }}'
    details:
      alertname: '{{ .CommonLabels.alertname }}'
      provider: '{{ .CommonLabels.provider }}'
      description: '{{ .CommonAnnotations.description }}'
      runbook: '{{ .CommonAnnotations.runbook }}'
  # Also notify Slack for visibility.
  slack_configs:
  - channel: '#veylant-critical'
    send_resolved: true
    username: 'Veylant Alertmanager'
    icon_emoji: ':red_circle:'
    # Folded (>-) on purpose: the two lines are joined into a one-line title.
    title: >-
      {{ if eq .Status "firing" }}🚨 CRITICAL{{ else }}✅ RESOLVED{{ end }}:
      {{ .CommonLabels.alertname }}
    # Literal (|-) so every field lands on its own line in Slack; a folded
    # scalar would join them all with spaces. The `-}}` after range drops the
    # newline that would otherwise precede each alert.
    # The runbook fallback uses the built-in `or`: Alertmanager templates have
    # no `default` function, and using it makes the template fail to parse.
    text: |-
      *PagerDuty escalated.*
      {{ range .Alerts -}}
      *Summary:* {{ .Annotations.summary }}
      *Description:* {{ .Annotations.description }}
      *Runbook:* {{ or .Annotations.runbook "N/A" }}
      {{ end }}