global:
  # If an alert arrives without an end time, consider it resolved once it has
  # not been updated for this long.
  resolve_timeout: 5m

  # Slack default settings (overridden per receiver if needed).
  # NOTE(review): the webhook URL is a secret; the value here is a placeholder
  # that must be replaced at deploy time.
  slack_api_url: "https://hooks.slack.com/services/PLACEHOLDER"

# Templates for Slack message formatting.
templates:
  - "/etc/alertmanager/templates/*.tmpl"

# ──────────────────────────────────────────────────────────────────────────────
# Routing tree
# ──────────────────────────────────────────────────────────────────────────────
# NOTE(review): `match` is the legacy matcher syntax, deprecated in favour of
# `matchers` in newer Alertmanager releases. Kept as-is for compatibility.
route:
  # Default receiver: all alerts go to Slack unless matched by a child route.
  receiver: slack-default

  # Group alerts by alert name and provider to avoid alert spam.
  group_by: [alertname, provider]

  # Wait 30s before sending the first notification (allows grouping).
  group_wait: 30s

  # Wait 5m before sending a notification about new alerts in an existing group.
  group_interval: 5m

  # Resend a notification every 4h if the alert is still firing.
  repeat_interval: 4h

  routes:
    # Critical alerts → PagerDuty (on-call escalation).
    - match:
        severity: critical
      receiver: pagerduty
      # Critical alerts use a shorter grouping delay (10s instead of 30s) and
      # are re-sent every hour while still firing.
      group_wait: 10s
      repeat_interval: 1h
      continue: false

    # Warning alerts → dedicated Slack channel.
    - match:
        severity: warning
      receiver: slack-warnings
      continue: false

# ──────────────────────────────────────────────────────────────────────────────
# Inhibition rules
# ──────────────────────────────────────────────────────────────────────────────
# NOTE(review): `source_match` / `target_match` / `target_match_re` are the
# legacy syntax, deprecated in favour of `source_matchers` / `target_matchers`.
inhibit_rules:
  # If a critical alert fires for a provider, suppress warnings for the same
  # provider. Avoids noise when a provider is fully down (circuit breaker +
  # latency fire together).
  # NOTE(review): a missing `provider` label compares equal to an empty one, so
  # a critical alert without `provider` also mutes warnings without `provider`.
  - source_match:
      severity: critical
    target_match:
      severity: warning
    equal: [provider]

  # If ProxyDown fires, suppress all other alerts (proxy is the root cause).
  # The empty `equal` list means no labels have to match between source and
  # target. Alertmanager does not let an alert that matches both sides of a
  # rule inhibit itself, so VeylantProxyDown is still delivered.
  - source_match:
      alertname: VeylantProxyDown
    target_match_re:
      alertname: ".+"
    equal: []

# ──────────────────────────────────────────────────────────────────────────────
# Receivers
# ──────────────────────────────────────────────────────────────────────────────
# The `text` bodies use literal block scalars (`|-`) so that each field is
# rendered on its own line in Slack. Fallback values use the Go template
# built-in `or`, because Alertmanager templates have no `default` function.
receivers:
  # Default Slack channel — catch-all for uncategorised alerts.
  - name: slack-default
    slack_configs:
      - channel: "#veylant-alerts"
        send_resolved: true
        username: "Veylant Alertmanager"
        icon_emoji: ":warning:"
        title: >-
          {{ if eq .Status "firing" }}🔴{{ else }}✅{{ end }}
          [{{ .Status | toUpper }}] {{ .CommonLabels.alertname }}
        text: |-
          {{ range .Alerts }}
          *Alert:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          *Provider:* {{ or .Labels.provider "N/A" }}
          *Severity:* {{ .Labels.severity }}
          *Runbook:* {{ or .Annotations.runbook "N/A" }}
          {{ end }}

  # Warning channel — operational warnings, lower urgency.
  - name: slack-warnings
    slack_configs:
      - channel: "#veylant-warnings"
        send_resolved: true
        username: "Veylant Alertmanager"
        icon_emoji: ":yellow_circle:"
        title: >-
          {{ if eq .Status "firing" }}🟡{{ else }}✅{{ end }}
          [{{ .Status | toUpper }}] {{ .CommonLabels.alertname }}
        text: |-
          {{ range .Alerts }}
          *Alert:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          *Runbook:* {{ or .Annotations.runbook "N/A" }}
          {{ end }}

  # PagerDuty — critical on-call escalation.
  - name: pagerduty
    pagerduty_configs:
      # NOTE(review): Alertmanager does not expand environment variables in
      # its config file; presumably this placeholder is substituted by the
      # deployment tooling before the file is loaded — confirm.
      - routing_key: "${PAGERDUTY_INTEGRATION_KEY}"
        severity: >-
          {{ if eq .CommonLabels.severity "critical" }}critical{{ else }}warning{{ end }}
        description: "{{ .CommonAnnotations.summary }}"
        details:
          alertname: "{{ .CommonLabels.alertname }}"
          provider: "{{ .CommonLabels.provider }}"
          description: "{{ .CommonAnnotations.description }}"
          runbook: "{{ .CommonAnnotations.runbook }}"
    # Also notify Slack for visibility.
    slack_configs:
      - channel: "#veylant-critical"
        send_resolved: true
        username: "Veylant Alertmanager"
        icon_emoji: ":red_circle:"
        title: >-
          {{ if eq .Status "firing" }}🚨 CRITICAL{{ else }}✅ RESOLVED{{ end }}:
          {{ .CommonLabels.alertname }}
        text: |-
          *PagerDuty escalated.*
          {{ range .Alerts }}
          *Summary:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          *Runbook:* {{ or .Annotations.runbook "N/A" }}
          {{ end }}