# Listing metadata: 133 lines, 5.6 KiB, YAML
# Global settings shared by all receivers unless a receiver overrides them.
global:
  # An alert that arrives without an end time is treated as resolved when it
  # has not been refreshed within this window.
  resolve_timeout: 5m

  # Default Slack webhook for every slack_configs entry (overridable per
  # receiver). The value here is a placeholder, not a working webhook.
  slack_api_url: "https://hooks.slack.com/services/PLACEHOLDER"

# Files from which custom notification templates are read; the last path
# component is a glob. The receivers in this listing define their Slack
# title/text inline.
templates:
- "/etc/alertmanager/templates/*.tmpl"

# ──────────────────────────────────────────────────────────────────────────────
# Routing tree
# ──────────────────────────────────────────────────────────────────────────────
route:
  # Fallback receiver: alerts that match no child route below go to Slack.
  receiver: slack-default

  # Batch alerts that share alertname and provider into one notification.
  group_by: [alertname, provider]

  # Hold the first notification of a new group for 30s so related alerts can
  # join it.
  group_wait: 30s

  # Once a group has notified, wait 5m before announcing alerts added to it.
  group_interval: 5m

  # Re-send the notification every 4h while the alert is still firing.
  repeat_interval: 4h

  # NOTE(review): `match` is deprecated in favour of `matchers` in newer
  # Alertmanager releases; kept as-is because the target version is not
  # visible here.
  routes:
  # Critical alerts → PagerDuty (on-call escalation).
  - match:
      severity: critical
    receiver: pagerduty
    # Shorter batching window than the 30s default (still not instantaneous)
    # and an hourly reminder while the alert keeps firing.
    group_wait: 10s
    repeat_interval: 1h
    # Stop here; a matched alert is not evaluated against sibling routes.
    continue: false

  # Warning alerts → dedicated Slack channel.
  - match:
      severity: warning
    receiver: slack-warnings
    continue: false

# ──────────────────────────────────────────────────────────────────────────────
# Inhibition rules
# ──────────────────────────────────────────────────────────────────────────────
# NOTE(review): source_match / target_match / target_match_re are deprecated
# in favour of source_matchers / target_matchers in newer Alertmanager
# releases; kept as-is because the target version is not visible here.
inhibit_rules:
# While a critical alert fires for a provider, mute warning alerts for that
# same provider. Avoids noise when a provider is fully down (circuit breaker
# and latency alerts fire together).
# A missing label counts as an empty value, so a critical alert WITHOUT a
# provider label also mutes every warning that has no provider label.
- source_match:
    severity: critical
  target_match:
    severity: warning
  equal: [provider]

# While VeylantProxyDown fires, mute all other alerts (the proxy is the root
# cause). The empty `equal` list means no labels have to agree. Alertmanager
# does not let an alert that matches both sides of a rule inhibit itself, so
# VeylantProxyDown still notifies.
- source_match:
    alertname: VeylantProxyDown
  target_match_re:
    alertname: ".+"
  equal: []

# ──────────────────────────────────────────────────────────────────────────────
# Receivers
# ──────────────────────────────────────────────────────────────────────────────
receivers:
# Default Slack channel — catch-all for uncategorised alerts.
- name: slack-default
  slack_configs:
  - channel: "#veylant-alerts"
    send_resolved: true
    username: "Veylant Alertmanager"
    icon_emoji: ":warning:"
    # Folded on purpose: both lines join into a single-line title.
    title: >-
      {{ if eq .Status "firing" }}🔴{{ else }}✅{{ end }}
      [{{ .Status | toUpper }}] {{ .CommonLabels.alertname }}
    # Literal block (|-) so every field lands on its own line in Slack; a
    # folded block would join them into one line.
    # Alertmanager templates have no `default` function, so the "N/A"
    # fallbacks are spelled out with if/else.
    text: |-
      {{ range .Alerts -}}
      *Alert:* {{ .Annotations.summary }}
      *Description:* {{ .Annotations.description }}
      *Provider:* {{ if .Labels.provider }}{{ .Labels.provider }}{{ else }}N/A{{ end }}
      *Severity:* {{ .Labels.severity }}
      *Runbook:* {{ if .Annotations.runbook }}{{ .Annotations.runbook }}{{ else }}N/A{{ end }}
      {{ end }}

# Warning channel — operational warnings, lower urgency.
- name: slack-warnings
  slack_configs:
  - channel: "#veylant-warnings"
    send_resolved: true
    username: "Veylant Alertmanager"
    icon_emoji: ":yellow_circle:"
    # Folded on purpose: both lines join into a single-line title.
    title: >-
      {{ if eq .Status "firing" }}🟡{{ else }}✅{{ end }}
      [{{ .Status | toUpper }}] {{ .CommonLabels.alertname }}
    # Literal block (|-) so every field lands on its own line in Slack.
    # Alertmanager templates have no `default` function, so the "N/A"
    # fallback is spelled out with if/else.
    text: |-
      {{ range .Alerts -}}
      *Alert:* {{ .Annotations.summary }}
      *Description:* {{ .Annotations.description }}
      *Runbook:* {{ if .Annotations.runbook }}{{ .Annotations.runbook }}{{ else }}N/A{{ end }}
      {{ end }}

# PagerDuty — critical on-call escalation.
- name: pagerduty
  pagerduty_configs:
  # NOTE(review): Alertmanager does not expand ${VAR} references in its config
  # file. This placeholder presumably gets substituted by deployment tooling
  # (e.g. envsubst) before the file is loaded — TODO confirm; otherwise
  # consider routing_key_file.
  - routing_key: "${PAGERDUTY_INTEGRATION_KEY}"
    # Anything whose common severity label is not "critical" is reported to
    # PagerDuty as "warning".
    severity: >-
      {{ if eq .CommonLabels.severity "critical" }}critical{{ else }}warning{{ end }}
    description: "{{ .CommonAnnotations.summary }}"
    # Extra key/value context attached to the PagerDuty incident.
    details:
      alertname: "{{ .CommonLabels.alertname }}"
      provider: "{{ .CommonLabels.provider }}"
      description: "{{ .CommonAnnotations.description }}"
      runbook: "{{ .CommonAnnotations.runbook }}"
  # Also notify Slack for visibility.
  slack_configs:
  - channel: "#veylant-critical"
    send_resolved: true
    username: "Veylant Alertmanager"
    icon_emoji: ":red_circle:"
    # Folded on purpose: both lines join into a single-line title.
    title: >-
      {{ if eq .Status "firing" }}🚨 CRITICAL{{ else }}✅ RESOLVED{{ end }}:
      {{ .CommonLabels.alertname }}
    # Literal block (|-) so every field lands on its own line in Slack.
    # Alertmanager templates have no `default` function, so the "N/A"
    # fallback is spelled out with if/else.
    text: |-
      *PagerDuty escalated.*
      {{ range .Alerts -}}
      *Summary:* {{ .Annotations.summary }}
      *Description:* {{ .Annotations.description }}
      *Runbook:* {{ if .Annotations.runbook }}{{ .Annotations.runbook }}{{ else }}N/A{{ end }}
      {{ end }}