# File: veylant/deploy/k8s/production/postgres-backup.yaml
# Listing metadata: 2026-02-23 13:35:04 +01:00 · 120 lines · 4.4 KiB · YAML

---
# CronJob: daily logical backup of the production PostgreSQL database.
# The dump is compressed and streamed straight to S3; nothing is written to
# the pod's filesystem. Database and AWS credentials come from Vault.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: veylant-postgres-backup
  namespace: veylant
  labels:
    app.kubernetes.io/name: veylant-postgres-backup
    app.kubernetes.io/component: backup
spec:
  # Run daily at 02:00 UTC — off-peak for EU West.
  schedule: "0 2 * * *"
  # Do not start a new run while a previous one is still active.
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 7
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      # Retry once on failure before marking as failed.
      backoffLimit: 1
      template:
        metadata:
          labels:
            app.kubernetes.io/name: veylant-postgres-backup
          annotations:
            # Vault Agent Injector — inject secrets from Vault.
            vault.hashicorp.com/agent-inject: "true"
            # Render secrets with the init container only. Without this the
            # injector also adds a long-running agent sidecar, the pod never
            # reaches Completed, and (with concurrencyPolicy: Forbid) every
            # later scheduled run is skipped.
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "veylant-backup"
            # Rendered to /vault/secrets/db as a shell snippet of exports.
            vault.hashicorp.com/agent-inject-secret-db: "secret/veylant/production/db"
            vault.hashicorp.com/agent-inject-template-db: |
              {{- with secret "secret/veylant/production/db" -}}
              export PGPASSWORD="{{ .Data.data.password }}"
              export PGUSER="{{ .Data.data.username }}"
              export PGHOST="{{ .Data.data.host }}"
              export PGDATABASE="{{ .Data.data.dbname }}"
              {{- end }}
            # Rendered to /vault/secrets/aws as a shell snippet of exports.
            # NOTE(review): the veylant-backup ServiceAccount also carries an
            # IRSA role annotation; static keys exported into the environment
            # take precedence in the AWS CLI credential chain — confirm which
            # mechanism is intended.
            vault.hashicorp.com/agent-inject-secret-aws: "secret/veylant/production/aws"
            vault.hashicorp.com/agent-inject-template-aws: |
              {{- with secret "secret/veylant/production/aws" -}}
              export AWS_ACCESS_KEY_ID="{{ .Data.data.access_key_id }}"
              export AWS_SECRET_ACCESS_KEY="{{ .Data.data.secret_access_key }}"
              export AWS_DEFAULT_REGION="{{ .Data.data.region }}"
              {{- end }}
        spec:
          restartPolicy: OnFailure
          serviceAccountName: veylant-backup
          securityContext:
            runAsNonRoot: true
            runAsUser: 999
            fsGroup: 999
          containers:
            - name: pg-backup
              image: postgres:16-alpine
              imagePullPolicy: IfNotPresent
              resources:
                requests:
                  cpu: 100m
                  memory: 128Mi
                limits:
                  cpu: 500m
                  memory: 256Mi
              env:
                - name: S3_BUCKET
                  value: "veylant-backups-production"
                - name: BACKUP_PREFIX
                  value: "postgres"
              command:
                - /bin/sh
                - -c
                - |
                  set -euo pipefail
                  # Load secrets injected by Vault Agent.
                  . /vault/secrets/db
                  . /vault/secrets/aws
                  # The AWS CLI is not in postgres:16-alpine by default. Install it
                  # only if missing, and fail loudly if that is impossible: the pod
                  # runs as non-root (UID 999), so a silent "|| true" would only
                  # defer the failure to the upload step, after pg_dump has started.
                  # NOTE(review): prefer an image that already bundles aws-cli.
                  if ! command -v aws >/dev/null 2>&1; then
                    apk add --no-cache aws-cli || {
                      echo "aws-cli is not available and could not be installed" >&2
                      exit 1
                    }
                  fi
                  TIMESTAMP=$(date -u +"%Y%m%d_%H%M%S")
                  FILENAME="${BACKUP_PREFIX}_${TIMESTAMP}.sql.gz"
                  S3_PATH="s3://${S3_BUCKET}/${BACKUP_PREFIX}/${FILENAME}"
                  echo "[$(date -u)] Starting backup: ${FILENAME}"
                  # Dump and compress — pipe directly to S3 without storing locally.
                  # pipefail makes a pg_dump or gzip failure fail the whole pipeline.
                  pg_dump \
                    --host="${PGHOST}" \
                    --username="${PGUSER}" \
                    --dbname="${PGDATABASE}" \
                    --format=plain \
                    --no-password \
                    --verbose \
                    | gzip -9 \
                    | aws s3 cp - "${S3_PATH}" \
                      --storage-class STANDARD_IA \
                      --metadata "created-by=veylant-backup,db=${PGDATABASE}"
                  echo "[$(date -u)] Backup completed: ${S3_PATH}"
                  # Confirm the object is listed in the bucket (existence check only;
                  # this does not download or validate the dump contents).
                  aws s3 ls "${S3_PATH}" || { echo "Upload verification failed"; exit 1; }
                  echo "[$(date -u)] Backup verified successfully."
---
# S3 Lifecycle policy is managed in Terraform (deploy/terraform/main.tf).
# Retention: 7 daily backups kept automatically via S3 lifecycle rules.
# Manual restore: aws s3 cp s3://veylant-backups-production/postgres/<file> - | gunzip | psql
---
# ServiceAccount referenced by the veylant-postgres-backup CronJob pods
# (serviceAccountName: veylant-backup).
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: veylant
  name: veylant-backup
  annotations:
    # AWS IRSA — binds this account to the IAM role that grants S3 write
    # access (the role itself is created in Terraform).
    # NOTE(review): ACCOUNT_ID looks like a placeholder — confirm it is
    # substituted with the real AWS account ID before this is applied.
    eks.amazonaws.com/role-arn: "arn:aws:iam::ACCOUNT_ID:role/veylant-backup-role"
  labels:
    app.kubernetes.io/name: veylant-backup