#!/usr/bin/env bash
# deploy/scripts/blue-green.sh
#
# Atomic blue/green deployment for Veylant IA proxy.
# Rollback time: < 5s (single kubectl patch on the Istio VirtualService).
#
# Strategy:
#   1. Detect which slot is currently active (blue|green) from the VirtualService.
#   2. Deploy the new image tag to the INACTIVE slot via helm upgrade.
#   3. Wait for the inactive slot's rollout to complete.
#   4. Smoke-test the inactive slot via a temp port-forward.
#   5. Switch 100% traffic to the new slot (patch VirtualService).
#   6. Verify health post-switch; roll back if verification fails.
#   7. Scale down the old slot to 0 replicas to free resources.
#
# Required env vars:
#   IMAGE_TAG        — Docker image tag to deploy (e.g. sha-abc123)
#
# Optional env vars:
#   NAMESPACE        — Kubernetes namespace (default: veylant)
#   KUBECONFIG       — path to kubeconfig (uses default if not set)
#   ROLLOUT_TIMEOUT  — helm / kubectl rollout wait timeout (default: 5m)
#   SMOKE_RETRIES    — health check attempts after switch; positive integer (default: 5)
#   VEYLANT_URL      — base URL probed at /healthz after the switch
#                      (default: http://localhost:8090)
#   PF_PORT          — local port used for the smoke-test port-forward (default: 19090)
#   DRY_RUN          — set to "true" to print commands without executing

set -euo pipefail

# ── Config ────────────────────────────────────────────────────────────────────
IMAGE_TAG="${IMAGE_TAG:?IMAGE_TAG is required}"
NAMESPACE="${NAMESPACE:-veylant}"
ROLLOUT_TIMEOUT="${ROLLOUT_TIMEOUT:-5m}"
SMOKE_RETRIES="${SMOKE_RETRIES:-5}"
DRY_RUN="${DRY_RUN:-false}"
VEYLANT_URL="${VEYLANT_URL:-http://localhost:8090}"
PF_PORT="${PF_PORT:-19090}"
CHART_PATH="deploy/helm/veylant-proxy"

# Upper bound (seconds) for a single health probe, so a hung endpoint cannot
# stall the post-switch verification (and therefore the rollback).
readonly CURL_MAX_TIME=5

# State of the background port-forward; consumed by cleanup().
PF_PID=""
PF_LOG=""

# ── Helpers ───────────────────────────────────────────────────────────────────

# Print an informational message to stdout with the script prefix.
log() { printf '[blue-green] %s\n' "$*"; }

# Print an error message to stderr and exit with status 1.
die() {
  printf '[blue-green] ERROR: %s\n' "$*" >&2
  exit 1
}

# Execute the given command, or only print it when DRY_RUN is "true".
run() {
  if [[ "$DRY_RUN" == "true" ]]; then
    echo "[dry-run] $*"
  else
    "$@"
  fi
}

# Stop the background port-forward (if any) and remove its log file.
# Registered as the EXIT trap so it runs on every exit path; it is safe to
# call more than once.
cleanup() {
  if [[ -n "$PF_PID" ]]; then
    kill "$PF_PID" 2>/dev/null || true
    wait "$PF_PID" 2>/dev/null || true
    PF_PID=""
  fi
  if [[ -n "$PF_LOG" ]]; then
    rm -f -- "$PF_LOG"
    PF_LOG=""
  fi
}
trap cleanup EXIT

#######################################
# Print the HTTP status code returned by a GET on the given URL.
# Arguments: $1 - URL to probe
# Outputs:   exactly one line: the status code, or "000" when no response
#            was obtained
# Returns:   always 0
#######################################
http_status() {
  local url=$1
  local code
  # No -f here: with -f curl still emits %{http_code} and then exits non-zero,
  # which made the old `|| echo "000"` fallback append a second line.
  code=$(curl -s -o /dev/null -w '%{http_code}' \
    --max-time "$CURL_MAX_TIME" "$url" 2>/dev/null) || true
  printf '%s\n' "${code:-000}"
}

#######################################
# Poll a URL until it answers HTTP 200 or the attempts are exhausted.
# Arguments: $1 - URL to probe
#            $2 - maximum number of attempts (positive integer)
#            $3 - label used in the per-attempt log line
# Returns:   0 once a 200 is seen, 1 if every attempt failed
#######################################
wait_healthy() {
  local url=$1 attempts=$2 label=$3
  local i status
  for ((i = 1; i <= attempts; i++)); do
    status=$(http_status "$url")
    if [[ "$status" == "200" ]]; then
      return 0
    fi
    log " ${label} ${i}/${attempts}: HTTP ${status} — retrying..."
    # Nothing follows the last attempt, so do not delay the caller's
    # failure handling (abort / rollback) with a pointless sleep.
    if ((i < attempts)); then
      sleep 2
    fi
  done
  return 1
}

#######################################
# Print the merge-patch document that routes 100% of traffic to one subset.
# Arguments: $1 - subset name (blue|green)
# Outputs:   JSON patch on stdout (no trailing newline)
#######################################
route_patch() {
  printf '{"spec":{"http":[{"route":[{"destination":{"host":"veylant-proxy","subset":"%s"},"weight":100}]}]}}' "$1"
}

# ── Input validation ──────────────────────────────────────────────────────────
# Validate before touching the cluster: a bad SMOKE_RETRIES would otherwise
# run zero post-switch checks and roll back a perfectly healthy deployment.
[[ "$SMOKE_RETRIES" =~ ^[1-9][0-9]*$ ]] \
  || die "SMOKE_RETRIES must be a positive integer (got '${SMOKE_RETRIES}')."
[[ "$PF_PORT" =~ ^[0-9]+$ ]] \
  || die "PF_PORT must be a port number (got '${PF_PORT}')."

# ── Step 1: Detect active slot ────────────────────────────────────────────────
log "Detecting active slot from VirtualService..."
# Falls back to "blue" when the VirtualService cannot be read (e.g. first
# deployment). stderr is silenced on purpose because of that fallback.
ACTIVE_SLOT=$(kubectl get virtualservice veylant-proxy -n "$NAMESPACE" \
  -o jsonpath='{.spec.http[0].route[0].destination.subset}' 2>/dev/null || echo "blue")

case "$ACTIVE_SLOT" in
  blue)
    INACTIVE_SLOT="green"
    ;;
  green)
    INACTIVE_SLOT="blue"
    ;;
  "")
    log "WARNING: VirtualService has no subset on its first route — assuming 'blue' is active."
    ACTIVE_SLOT="blue"
    INACTIVE_SLOT="green"
    ;;
  *)
    die "Unexpected active slot '${ACTIVE_SLOT}' in VirtualService (expected blue|green)."
    ;;
esac

log "Active slot: ${ACTIVE_SLOT} → deploying to INACTIVE slot: ${INACTIVE_SLOT}"

HELM_RELEASE="veylant-proxy-${INACTIVE_SLOT}"
VALUES_FILE="${CHART_PATH}/values-${INACTIVE_SLOT}.yaml"

# ── Step 2: Deploy to inactive slot ──────────────────────────────────────────
log "Deploying image tag '${IMAGE_TAG}' to slot '${INACTIVE_SLOT}' (release: ${HELM_RELEASE})..."
run helm upgrade --install "$HELM_RELEASE" "$CHART_PATH" \
  -f "$VALUES_FILE" \
  --namespace "$NAMESPACE" \
  --create-namespace \
  --set image.tag="$IMAGE_TAG" \
  --set slot="$INACTIVE_SLOT" \
  --wait \
  --timeout "$ROLLOUT_TIMEOUT"

log "Helm deploy complete for slot '${INACTIVE_SLOT}'."

# ── Step 3: Wait for rollout ──────────────────────────────────────────────────
log "Waiting for deployment rollout (timeout: ${ROLLOUT_TIMEOUT})..."
run kubectl rollout status "deployment/${HELM_RELEASE}" \
  -n "$NAMESPACE" \
  --timeout "$ROLLOUT_TIMEOUT"

log "Rollout complete."

# ── Step 4: Smoke test on inactive slot ──────────────────────────────────────
log "Smoke-testing inactive slot via port-forward..."

if [[ "$DRY_RUN" != "true" ]]; then
  PF_LOG=$(mktemp "${TMPDIR:-/tmp}/veylant-pf.XXXXXX") \
    || die "Could not create a temp file for the port-forward log."

  # Start port-forward in background; cleanup() (EXIT trap) stops it.
  kubectl port-forward \
    "deployment/${HELM_RELEASE}" \
    "${PF_PORT}:8090" \
    -n "$NAMESPACE" &>"$PF_LOG" &
  PF_PID=$!

  # Give it 3s to establish.
  sleep 3

  # If the port-forward already died (e.g. local port in use), probing
  # localhost would test some unrelated listener — abort instead.
  if ! kill -0 "$PF_PID" 2>/dev/null; then
    cat -- "$PF_LOG" >&2 || true
    die "Port-forward to 'deployment/${HELM_RELEASE}' exited early. Deployment ABORTED — active slot unchanged."
  fi

  if ! wait_healthy "http://localhost:${PF_PORT}/healthz" 5 "Smoke attempt"; then
    # Surface the port-forward output before the EXIT trap deletes it.
    cat -- "$PF_LOG" >&2 || true
    die "Smoke test failed on inactive slot '${INACTIVE_SLOT}'. Deployment ABORTED — active slot unchanged."
  fi

  cleanup
fi

log "Smoke test passed."

# ── Step 5: Switch traffic to new slot ───────────────────────────────────────
log "Switching 100% traffic from '${ACTIVE_SLOT}' → '${INACTIVE_SLOT}'..."

# NOTE: a JSON merge patch replaces arrays wholesale, so this overwrites the
# whole spec.http list with the single route produced by route_patch.
run kubectl patch virtualservice veylant-proxy -n "$NAMESPACE" --type merge \
  -p "$(route_patch "$INACTIVE_SLOT")"

log "Traffic switched."

# ── Step 6: Verify post-switch ────────────────────────────────────────────────
log "Verifying health post-switch (${SMOKE_RETRIES} attempts)..."

POST_SWITCH_OK=false
if [[ "$DRY_RUN" == "true" ]]; then
  POST_SWITCH_OK=true
elif wait_healthy "${VEYLANT_URL}/healthz" "$SMOKE_RETRIES" "Post-switch check"; then
  POST_SWITCH_OK=true
fi

if [[ "$POST_SWITCH_OK" != "true" ]]; then
  log "Post-switch verification FAILED. Rolling back to '${ACTIVE_SLOT}'..."
  # Not wrapped in run(): this branch is unreachable in dry-run mode.
  kubectl patch virtualservice veylant-proxy -n "$NAMESPACE" --type merge \
    -p "$(route_patch "$ACTIVE_SLOT")" \
    || die "ROLLBACK FAILED — VirtualService may still route to '${INACTIVE_SLOT}'. Manual intervention required."
  die "Rollback complete. Active slot reverted to '${ACTIVE_SLOT}'."
fi

log "Post-switch verification passed."

# ── Step 7: Scale down old slot ───────────────────────────────────────────────
log "Scaling down old slot '${ACTIVE_SLOT}' to 0 replicas..."
OLD_RELEASE="veylant-proxy-${ACTIVE_SLOT}"

# Best effort: the old deployment may not exist (e.g. first deployment), so a
# failure here is reported but does not fail the deployment.
# NOTE(review): once the old slot is at 0 replicas, switching back to it is
# presumably no longer instant — confirm what `make deploy-rollback` does.
if run kubectl scale deployment "$OLD_RELEASE" --replicas=0 -n "$NAMESPACE" 2>/dev/null; then
  OLD_SLOT_STATE="scaled to 0"
else
  log " (scale-down skipped — release ${OLD_RELEASE} not found)"
  OLD_SLOT_STATE="scale-down skipped"
fi

log ""
log "✓ Blue/green deployment complete."
log " Previous slot : ${ACTIVE_SLOT} (${OLD_SLOT_STATE})"
log " Active slot : ${INACTIVE_SLOT} (image: ${IMAGE_TAG})"
log " Rollback : make deploy-rollback ACTIVE_SLOT=${ACTIVE_SLOT} NAMESPACE=${NAMESPACE}"