163 lines
6.9 KiB
Bash
163 lines
6.9 KiB
Bash
#!/usr/bin/env bash
|
|
# deploy/scripts/blue-green.sh
|
|
#
|
|
# Atomic blue/green deployment for Veylant IA proxy.
|
|
# Rollback time: < 5s (single kubectl patch on the Istio VirtualService).
|
|
#
|
|
# Strategy:
|
|
# 1. Detect which slot is currently active (blue|green) from the VirtualService.
|
|
# 2. Deploy the new image tag to the INACTIVE slot via helm upgrade.
|
|
# 3. Wait for the inactive slot's rollout to complete.
|
|
# 4. Smoke-test the inactive slot via a temp port-forward.
|
|
# 5. Switch 100% traffic to the new slot (patch VirtualService).
|
|
# 6. Verify health post-switch; roll back if verification fails.
|
|
# 7. Scale down the old slot to 0 replicas to free resources.
|
|
#
|
|
# Required env vars:
#   IMAGE_TAG        — Docker image tag to deploy (e.g. sha-abc123)
#
# Optional env vars:
#   NAMESPACE        — Kubernetes namespace (default: veylant)
#   KUBECONFIG       — path to kubeconfig (uses default if not set)
#   ROLLOUT_TIMEOUT  — timeout for `helm upgrade --wait` and `kubectl rollout status` (default: 5m)
#   SMOKE_RETRIES    — health check retries after switch (default: 5)
#   VEYLANT_URL      — base URL probed at /healthz after the switch (default: http://localhost:8090)
#   DRY_RUN          — set to "true" to print mutating commands without executing them
#                      (the read-only active-slot lookup still queries the cluster)
|
|
|
|
# Strict mode: abort on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# ── Config ────────────────────────────────────────────────────────────────────
# IMAGE_TAG is mandatory; the script stops here with the message below if it
# is unset or empty. The remaining settings fall back to defaults when unset
# or empty.
: "${IMAGE_TAG:?IMAGE_TAG is required}"
: "${NAMESPACE:=veylant}"
: "${ROLLOUT_TIMEOUT:=5m}"
: "${SMOKE_RETRIES:=5}"
: "${DRY_RUN:=false}"

# Helm chart location, relative to the directory the script is run from.
CHART_PATH="deploy/helm/veylant-proxy"
|
|
|
|
# ── Helpers ───────────────────────────────────────────────────────────────────
|
|
# Prints all arguments, joined by spaces, to stdout behind a "[blue-green] " tag.
# The text is passed as printf data, so it is printed literally.
log() {
  printf '[blue-green] %s\n' "$*"
}
|
|
# Prints a tagged error message to stderr and terminates the script with
# exit status 1.
die() {
  printf '[blue-green] ERROR: %s\n' "$*" >&2
  exit 1
}
|
|
|
|
# Executes the given command line, or only prints it when DRY_RUN is "true".
# Globals:   DRY_RUN (read)
# Arguments: the command and its arguments
# Outputs:   in dry-run mode, "[dry-run] <command line>" on stdout
# Returns:   the command's exit status (0 in dry-run mode)
run() {
  case "$DRY_RUN" in
    true) printf '[dry-run] %s\n' "$*" ;;
    *)    "$@" ;;
  esac
}
|
|
|
|
# ── Step 1: Detect active slot ────────────────────────────────────────────────
# The active slot is the subset named by the first route of the first HTTP rule
# on the VirtualService. If the lookup itself fails (for example the
# VirtualService does not exist yet), "blue" is assumed to be active so that
# the deploy targets "green". This lookup is read-only and also runs in
# dry-run mode.
log "Detecting active slot from VirtualService..."
ACTIVE_SLOT=$(kubectl get virtualservice veylant-proxy -n "$NAMESPACE" \
  -o jsonpath='{.spec.http[0].route[0].destination.subset}' 2>/dev/null || echo "blue")

# Only "blue" and "green" are valid slots. Previously every value other than
# "blue" was treated as "green is active", so an unexpected subset name would
# silently redirect the deploy (and later the scale-down) to the wrong place.
case "$ACTIVE_SLOT" in
  blue)
    INACTIVE_SLOT="green"
    ;;
  green)
    INACTIVE_SLOT="blue"
    ;;
  "")
    # The VirtualService exists but its first route names no subset. Keep the
    # historical behaviour (deploy to blue) but make the situation visible.
    log "WARNING: VirtualService route has no subset; treating 'blue' as the inactive slot."
    INACTIVE_SLOT="blue"
    ;;
  *)
    die "Unexpected active slot '${ACTIVE_SLOT}' in VirtualService (expected 'blue' or 'green')."
    ;;
esac

log "Active slot: ${ACTIVE_SLOT} → deploying to INACTIVE slot: ${INACTIVE_SLOT}"

# Helm release name and values file are derived from the target slot.
HELM_RELEASE="veylant-proxy-${INACTIVE_SLOT}"
VALUES_FILE="${CHART_PATH}/values-${INACTIVE_SLOT}.yaml"
|
|
|
|
# ── Step 2: Deploy to inactive slot ──────────────────────────────────────────
# Installs or upgrades the inactive slot's Helm release with the requested
# image tag. The arguments are collected in an array so each one stays a
# single word when handed to `run`.
log "Deploying image tag '${IMAGE_TAG}' to slot '${INACTIVE_SLOT}' (release: ${HELM_RELEASE})..."

helm_args=(
  upgrade --install "$HELM_RELEASE" "$CHART_PATH"
  -f "$VALUES_FILE"
  --namespace "$NAMESPACE"
  --create-namespace
  --set "image.tag=${IMAGE_TAG}"
  --set "slot=${INACTIVE_SLOT}"
  --wait
  --timeout "$ROLLOUT_TIMEOUT"
)
run helm "${helm_args[@]}"

log "Helm deploy complete for slot '${INACTIVE_SLOT}'."
|
|
|
|
# ── Step 3: Wait for rollout ──────────────────────────────────────────────────
# Blocks until the inactive slot's Deployment reports a finished rollout, or
# fails once ROLLOUT_TIMEOUT elapses. The Deployment is addressed by the Helm
# release name.
log "Waiting for deployment rollout (timeout: ${ROLLOUT_TIMEOUT})..."

rollout_target="deployment/${HELM_RELEASE}"
run kubectl rollout status "$rollout_target" -n "$NAMESPACE" --timeout "$ROLLOUT_TIMEOUT"

log "Rollout complete."
|
|
|
|
# ── Step 4: Smoke test on inactive slot ──────────────────────────────────────
# Probes /healthz on the freshly deployed slot through a temporary
# `kubectl port-forward` (local PF_PORT → container port 8090) before any
# traffic is switched. The whole step is skipped in dry-run mode.
log "Smoke-testing inactive slot via port-forward..."
PF_PORT=19090
if [[ "$DRY_RUN" != "true" ]]; then
  PF_PID=""
  # Use an unpredictable temp file rather than a fixed name under /tmp.
  PF_LOG=$(mktemp "${TMPDIR:-/tmp}/veylant-pf.XXXXXX") \
    || die "Could not create a temp file for the port-forward log."

  # Stops the background port-forward (if any) and removes its log file.
  # Safe to call more than once.
  stop_port_forward() {
    if [[ -n "${PF_PID:-}" ]]; then
      kill "$PF_PID" 2>/dev/null || true
      wait "$PF_PID" 2>/dev/null || true
      PF_PID=""
    fi
    if [[ -n "${PF_LOG:-}" ]]; then
      rm -f -- "$PF_LOG" || true
    fi
  }
  # Make sure the port-forward never outlives the script, whatever the exit path.
  trap stop_port_forward EXIT

  # Start port-forward in background; capture PID for cleanup.
  kubectl port-forward \
    "deployment/${HELM_RELEASE}" \
    "${PF_PORT}:8090" \
    -n "$NAMESPACE" &>"$PF_LOG" &
  PF_PID=$!
  # Give it 3s to establish.
  sleep 3

  SMOKE_OK=false
  for ((i = 1; i <= 5; i++)); do
    # No --fail here: with -f, curl prints the status code AND exits non-zero
    # on HTTP errors, so a fallback echo would produce values like "503000".
    # --max-time keeps a hung endpoint from stalling the deployment.
    HTTP_STATUS=$(curl -s -o /dev/null --max-time 5 -w "%{http_code}" \
      "http://localhost:${PF_PORT}/healthz" 2>/dev/null) || HTTP_STATUS="000"
    if [[ "$HTTP_STATUS" == "200" ]]; then
      SMOKE_OK=true
      break
    fi
    log " Smoke attempt ${i}/5: HTTP ${HTTP_STATUS} — retrying..."
    sleep 2
  done

  if [[ "$SMOKE_OK" != "true" ]]; then
    # Surface the port-forward output for diagnosis before it is cleaned up.
    cat -- "$PF_LOG" >&2 || true
    die "Smoke test failed on inactive slot '${INACTIVE_SLOT}'. Deployment ABORTED — active slot unchanged."
  fi

  stop_port_forward
  trap - EXIT
fi

log "Smoke test passed."
|
|
|
|
# ── Step 5: Switch traffic to new slot ───────────────────────────────────────
# Re-points the VirtualService at the newly deployed slot with weight 100.
# The message uses a single '%': log prints its text literally, so the former
# "100%%" appeared verbatim in the output.
log "Switching 100% traffic from '${ACTIVE_SLOT}' → '${INACTIVE_SLOT}'..."

# NOTE(review): a JSON merge patch replaces the whole spec.http list, so any
# additional HTTP rules or per-route settings on the VirtualService would be
# dropped by this patch — confirm that is intended.
printf -v SWITCH_PATCH \
  '{"spec":{"http":[{"route":[{"destination":{"host":"veylant-proxy","subset":"%s"},"weight":100}]}]}}' \
  "$INACTIVE_SLOT"
run kubectl patch virtualservice veylant-proxy -n "$NAMESPACE" --type merge \
  -p "$SWITCH_PATCH"

log "Traffic switched."
|
|
|
|
# ── Step 6: Verify post-switch ────────────────────────────────────────────────
# Polls ${VEYLANT_URL}/healthz up to SMOKE_RETRIES times. If no attempt returns
# HTTP 200, the VirtualService is patched back to the previously active slot
# and the script exits non-zero. In dry-run mode the check is skipped and
# treated as passed.
log "Verifying health post-switch (${SMOKE_RETRIES} attempts)..."
VEYLANT_URL="${VEYLANT_URL:-http://localhost:8090}"
POST_SWITCH_OK=false
if [[ "$DRY_RUN" != "true" ]]; then
  for i in $(seq 1 "$SMOKE_RETRIES"); do
    # No --fail here: with -f, curl prints the status code AND exits non-zero
    # on HTTP errors, so a fallback echo would produce values like "503000".
    # --max-time keeps a hung endpoint from delaying the rollback decision.
    HTTP_STATUS=$(curl -s -o /dev/null --max-time 5 -w "%{http_code}" \
      "${VEYLANT_URL}/healthz" 2>/dev/null) || HTTP_STATUS="000"
    if [[ "$HTTP_STATUS" == "200" ]]; then
      POST_SWITCH_OK=true
      break
    fi
    log " Post-switch check ${i}/${SMOKE_RETRIES}: HTTP ${HTTP_STATUS} — retrying..."
    sleep 2
  done
else
  POST_SWITCH_OK=true
fi

if [[ "$POST_SWITCH_OK" != "true" ]]; then
  log "Post-switch verification FAILED. Rolling back to '${ACTIVE_SLOT}'..."
  # Check the rollback explicitly: under `set -e` a failed patch would end the
  # script without saying that traffic is still on the unverified slot.
  if ! kubectl patch virtualservice veylant-proxy -n "$NAMESPACE" --type merge \
    -p "{\"spec\":{\"http\":[{\"route\":[{\"destination\":{\"host\":\"veylant-proxy\",\"subset\":\"${ACTIVE_SLOT}\"},\"weight\":100}]}]}}"; then
    die "ROLLBACK FAILED: could not re-point the VirtualService to '${ACTIVE_SLOT}'. Traffic is still routed to '${INACTIVE_SLOT}' — manual intervention required."
  fi
  die "Rollback complete. Active slot reverted to '${ACTIVE_SLOT}'."
fi

log "Post-switch verification passed."
|
|
|
|
# ── Step 7: Scale down old slot ───────────────────────────────────────────────
# Frees the previous slot's resources by scaling its Deployment to zero. A
# failed scale command is treated as "nothing to scale" and does not abort the
# script; kubectl's stderr is deliberately discarded for that case.
OLD_RELEASE="veylant-proxy-${ACTIVE_SLOT}"
log "Scaling down old slot '${ACTIVE_SLOT}' to 0 replicas..."
if ! run kubectl scale deployment "$OLD_RELEASE" --replicas=0 -n "$NAMESPACE" 2>/dev/null; then
  log " (scale-down skipped — release ${OLD_RELEASE} not found)"
fi

# Final summary, one log line per entry.
summary_lines=(
  ""
  "✓ Blue/green deployment complete."
  " Previous slot : ${ACTIVE_SLOT} (scaled to 0)"
  " Active slot : ${INACTIVE_SLOT} (image: ${IMAGE_TAG})"
  " Rollback : make deploy-rollback ACTIVE_SLOT=${ACTIVE_SLOT} NAMESPACE=${NAMESPACE}"
)
for summary_line in "${summary_lines[@]}"; do
  log "$summary_line"
done
|