Files
homelab-docs/deploy-reduced-alerts.sh

83 lines
3.1 KiB
Bash
Executable File

#!/bin/bash
#
# Deploy Reduced Alert Configuration
#
# Pushes an updated Prometheus alert-rules file and an updated Alertmanager
# configuration to the Prometheus host in order to cut notification noise.
# Intended result: only CRITICAL alerts trigger Discord notifications.

# Abort as soon as any simple command returns a non-zero status.
set -o errexit

# SSH login and address of the host that the rest of the script targets.
PROMETHEUS_USER='root'
PROMETHEUS_HOST='10.0.10.25'

printf '%s\n' "🚀 Deploying reduced alert configuration to Prometheus..." ""
# Check if files exist
# Both replacement files are referred to by relative name when they are
# uploaded, so they must be present in the current working directory.
# Stop here, before anything on the remote host is touched, if one is missing.
for required_file in prometheus-alert-rules-updated.yml alertmanager-config-updated.yml; do
  if [ ! -f "${required_file}" ]; then
    # Diagnostics go to stderr so they stay visible when stdout is redirected.
    echo "❌ Error: ${required_file} not found" >&2
    exit 1
  fi
done
# Backup existing configs
# Copy the live Alertmanager config and rules file into a backup directory
# on the remote host before they are overwritten.
echo "📦 Backing up existing configurations..."
remote_target="${PROMETHEUS_USER}@${PROMETHEUS_HOST}"
backup_dir="/etc/prometheus/backups"
# One timestamp shared by both copies so the pair can be matched up later.
backup_stamp="$(date +%Y%m%d-%H%M%S)"
backup_failures=0

ssh "${remote_target}" "mkdir -p '${backup_dir}'"

for live_file in /etc/prometheus/alertmanager.yml /etc/prometheus/rules/homelab-alerts.yml; do
  backup_path="${backup_dir}/${live_file##*/}.${backup_stamp}"
  # A failed copy (for example, the file does not exist yet) remains
  # non-fatal, but it is reported rather than silently discarded.
  if ssh "${remote_target}" "cp '${live_file}' '${backup_path}'"; then
    echo "   ${live_file} -> ${backup_path}"
  else
    echo "⚠️  Warning: could not back up ${live_file}" >&2
    backup_failures=$((backup_failures + 1))
  fi
done

# Only claim success when every copy actually succeeded.
if [ "${backup_failures}" -eq 0 ]; then
  echo "✅ Backups created in ${backup_dir}/"
else
  echo "⚠️  Warning: ${backup_failures} backup(s) failed; continuing without them" >&2
fi
echo ""
# Upload new configs
# Copy each local replacement file over its live counterpart on the remote
# host: the Alertmanager config first, then the Prometheus rules file.
echo "📤 Uploading new configurations..."
upload_dest="${PROMETHEUS_USER}@${PROMETHEUS_HOST}"
scp alertmanager-config-updated.yml "${upload_dest}:/etc/prometheus/alertmanager.yml"
scp prometheus-alert-rules-updated.yml "${upload_dest}:/etc/prometheus/rules/homelab-alerts.yml"
printf '%s\n' "✅ Files uploaded" ""
# Reload services
# Ask systemd on the remote host to reload each unit in turn, Prometheus
# first and Alertmanager second.
echo "🔄 Reloading Prometheus and Alertmanager..."
for unit in prometheus prometheus-alertmanager; do
  ssh "${PROMETHEUS_USER}@${PROMETHEUS_HOST}" "systemctl reload ${unit}"
done
printf '%s\n' "✅ Services reloaded" ""
# Verify configuration
# Print a heading, then the first ten lines of `systemctl status` for one
# unit on the remote host, followed by a blank separator line.
#   $1 - label shown in the heading
#   $2 - systemd unit name
show_remote_status() {
  echo "$1 status:"
  ssh "${PROMETHEUS_USER}@${PROMETHEUS_HOST}" "systemctl status $2 --no-pager -l | head -10"
  echo ""
}

echo "🔍 Verifying configuration..."
echo ""
show_remote_status "Prometheus" prometheus
show_remote_status "Alertmanager" prometheus-alertmanager
# Test API endpoints
# Query both HTTP APIs from the machine running this script as a smoke test.
echo "Testing API endpoints..."
echo ""

# Fetch a URL and print at most a given number of lines of the response body.
# A failed request is reported on stderr and the function still returns 0,
# so a failed probe never aborts the script after the services were reloaded.
#   $1 - label used in the warning message
#   $2 - URL to fetch
#   $3 - maximum number of response lines to print
probe_endpoint() {
  local label=$1 url=$2 max_lines=$3 body
  # -f: HTTP status >= 400 counts as failure; -sS: no progress meter but
  # errors are still shown; --max-time: do not hang on an unreachable host.
  if body=$(curl -fsS --max-time 10 "${url}"); then
    printf '%s\n' "${body}" | head -n "${max_lines}"
  else
    echo "⚠️  Warning: ${label} request failed: ${url}" >&2
  fi
}

echo "Prometheus rules API:"
probe_endpoint "Prometheus rules API" "http://${PROMETHEUS_HOST}:9090/api/v1/rules" 200
echo ""
echo "Alertmanager status API:"
# Alertmanager removed its v1 API in 0.27.0; the v2 API has been available
# since 0.16.0, so the v2 path works on both older and current releases.
probe_endpoint "Alertmanager status API" "http://${PROMETHEUS_HOST}:9093/api/v2/status" 100
echo ""
# Print the closing report: a completion banner, a recap of the alerting
# policy this deployment is meant to put in place, links to both web UIs,
# and a ready-to-paste command for firing a test critical alert.
# Reads PROMETHEUS_HOST to build the URLs.
print_deploy_summary() {
  echo "✅ Deployment complete!"
  echo ""
  echo "📊 Summary of changes:"
  echo " • CPU alert threshold: 80%+ over 5 minutes (warning)"
  echo " • CPU critical threshold: 95%+ over 5 minutes (notification)"
  echo " • Only CRITICAL alerts sent to Discord"
  echo " • WARNING alerts logged but NOT sent"
  echo " • Email notifications completely disabled"
  echo ""
  echo "🔗 Check alert status:"
  echo " Prometheus: http://${PROMETHEUS_HOST}:9090/alerts"
  echo " Alertmanager: http://${PROMETHEUS_HOST}:9093/#/alerts"
  echo ""
  echo "🧪 Test critical alert (sends to Discord):"
  # Uses the v2 alerts endpoint because Alertmanager removed the v1 API in
  # 0.27.0. The v2 endpoint only accepts JSON, so the Content-Type header is
  # required (curl -d would otherwise send a form-encoded content type).
  echo " curl -X POST http://${PROMETHEUS_HOST}:9093/api/v2/alerts -H 'Content-Type: application/json' -d '[{\"labels\":{\"alertname\":\"TestCriticalAlert\",\"severity\":\"critical\",\"instance\":\"test:9100\"},\"annotations\":{\"summary\":\"Test - please ignore\"}}]'"
  echo ""
}

print_deploy_summary