Initial infrastructure documentation - comprehensive homelab reference
This commit is contained in:
82
deploy-reduced-alerts.sh
Executable file
82
deploy-reduced-alerts.sh
Executable file
@@ -0,0 +1,82 @@
|
||||
#!/bin/bash
# Deploy Reduced Alert Configuration
#
# Updates Prometheus alert rules and Alertmanager config to reduce
# notification noise. Only CRITICAL alerts trigger Discord notifications.
#
# Usage:
#   ./deploy-reduced-alerts.sh
#
# Must be run from the directory that contains
# prometheus-alert-rules-updated.yml and alertmanager-config-updated.yml.
#
# Steps: check local files -> (optionally) validate them -> back up the
# remote configs -> upload -> reload both services -> show status and API
# output -> print a summary.
#
# Environment (optional overrides; defaults match the original script):
#   PROMETHEUS_HOST  host running Prometheus/Alertmanager (default 10.0.10.25)
#   PROMETHEUS_USER  SSH login on that host               (default root)

set -euo pipefail

readonly PROMETHEUS_HOST="${PROMETHEUS_HOST:-10.0.10.25}"
readonly PROMETHEUS_USER="${PROMETHEUS_USER:-root}"
readonly REMOTE="${PROMETHEUS_USER}@${PROMETHEUS_HOST}"

# Local source files (resolved relative to the current directory).
readonly RULES_FILE="prometheus-alert-rules-updated.yml"
readonly ALERTMANAGER_FILE="alertmanager-config-updated.yml"

# Destination paths on the remote host.
readonly REMOTE_RULES="/etc/prometheus/rules/homelab-alerts.yml"
readonly REMOTE_ALERTMANAGER="/etc/prometheus/alertmanager.yml"
readonly REMOTE_BACKUP_DIR="/etc/prometheus/backups"

# Maximum number of characters of each API response to print.
readonly API_PREVIEW_CHARS=2000

# Print a warning to stderr without stopping the script.
warn() {
  printf '⚠️  %s\n' "$*" >&2
}

# Print an error to stderr and abort.
die() {
  printf '❌ Error: %s\n' "$*" >&2
  exit 1
}

# Abort unless the given local file exists.
require_file() {
  [[ -f "$1" ]] || die "$1 not found"
}

# Validate the local files with promtool/amtool when those tools are
# installed on this machine, so a broken file never reaches the server.
# Missing tools are not an error: validation is simply skipped.
validate_local_configs() {
  if command -v promtool >/dev/null 2>&1; then
    promtool check rules "$RULES_FILE" \
      || die "${RULES_FILE} failed promtool validation"
  else
    warn "promtool not found locally; skipping alert rule validation"
  fi

  if command -v amtool >/dev/null 2>&1; then
    amtool check-config "$ALERTMANAGER_FILE" \
      || die "${ALERTMANAGER_FILE} failed amtool validation"
  else
    warn "amtool not found locally; skipping Alertmanager config validation"
  fi
}

# Copy one remote file into the backup directory with a timestamp suffix.
# Arguments: $1 - remote path to back up, $2 - timestamp suffix
# Returns:   0 if the copy succeeded, 1 otherwise (best effort, never aborts)
backup_remote_file() {
  local src=$1 stamp=$2
  local dst="${REMOTE_BACKUP_DIR}/${src##*/}.${stamp}"

  # stderr is discarded on purpose: the file may not exist yet on a first
  # deployment, and that case is reported through the warning below.
  if ssh "$REMOTE" "cp '${src}' '${dst}'" 2>/dev/null; then
    return 0
  fi
  warn "could not back up ${src} (missing on the server?); continuing"
  return 1
}

# Fetch a URL and print the beginning of the response body.
# A failed request is reported but does not abort: by this point the
# deployment itself has already happened.
# Arguments: $1 - label to print, $2 - URL to fetch
show_api_response() {
  local label=$1 url=$2 body

  echo "${label}:"
  if body=$(curl -sS --fail --max-time 10 "$url"); then
    # The response may arrive as one very long line, so truncate by
    # characters rather than by lines.
    printf '%s\n' "${body:0:${API_PREVIEW_CHARS}}"
  else
    warn "request to ${url} failed"
  fi
  echo ""
}

main() {
  local stamp backups_ok=1

  echo "🚀 Deploying reduced alert configuration to Prometheus..."
  echo ""

  # Check if files exist
  require_file "$RULES_FILE"
  require_file "$ALERTMANAGER_FILE"
  validate_local_configs

  # Backup existing configs
  echo "📦 Backing up existing configurations..."
  ssh "$REMOTE" "mkdir -p '${REMOTE_BACKUP_DIR}'"
  # One timestamp for both files so the backups pair up.
  stamp=$(date +%Y%m%d-%H%M%S)
  backup_remote_file "$REMOTE_ALERTMANAGER" "$stamp" || backups_ok=0
  backup_remote_file "$REMOTE_RULES" "$stamp" || backups_ok=0
  if (( backups_ok )); then
    echo "✅ Backups created in /etc/prometheus/backups/"
  else
    warn "Backups are incomplete; see the warnings above"
  fi
  echo ""

  # Upload new configs
  echo "📤 Uploading new configurations..."
  scp "$ALERTMANAGER_FILE" "${REMOTE}:${REMOTE_ALERTMANAGER}"
  scp "$RULES_FILE" "${REMOTE}:${REMOTE_RULES}"
  echo "✅ Files uploaded"
  echo ""

  # Reload services
  echo "🔄 Reloading Prometheus and Alertmanager..."
  ssh "$REMOTE" 'systemctl reload prometheus'
  ssh "$REMOTE" 'systemctl reload prometheus-alertmanager'
  echo "✅ Services reloaded"
  echo ""

  # Verify configuration
  echo "🔍 Verifying configuration..."
  echo ""
  echo "Prometheus status:"
  # The pipe runs on the remote side, so the exit status is head's and an
  # inactive unit is shown rather than aborting the script.
  ssh "$REMOTE" 'systemctl status prometheus --no-pager -l | head -10'
  echo ""
  echo "Alertmanager status:"
  ssh "$REMOTE" 'systemctl status prometheus-alertmanager --no-pager -l | head -10'
  echo ""

  # Test API endpoints
  echo "Testing API endpoints..."
  echo ""
  show_api_response "Prometheus rules API" \
    "http://${PROMETHEUS_HOST}:9090/api/v1/rules"
  # Alertmanager's v1 API was deprecated and later removed (0.27), so the
  # v2 endpoints are used here and in the test command below.
  show_api_response "Alertmanager status API" \
    "http://${PROMETHEUS_HOST}:9093/api/v2/status"

  cat <<EOF
✅ Deployment complete!

📊 Summary of changes:
 • CPU alert threshold: 80%+ over 5 minutes (warning)
 • CPU critical threshold: 95%+ over 5 minutes (notification)
 • Only CRITICAL alerts sent to Discord
 • WARNING alerts logged but NOT sent
 • Email notifications completely disabled

🔗 Check alert status:
 Prometheus: http://${PROMETHEUS_HOST}:9090/alerts
 Alertmanager: http://${PROMETHEUS_HOST}:9093/#/alerts

🧪 Test critical alert (sends to Discord):
 curl -X POST http://${PROMETHEUS_HOST}:9093/api/v2/alerts -H 'Content-Type: application/json' -d '[{"labels":{"alertname":"TestCriticalAlert","severity":"critical","instance":"test:9100"},"annotations":{"summary":"Test - please ignore"}}]'

EOF
}

main
|
||||
Reference in New Issue
Block a user