---
# Alertmanager Configuration for Fred's Homelab - UPDATED
# Location: /etc/prometheus/alertmanager.yml
# Updated: 2026-02-03 (Reduced alert noise)
#
# Changes:
# - Only CRITICAL alerts trigger Discord notifications
# - WARNING alerts are logged but NOT sent to notification channels
# - Removed email notifications entirely
# - Discord receiver uses the native `discord_configs` integration
#   (requires Alertmanager >= 0.25); the generic `webhook_configs` payload
#   is not a format Discord's webhook endpoint accepts

global:
  # Time after which an alert is declared resolved if it stops being updated
  resolve_timeout: 5m

# Root route - all alerts enter here
route:
  # Group alerts by these labels to reduce noise
  group_by: ['alertname', 'severity', 'instance']

  # Wait 30s before sending first notification (allows grouping)
  group_wait: 30s

  # Wait 5min before sending additional alerts for same group
  group_interval: 5m

  # Resend alert every 12 hours if still firing
  repeat_interval: 12h

  # Default receiver - drops everything (warnings go here).
  # The name is quoted on purpose: a bare `null` would be parsed as YAML null.
  receiver: 'null'

  # Child routes for specific alert types (first match wins)
  routes:
    # CRITICAL alerts - send to Discord
    - matchers:
        - 'severity="critical"'
      receiver: 'discord-critical'
      group_wait: 10s
      repeat_interval: 1h

    # WARNING alerts - explicitly drop (logged by Prometheus, not sent)
    - matchers:
        - 'severity="warning"'
      receiver: 'null'
      repeat_interval: 24h

# Inhibition rules - prevent alert spam
inhibit_rules:
  # If critical alert is firing, suppress warnings for same alert
  - source_matchers:
      - 'severity="critical"'
    target_matchers:
      - 'severity="warning"'
    equal: ['alertname', 'instance']

  # If host is down, suppress all other alerts from that host.
  # NOTE(review): this only matches alerts whose `instance` label is identical
  # to the HostDown alert's `instance` (including any port) - confirm that the
  # other exporters on a host share that label value.
  - source_matchers:
      - 'alertname="HostDown"'
    target_matchers:
      - 'alertname!="HostDown"'
    equal: ['instance']

# Receivers - define where alerts go
receivers:
  # Null receiver - drops alerts (used for warnings)
  - name: 'null'

  # Discord webhook for CRITICAL alerts
  - name: 'discord-critical'
    discord_configs:
      # NOTE(review): this URL embeds the webhook token and is a secret.
      # Keep this file out of version control (or load the URL from a file if
      # your Alertmanager version supports `webhook_url_file`), and regenerate
      # the webhook in Discord if this URL has ever been shared.
      - webhook_url: 'https://discord.com/api/webhooks/1462667503301038285/ZVJDuek6VADA-RdI09xJDvqjveOWXgxQnMBcsQzoKwVPnNOACMCL5v-HN55-KVe4IZY0'
        # Also notify when the alert clears
        send_resolved: true
        http_config:
          follow_redirects: true

# ====================================
# Deployment Instructions
# ====================================
#
# 1. Backup existing config:
#    ssh root@10.0.10.25 'cp /etc/prometheus/alertmanager.yml /etc/prometheus/alertmanager.yml.backup'
#
# 2. Upload this file:
#    scp alertmanager-config-updated.yml root@10.0.10.25:/etc/prometheus/alertmanager.yml
#
# 3. Upload updated alert rules:
#    scp prometheus-alert-rules-updated.yml root@10.0.10.25:/etc/prometheus/rules/homelab-alerts.yml
#
# 4. Reload Alertmanager (validate the file first so a typo cannot take
#    notifications down):
#    ssh root@10.0.10.25 'amtool check-config /etc/prometheus/alertmanager.yml'
#    ssh root@10.0.10.25 'systemctl reload prometheus-alertmanager'
#
# 5. Reload Prometheus:
#    ssh root@10.0.10.25 'systemctl reload prometheus'
#
# 6. Verify configuration:
#    curl http://10.0.10.25:9093/api/v2/status
#    curl http://10.0.10.25:9090/api/v1/rules
#
# 7. Test Discord webhook (Alertmanager API v2 requires a JSON content type):
#    curl -X POST -H 'Content-Type: application/json' \
#      http://10.0.10.25:9093/api/v2/alerts -d '[
#      {
#        "labels": {
#          "alertname": "TestCriticalAlert",
#          "severity": "critical",
#          "instance": "test:9100"
#        },
#        "annotations": {
#          "summary": "Test alert - please ignore"
#        }
#      }
#    ]'
#
# ====================================
# Alert Flow Summary
# ====================================
#
# CRITICAL alerts:
#   Prometheus → Alertmanager → Discord Webhook → Your Discord Server
#
# WARNING alerts:
#   Prometheus → Alertmanager → null receiver (logged, not sent)
#
# You can view WARNING alerts in:
#   - Prometheus UI: http://10.0.10.25:9090/alerts
#   - Alertmanager UI: http://10.0.10.25:9093/#/alerts
#
# ====================================
# Expected Behavior After Update
# ====================================
#
# Your Discord will ONLY receive:
#   ✅ Host completely down (HostDown)
#   ✅ CPU >95% for 5 minutes (CriticalCPUUsage)
#   ✅ Memory >95% for 5 minutes (CriticalMemoryUsage)
#   ✅ Disk <5% free (DiskSpaceCritical)
#   ✅ Proxmox node down (ProxmoxNodeDown)
#   ✅ PostgreSQL down (PostgreSQLDown)
#   ✅ VPS unreachable (VPSDown)
#   ✅ Prometheus config reload failed
#
# Your inbox will receive:
#   🚫 NOTHING - all email notifications disabled
#
# Warnings (CPU 80-95%, memory 85-95%, etc.):
#   📊 Logged in Prometheus/Alertmanager UI only
#
# This should dramatically reduce notification noise while still
# catching critical issues that need immediate attention.