From: Tom Hughes
Date: Sun, 24 Jul 2022 13:45:59 +0000 (+0100)
Subject: Add an alert for failing healthchecks on the CDN
X-Git-Url: https://git.openstreetmap.org/chef.git/commitdiff_plain/faeb3b7259889dc1b789ea14c223b41a0b78116b

Add an alert for failing healthchecks on the CDN
---
Review note: the healthcheck rule compares with `==`. PromQL has no `=`
comparison operator (`=` is only valid inside label matchers), so the
expression as first written would not parse. The new-side blob hash on the
`index` line is kept as recorded and no longer matches the corrected content.

diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
index a52901233..c108451ab 100644
--- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb
+++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
@@ -114,13 +114,18 @@ groups:
           delay: "{{ $value | humanizeDuration }}"
   - name: fastly
     rules:
-      - alert: error rate
+      - alert: fastly error rate
         expr: sum(rate(fastly_rt_status_group_total{status_group="5xx"}[5m])) by (service_name, datacenter) / sum(rate(fastly_rt_status_group_total[5m])) by (service_name, datacenter) > 0.005
         for: 15m
         labels:
           alertgroup: fastly
         annotations:
           error_rate: "{{ $value | humanizePercentage }}"
+      - alert: fastly healthcheck failing
+        expr: fastly_healthcheck_status == 0
+        for: 5m
+        labels:
+          alertgroup: fastly
   - name: filesystem
     rules:
       - alert: readonly filesystem