From 6cdd946b4e56b4a6593a5f7df3051c7ba95bebeb Mon Sep 17 00:00:00 2001 From: Tom Hughes Date: Thu, 25 Nov 2021 18:33:56 +0000 Subject: [PATCH] Add an alert for the mail queue --- .../prometheus/templates/default/alert_rules.yml.erb | 9 +++++++++ cookbooks/prometheus/templates/default/chef.prom.erb | 2 +- roles/base.rb | 8 ++++++++ roles/mail.rb | 5 +++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb index 620b44f54..9a3609e03 100644 --- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb +++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb @@ -130,6 +130,15 @@ groups: for: 5m labels: alertgroup: "{{ $labels.instance }}" + - name: mail + rules: + - alert: mail queue length + expr: exim_queue > exim_queue_limit + for: 60m + labels: + alertgroup: mail + annotations: + queue_length: "{{ $value }}" - name: mdadm rules: - alert: mdadm array inactive diff --git a/cookbooks/prometheus/templates/default/chef.prom.erb b/cookbooks/prometheus/templates/default/chef.prom.erb index f9be619f0..fd8089f4b 100644 --- a/cookbooks/prometheus/templates/default/chef.prom.erb +++ b/cookbooks/prometheus/templates/default/chef.prom.erb @@ -11,5 +11,5 @@ chef_role{name="<%= role %>"} 1 <% node[:prometheus][:metrics].sort.each do |name, details| -%> # HELP <%= name %> <%= details[:help] %> # TYPE <%= name %> gauge -<%= name %>{<%= details[:labels].map { |k,v| "#{k}=\"#{v}\"" }.join(",") %>} 1 +<%= name %>{<%= Hash(details[:labels]).map { |k,v| "#{k}=\"#{v}\"" }.join(",") %>} <%= details[:metric] || 1 %> <% end -%> diff --git a/roles/base.rb b/roles/base.rb index 4d453e45c..d5230dc60 100644 --- a/roles/base.rb +++ b/roles/base.rb @@ -24,6 +24,14 @@ default_attributes( }, :search => ["openstreetmap.org"] }, + :prometheus => { + :metrics => { + :exim_queue_limit => { + :help => "Mail queue alert level", + :metric => 50 + } + } + }, :sysctl => { :panic => { :comment => "Reboot automatically after a panic", diff --git a/roles/mail.rb b/roles/mail.rb index 50382b03b..4184b47f3 100644 --- a/roles/mail.rb +++ b/roles/mail.rb @@ -90,6 +90,11 @@ default_attributes( } } } + }, + :prometheus => { + :metrics => { + :exim_queue_limit => { :metric => 500 } + } } ) -- 2.43.2