From: Tom Hughes
Date: Mon, 5 Oct 2020 21:27:11 +0000 (+0100)
Subject: Report database size and replication delay for postgres
X-Git-Url: https://git.openstreetmap.org/chef.git/commitdiff_plain/480d14d1f1967e4911302e485ebde397c12d8795

Report database size and replication delay for postgres
---

Review notes (commentary only; patch tools ignore text in this position):

  * recipes/default.rb gains a template resource that writes
    /etc/prometheus/collectors/postgres_queries.yml (owner root, group root,
    mode 644) from postgres_queries.yml.erb, and passes that same path to the
    "postgres" prometheus_exporter resource via --extend.query-path.
  * postgres_queries.yml.erb is a new file with no ERB tags. It defines two
    query groups:
      - pg_replication: one GAUGE, lag_seconds, computed as
        now() - pg_last_xact_replay_timestamp() in epoch seconds.
      - pg_database: one GAUGE, size_bytes, labelled by datid and datname,
        with cache_seconds set to 30.
  * NOTE(review): pg_last_xact_replay_timestamp() is documented to return
    NULL when no transactions have been replayed during recovery, so
    lag_seconds would presumably be NULL on a server that is not a standby;
    confirm how the exporter reports that.
  * NOTE(review): line breaks and indentation below were restored from a
    whitespace-collapsed copy of this patch; verify against the upstream
    commit before applying.

diff --git a/cookbooks/postgresql/recipes/default.rb b/cookbooks/postgresql/recipes/default.rb
index 4d7aec10e..6a05f34e4 100644
--- a/cookbooks/postgresql/recipes/default.rb
+++ b/cookbooks/postgresql/recipes/default.rb
@@ -158,9 +158,17 @@ ports = clusters.collect do |_, details|
   "port=#{details[:port]}"
 end
 
+template "/etc/prometheus/collectors/postgres_queries.yml" do
+  source "postgres_queries.yml.erb"
+  owner "root"
+  group "root"
+  mode "644"
+end
+
 prometheus_exporter "postgres" do
   port 9187
   user "postgres"
+  options "--extend.query-path=/etc/prometheus/collectors/postgres_queries.yml"
   environment "DATA_SOURCE_NAME" => "user=postgres host=/run/postgresql #{ports.join(',')}",
               "PG_EXPORTER_EXCLUDE_DATABASES" => "postgres,template0,template1"
 end
diff --git a/cookbooks/postgresql/templates/default/postgres_queries.yml.erb b/cookbooks/postgresql/templates/default/postgres_queries.yml.erb
new file mode 100644
index 000000000..50c9ae49f
--- /dev/null
+++ b/cookbooks/postgresql/templates/default/postgres_queries.yml.erb
@@ -0,0 +1,22 @@
+pg_replication:
+  query: "SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp())) AS lag_seconds"
+  master: true
+  metrics:
+    - lag_seconds:
+        usage: "GAUGE"
+        description: "Replication lag behind master in seconds"
+
+pg_database:
+  query: "SELECT pg_database.oid AS datid, pg_database.datname, pg_database_size(pg_database.datname) AS size_bytes FROM pg_database"
+  master: true
+  cache_seconds: 30
+  metrics:
+    - datid:
+        usage: "LABEL"
+        description: "ID of the database"
+    - datname:
+        usage: "LABEL"
+        description: "Name of the database"
+    - size_bytes:
+        usage: "GAUGE"
+        description: "Disk space used by the database"