git.openstreetmap.org Git - chef.git/commitdiff
Improve monitoring of primary database for api, tile and nominatim
author: Tom Hughes <tom@compton.nu>
Tue, 4 Jul 2023 09:28:55 +0000 (10:28 +0100)
committer: Tom Hughes <tom@compton.nu>
Tue, 4 Jul 2023 09:37:48 +0000 (10:37 +0100)
cookbooks/db/attributes/default.rb
cookbooks/nominatim/attributes/default.rb
cookbooks/postgresql/attributes/default.rb
cookbooks/postgresql/recipes/default.rb
cookbooks/tile/attributes/default.rb

index 56e96007ed5173e68bd92ca89fae87aaf06a2901..cbc5635d83848aee1c9afb8271e902e62e7f7952 100644 (file)
@@ -1,3 +1,4 @@
 default[:db][:cluster] = "15/main"
 
 default[:postgresql][:versions] |= ["15"]
+default[:postgresql][:monitor_database] = "openstreetmap"
index 301faf53d905aa4f029b9ab5cd452bcad35da870..c504739d8329778a7797369847a722ada0fc6a78 100644 (file)
@@ -31,5 +31,6 @@ default[:nominatim][:config] = {
 default[:nominatim][:redirects] = {}
 
 default[:postgresql][:versions] |= [node[:nominatim][:dbcluster].split("/").first]
+default[:postgresql][:monitor_database] = "nominatim"
 
 default[:accounts][:users][:nominatim][:status] = :role
index 038fadeff959899ae134b85eb7b63d18829abeee..88942aa6f8bc85525719a67f72ad21163ba55162 100644 (file)
@@ -1,5 +1,6 @@
 default[:postgresql][:versions] = []
 default[:postgresql][:clusters] = {}
+default[:postgresql][:monitor_database] = "postgres"
 default[:postgresql][:settings][:defaults][:port] = "5432"
 default[:postgresql][:settings][:defaults][:max_connections] = "100"
 default[:postgresql][:settings][:defaults][:ssl] = "true"
index f5bab62318e1b8ecdc6c2c2876b8404dd67aedef..3bf0ad154e6ca39e09a1819a4ca5e06c5bad386c 100644 (file)
@@ -108,39 +108,63 @@ ohai_plugin "postgresql" do
   template "ohai.rb.erb"
 end
 
+template "/etc/prometheus/exporters/postgres_queries.yml" do
+  source "postgres_queries.yml.erb"
+  owner "root"
+  group "root"
+  mode "644"
+end
+
 package "pgtop"
 package "libdbd-pg-perl"
 
 clusters = node[:postgresql][:clusters] || []
 
 clusters.each do |name, details|
-  suffix = name.tr("/", ":")
+  prometheus_suffix = name.tr("/", "-")
+  prometheus_database = node[:postgresql][:monitor_database]
+
+  prometheus_exporter "postgres" do
+    port 10000 + details[:port].to_i
+    service "postgres-#{prometheus_suffix}"
+    labels "cluster" => name
+    scrape_interval "1m"
+    scrape_timeout "1m"
+    user "postgres"
+    options "--no-collector.process_idle --extend.query-path=/etc/prometheus/exporters/postgres_queries.yml"
+    environment "DATA_SOURCE_NAME" => "postgres:///#{prometheus_database}?host=/run/postgresql&port=#{details[:port]}"
+    restrict_address_families "AF_UNIX"
+    remove_ipc false
+    subscribes :restart, "template[/etc/prometheus/exporters/postgres_queries.yml]"
+  end
 
-  munin_plugin "postgres_bgwriter_#{suffix}" do
+  munin_suffix = name.tr("/", ":")
+
+  munin_plugin "postgres_bgwriter_#{munin_suffix}" do
     target "postgres_bgwriter"
     conf "munin.erb"
     conf_variables :port => details[:port]
   end
 
-  munin_plugin "postgres_checkpoints_#{suffix}" do
+  munin_plugin "postgres_checkpoints_#{munin_suffix}" do
     target "postgres_checkpoints"
     conf "munin.erb"
     conf_variables :port => details[:port]
   end
 
-  munin_plugin "postgres_connections_db_#{suffix}" do
+  munin_plugin "postgres_connections_db_#{munin_suffix}" do
     target "postgres_connections_db"
     conf "munin.erb"
     conf_variables :port => details[:port]
   end
 
-  munin_plugin "postgres_users_#{suffix}" do
+  munin_plugin "postgres_users_#{munin_suffix}" do
     target "postgres_users"
     conf "munin.erb"
     conf_variables :port => details[:port]
   end
 
-  munin_plugin "postgres_xlog_#{suffix}" do
+  munin_plugin "postgres_xlog_#{munin_suffix}" do
     target "postgres_xlog"
     conf "munin.erb"
     conf_variables :port => details[:port]
@@ -148,36 +172,17 @@ clusters.each do |name, details|
 
   next unless File.exist?("/var/lib/postgresql/#{details[:version]}/main/recovery.conf")
 
-  munin_plugin "postgres_replication_#{suffix}" do
+  munin_plugin "postgres_replication_#{munin_suffix}" do
     target "postgres_replication"
     conf "munin.erb"
     conf_variables :port => details[:port]
   end
 end
 
-uris = clusters.collect do |_, details|
-  "postgres@:#{details[:port]}/postgres?host=/run/postgresql"
-end
-
-template "/etc/prometheus/exporters/postgres_queries.yml" do
-  source "postgres_queries.yml.erb"
-  owner "root"
-  group "root"
-  mode "644"
+service "prometheus-postgres-exporter" do
+  action [:stop, :disable]
 end
 
-# lag / lag_seconds
-# process_idle missing state
-prometheus_exporter "postgres" do
-  port 9187
-  scrape_interval "1m"
-  scrape_timeout "1m"
-  user "postgres"
-  options "--no-collector.process_idle --extend.query-path=/etc/prometheus/exporters/postgres_queries.yml"
-  environment "DATA_SOURCE_URI" => uris.sort.uniq.first,
-              "PG_EXPORTER_AUTO_DISCOVER_DATABASES" => "true",
-              "PG_EXPORTER_EXCLUDE_DATABASES" => "postgres,template0,template1"
-  restrict_address_families "AF_UNIX"
-  remove_ipc false
-  subscribes :restart, "template[/etc/prometheus/exporters/postgres_queries.yml]"
+systemd_service "prometheus-postgres-exporter" do
+  action :delete
 end
index ef3efc3da49ad9ffb5379eb17ad567b0c01f3c63..d3003257dd54c41c21f0b5d4f0699ace46c54490 100644 (file)
@@ -14,5 +14,6 @@ default[:tile][:data] = {}
 default[:tile][:styles] = {}
 
 default[:postgresql][:versions] |= [node[:tile][:database][:cluster].split("/").first]
+default[:postgresql][:monitor_database] = "gis"
 
 default[:accounts][:users][:tile][:status] = :role