]> git.openstreetmap.org Git - rails.git/blob - lib/tasks/backfill_changeset_stats.rake
Merge pull request #6696 from mmd-osm/patch/wiki2026
[rails.git] / lib / tasks / backfill_changeset_stats.rake
1 # frozen_string_literal: true
2
3 # rubocop:disable Metrics/BlockLength
4 namespace :db do
5   desc "Backfill enhanced changeset stats"
6   task :changeset_stats => :environment do
7     chunk_size = ENV["CHUNK_SIZE"]&.to_i || 10_000
8
9     min_id = ENV["MIN_CHANGESET"]&.to_i || Changeset.minimum(:id)
10     max_id = ENV["MAX_CHANGESET"]&.to_i || Changeset.maximum(:id)
11
12     puts "Backfilling enhanced changeset stats from id #{min_id} to #{max_id} in chunks of #{chunk_size}"
13
14     Changeset.where(:id => min_id..max_id)
15              .where('(num_created_nodes + num_modified_nodes + num_deleted_nodes +
16                       num_created_ways + num_modified_ways + num_deleted_ways +
17                       num_created_relations + num_modified_relations + num_deleted_relations)
18                      != num_changes')
19              .in_batches(:of => chunk_size) do |batch|
20                ids = batch.ids
21                print "Processing changesets #{ids.first} to #{ids.last} ..."
22
23                id_list = "{#{ids.join(',')}}"
24
25                # Backfill enhanced changeset stats
26                sql = <<~SQL.squish
27                  WITH changes AS (
28                      SELECT
29                        nodes.changeset_id,
30                        CASE WHEN nodes.version = 1 THEN 1 ELSE 0 END AS num_created_nodes,
31                        CASE WHEN nodes.version > 1 AND nodes.visible THEN 1 ELSE 0 END AS num_modified_nodes,
32                        CASE WHEN nodes.version > 1 AND NOT nodes.visible THEN 1 ELSE 0 END AS num_deleted_nodes,
33                        0 AS num_created_ways,
34                        0 AS num_modified_ways,
35                        0 AS num_deleted_ways,
36                        0 AS num_created_relations,
37                        0 AS num_modified_relations,
38                        0 AS num_deleted_relations
39                      FROM nodes
40                      WHERE nodes.changeset_id = ANY($1::bigint[])
41                    UNION ALL
42                      SELECT
43                        ways.changeset_id,
44                        0 AS num_created_nodes,
45                        0 AS num_modified_nodes,
46                        0 AS num_deleted_nodes,
47                        CASE WHEN ways.version = 1 THEN 1 ELSE 0 END AS num_created_ways,
48                        CASE WHEN ways.version > 1 AND ways.visible THEN 1 ELSE 0 END AS num_modified_ways,
49                        CASE WHEN ways.version > 1 AND NOT ways.visible THEN 1 ELSE 0 END AS num_deleted_ways,
50                        0 AS num_created_relations,
51                        0 AS num_modified_relations,
52                        0 AS num_deleted_relations
53                      FROM ways
54                      WHERE ways.changeset_id = ANY($1::bigint[])
55                    UNION ALL
56                      SELECT
57                        relations.changeset_id,
58                        0 AS num_created_nodes,
59                        0 AS num_modified_nodes,
60                        0 AS num_deleted_nodes,
61                        0 AS num_created_ways,
62                        0 AS num_modified_ways,
63                        0 AS num_deleted_ways,
64                        CASE WHEN relations.version = 1 THEN 1 ELSE 0 END AS num_created_relations,
65                        CASE WHEN relations.version > 1 AND relations.visible THEN 1 ELSE 0 END AS num_modified_relations,
66                        CASE WHEN relations.version > 1 AND NOT relations.visible THEN 1 ELSE 0 END AS num_deleted_relations
67                      FROM relations
68                      WHERE relations.changeset_id = ANY($1::bigint[])
69                  ),
70                  total AS (
71                    SELECT
72                      changes.changeset_id,
73                      SUM(changes.num_created_nodes) AS num_created_nodes,
74                      SUM(changes.num_modified_nodes) AS num_modified_nodes,
75                      SUM(changes.num_deleted_nodes) AS num_deleted_nodes,
76                      SUM(changes.num_created_ways) AS num_created_ways,
77                      SUM(changes.num_modified_ways) AS num_modified_ways,
78                      SUM(changes.num_deleted_ways) AS num_deleted_ways,
79                      SUM(changes.num_created_relations) AS num_created_relations,
80                      SUM(changes.num_modified_relations) AS num_modified_relations,
81                      SUM(changes.num_deleted_relations) AS num_deleted_relations
82                    FROM changes
83                    GROUP BY changes.changeset_id
84                  )
85                  UPDATE changesets
86                  SET num_created_nodes      = total.num_created_nodes,
87                      num_modified_nodes     = total.num_modified_nodes,
88                      num_deleted_nodes      = total.num_deleted_nodes,
89                      num_created_ways       = total.num_created_ways,
90                      num_modified_ways      = total.num_modified_ways,
91                      num_deleted_ways       = total.num_deleted_ways,
92                      num_created_relations  = total.num_created_relations,
93                      num_modified_relations = total.num_modified_relations,
94                      num_deleted_relations  = total.num_deleted_relations,
95                      num_changes            = total.num_created_nodes +
96                                               total.num_modified_nodes +
97                                               total.num_deleted_nodes +
98                                               total.num_created_ways +
99                                               total.num_modified_ways +
100                                               total.num_deleted_ways +
101                                               total.num_created_relations +
102                                               total.num_modified_relations +
103                                               total.num_deleted_relations
104                  FROM total
105                  WHERE changesets.id = total.changeset_id
106                SQL
107
108                binds = [
109                  ActiveRecord::Relation::QueryAttribute.new(
110                    "ids",
111                    id_list,
112                    ActiveRecord::Type::String.new
113                  )
114                ]
115                rows_affected = ActiveRecord::Base.connection.exec_update(sql, "UpdateStats", binds)
116                puts " #{rows_affected} changesets updated."
117     end
118
119     puts "\nDone."
120   end
121 end
122 # rubocop:enable Metrics/BlockLength