]> git.openstreetmap.org Git - rails.git/blob - lib/tasks/backfill_changeset_stats.rake
Merge remote-tracking branch 'upstream/pull/6347'
[rails.git] / lib / tasks / backfill_changeset_stats.rake
1 # rubocop:disable Metrics/BlockLength
2 namespace :db do
3   desc "Backfill enhanced changeset stats"
4   task :changeset_stats => :environment do
5     chunk_size = ENV["CHUNK_SIZE"]&.to_i || 10_000
6
7     min_id = ENV["MIN_CHANGESET"]&.to_i || Changeset.minimum(:id)
8     max_id = ENV["MAX_CHANGESET"]&.to_i || Changeset.maximum(:id)
9
10     puts "Backfilling enhanced changeset stats from id #{min_id} to #{max_id} in chunks of #{chunk_size}"
11
12     Changeset.where(:id => min_id..max_id)
13              .where('(num_created_nodes + num_modified_nodes + num_deleted_nodes +
14                       num_created_ways + num_modified_ways + num_deleted_ways +
15                       num_created_relations + num_modified_relations + num_deleted_relations)
16                      != num_changes')
17              .in_batches(:of => chunk_size) do |batch|
18       ids = batch.ids
19       print "Processing changesets #{ids.first} to #{ids.last} ..."
20
21       id_list = "{#{ids.join(',')}}"
22
23       # Backfill enhanced changeset stats
24       sql = <<~SQL.squish
25         WITH changes AS (
26             SELECT
27               nodes.changeset_id,
28               CASE WHEN nodes.version = 1 THEN 1 ELSE 0 END AS num_created_nodes,
29               CASE WHEN nodes.version > 1 AND nodes.visible THEN 1 ELSE 0 END AS num_modified_nodes,
30               CASE WHEN nodes.version > 1 AND NOT nodes.visible THEN 1 ELSE 0 END AS num_deleted_nodes,
31               0 AS num_created_ways,
32               0 AS num_modified_ways,
33               0 AS num_deleted_ways,
34               0 AS num_created_relations,
35               0 AS num_modified_relations,
36               0 AS num_deleted_relations
37             FROM nodes
38             WHERE nodes.changeset_id = ANY($1::bigint[])
39           UNION ALL
40             SELECT
41               ways.changeset_id,
42               0 AS num_created_nodes,
43               0 AS num_modified_nodes,
44               0 AS num_deleted_nodes,
45               CASE WHEN ways.version = 1 THEN 1 ELSE 0 END AS num_created_ways,
46               CASE WHEN ways.version > 1 AND ways.visible THEN 1 ELSE 0 END AS num_modified_ways,
47               CASE WHEN ways.version > 1 AND NOT ways.visible THEN 1 ELSE 0 END AS num_deleted_ways,
48               0 AS num_created_relations,
49               0 AS num_modified_relations,
50               0 AS num_deleted_relations
51             FROM ways
52             WHERE ways.changeset_id = ANY($1::bigint[])
53           UNION ALL
54             SELECT
55               relations.changeset_id,
56               0 AS num_created_nodes,
57               0 AS num_modified_nodes,
58               0 AS num_deleted_nodes,
59               0 AS num_created_ways,
60               0 AS num_modified_ways,
61               0 AS num_deleted_ways,
62               CASE WHEN relations.version = 1 THEN 1 ELSE 0 END AS num_created_relations,
63               CASE WHEN relations.version > 1 AND relations.visible THEN 1 ELSE 0 END AS num_modified_relations,
64               CASE WHEN relations.version > 1 AND NOT relations.visible THEN 1 ELSE 0 END AS num_deleted_relations
65             FROM relations
66             WHERE relations.changeset_id = ANY($1::bigint[])
67         ),
68         total AS (
69           SELECT
70             changes.changeset_id,
71             SUM(changes.num_created_nodes) AS num_created_nodes,
72             SUM(changes.num_modified_nodes) AS num_modified_nodes,
73             SUM(changes.num_deleted_nodes) AS num_deleted_nodes,
74             SUM(changes.num_created_ways) AS num_created_ways,
75             SUM(changes.num_modified_ways) AS num_modified_ways,
76             SUM(changes.num_deleted_ways) AS num_deleted_ways,
77             SUM(changes.num_created_relations) AS num_created_relations,
78             SUM(changes.num_modified_relations) AS num_modified_relations,
79             SUM(changes.num_deleted_relations) AS num_deleted_relations
80           FROM changes
81           GROUP BY changes.changeset_id
82         )
83         UPDATE changesets
84         SET num_created_nodes      = total.num_created_nodes,
85             num_modified_nodes     = total.num_modified_nodes,
86             num_deleted_nodes      = total.num_deleted_nodes,
87             num_created_ways       = total.num_created_ways,
88             num_modified_ways      = total.num_modified_ways,
89             num_deleted_ways       = total.num_deleted_ways,
90             num_created_relations  = total.num_created_relations,
91             num_modified_relations = total.num_modified_relations,
92             num_deleted_relations  = total.num_deleted_relations
93         FROM total
94         WHERE changesets.id = total.changeset_id
95       SQL
96
97       binds = [
98         ActiveRecord::Relation::QueryAttribute.new(
99           "ids",
100           id_list,
101           ActiveRecord::Type::String.new
102         )
103       ]
104       rows_affected = ActiveRecord::Base.connection.exec_update(sql, "UpdateStats", binds)
105       puts " #{rows_affected} changesets updated."
106     end
107
108     puts "\nDone."
109   end
110 end
111 # rubocop:enable Metrics/BlockLength