From 6c865cb0b71abd768882fe7275bd0a4a7eb517b3 Mon Sep 17 00:00:00 2001 From: Marwin Hochfelsner <50826859+hlfan@users.noreply.github.com> Date: Mon, 27 Oct 2025 21:16:24 +0100 Subject: [PATCH] Increase tag2link item leniency --- lib/tag2link.rb | 39 +++++++++++++++++++++++++++++++-------- test/lib/tag2link_test.rb | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 8 deletions(-) diff --git a/lib/tag2link.rb b/lib/tag2link.rb index b4ab9c3b7..58bdb16dc 100644 --- a/lib/tag2link.rb +++ b/lib/tag2link.rb @@ -19,13 +19,36 @@ module Tag2link data # exclude deprecated and third-party URLs .reject { |item| item["rank"] == "deprecated" || item["source"] == "wikidata:P3303" } - .group_by { |item| item["key"] } - .transform_keys { |key| key.sub(/^Key:/, "") } - # move preferred to the start of the array - .transform_values { |items| items.sort_by { |item| item["rank"] == "preferred" ? 0 : 1 }.uniq { |item| item["url"] } } - # exclude any that are ambiguous, i.e. the best and second-best have the same rank - .reject { |_key, value| value[1] && value[0]["rank"] == value[1]["rank"] } - # keep only the best match - .transform_values { |items| items[0]["url"] } + .group_by { |item| item["key"].sub(/^Key:/, "") } + .transform_values { |items| choose_best_item(items) } + .compact + .transform_values { |items| items["url"] } end + + def self.choose_best_item(items) + return nil if items.blank? + + return items.first if items.size == 1 + + # move preferred to the start of the array + ranked = items.sort_by { |item| item["rank"] == "preferred" ? 0 : 1 }.uniq { |item| item["url"] } + top_rank = ranked.first["rank"] + top_items = ranked.select { |i| i["rank"] == top_rank } + + # if only one top-ranked item, prefer that + return top_items.first if top_items.size == 1 + + grouped = top_items.group_by { |i| i["source"] } + return nil if grouped.size > 2 + + # if both sources have exactly one preferred, prefer osmwiki + return grouped["osmwiki:P8"]&.first || grouped.values.flatten.first if grouped.all? { |_s, vals| vals.size == 1 } + + # if one source has multiple preferreds and the other has one, prefer the single one + return grouped.min_by { |_s, vals| vals.size }.last.first if grouped.any? { |_s, vals| vals.size == 1 } + + # exclude any that are ambiguous + nil + end + private_class_method :choose_best_item end diff --git a/test/lib/tag2link_test.rb b/test/lib/tag2link_test.rb index f7dbd6abd..cc207cfa8 100644 --- a/test/lib/tag2link_test.rb +++ b/test/lib/tag2link_test.rb @@ -36,6 +36,14 @@ class Tag2linkTest < ActiveSupport::TestCase dict = Tag2link.build_dict(data) assert_equal "http://example.com/$1", dict["example"] + data = [ + { "key" => "Key:example", "url" => "http://example2.com/$1", "rank" => "preferred", "source" => "osmwiki:P8" }, + { "key" => "Key:example", "url" => "http://example.com/$1", "rank" => "preferred", "source" => "wikidata:P1630" }, + { "key" => "Key:example", "url" => "http://example3.com/$1", "rank" => "preferred", "source" => "osmwiki:P8" } + ] + dict = Tag2link.build_dict(data) + assert_equal "http://example.com/$1", dict["example"] + data = [ { "key" => "Key:example", "url" => "http://example2.com/$1", "rank" => "preferred", "source" => "osmwiki:P8" } ] @@ -67,4 +75,31 @@ class Tag2linkTest < ActiveSupport::TestCase dict = Tag2link.build_dict(data) assert_not_includes dict, "example" end + + def test_build_dict_chooses_osmwiki_when_both_have_single_preferred + data = [ + { "key" => "Key:example", "url" => "http://example1.com/$1", "rank" => "preferred", "source" => "osmwiki:P8" }, + { "key" => "Key:example", "url" => "http://example2.com/$1", "rank" => "preferred", "source" => "wikidata:P1630" } + ] + dict = Tag2link.build_dict(data) + assert_equal "http://example1.com/$1", dict["example"] + + data = [ + { "key" => "Key:example", "url" => "http://example1.com/$1", "rank" => "normal", "source" => "osmwiki:P8" }, + { "key" => "Key:example", "url" => "http://example2.com/$1", "rank" => "normal", "source" => "wikidata:P1630" } + ] + dict = Tag2link.build_dict(data) + assert_equal "http://example1.com/$1", dict["example"] + end + + def test_build_dict_multiple_sources_more_than_two + data = [ + { "key" => "Key:example", "url" => "http://example1.com/$1", "rank" => "normal", "source" => "osmwiki:P8" }, + { "key" => "Key:example", "url" => "http://example2.com/$1", "rank" => "normal", "source" => "wikidata:P1630" }, + { "key" => "Key:example", "url" => "http://example3.com/$1", "rank" => "normal", "source" => "other:source" } + ] + dict = Tag2link.build_dict(data) + # Should not happen with current tag2link schema, but ensure we handle it gracefully + assert_not_includes dict, "example" + end end -- 2.39.5