From cc114650f57c73ea316defcc3c42e011304c5dba Mon Sep 17 00:00:00 2001 From: Marwin Hochfelsner <50826859+hlfan@users.noreply.github.com> Date: Mon, 27 Oct 2025 21:01:54 +0100 Subject: [PATCH] Move tag2link transforms into module --- config/initializers/tag2link.rb | 18 ++---------- lib/tag2link.rb | 20 ++++++++++++- test/lib/tag2link_test.rb | 52 +++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 16 deletions(-) diff --git a/config/initializers/tag2link.rb b/config/initializers/tag2link.rb index 771411e3e..e5fd8aea1 100644 --- a/config/initializers/tag2link.rb +++ b/config/initializers/tag2link.rb @@ -1,17 +1,5 @@ # frozen_string_literal: true -# A map of each OSM key to its formatter URL. For example: -# { "ref:vatin" => "https://example.com/$1" } -# The JSON data is an array with duplicate entries, which is not efficient for lookups. -# So, convert it to a hash and only keep the item with the best rank. -TAG2LINK = JSON.parse(Rails.root.join("node_modules/tag2link/index.json").read) - # exclude deprecated and third-party URLs - .reject { |item| item["rank"] == "deprecated" || item["source"] == "wikidata:P3303" } - .group_by { |item| item["key"] } - .transform_keys { |key| key.sub(/^Key:/, "") } - # move preferred to the start of the array - .transform_values { |items| items.sort_by { |item| item["rank"] == "preferred" ? 0 : 1 }.uniq { |item| item["url"] } } - # exclude any that are ambiguous, i.e. the best and second-best have the same rank - .reject { |_key, value| value[1] && value[0]["rank"] == value[1]["rank"] } - # keep only the best match - .transform_values { |items| items[0]["url"] } +require "tag2link" + +Tag2link.load(Rails.root.join("node_modules/tag2link/index.json")) diff --git a/lib/tag2link.rb b/lib/tag2link.rb index 8eb9484f5..b4ab9c3b7 100644 --- a/lib/tag2link.rb +++ b/lib/tag2link.rb @@ -1,13 +1,31 @@ # frozen_string_literal: true module Tag2link + def self.load(path) + @dict = build_dict(JSON.parse(path.read)).freeze + end + def self.link(key, value) # skip if it's a full URL return nil if %r{\Ahttps?://}.match?(value) - url_template = TAG2LINK[key] + url_template = @dict[key] return nil unless url_template url_template.gsub("$1", value) end + + def self.build_dict(data) + data + # exclude deprecated and third-party URLs + .reject { |item| item["rank"] == "deprecated" || item["source"] == "wikidata:P3303" } + .group_by { |item| item["key"] } + .transform_keys { |key| key.sub(/^Key:/, "") } + # move preferred to the start of the array + .transform_values { |items| items.sort_by { |item| item["rank"] == "preferred" ? 0 : 1 }.uniq { |item| item["url"] } } + # exclude any that are ambiguous, i.e. the best and second-best have the same rank + .reject { |_key, value| value[1] && value[0]["rank"] == value[1]["rank"] } + # keep only the best match + .transform_values { |items| items[0]["url"] } + end end diff --git a/test/lib/tag2link_test.rb b/test/lib/tag2link_test.rb index 6d4fec58d..f7dbd6abd 100644 --- a/test/lib/tag2link_test.rb +++ b/test/lib/tag2link_test.rb @@ -15,4 +15,56 @@ class Tag2linkTest < ActiveSupport::TestCase url = Tag2link.link("wikidata", "Q936") assert_equal "https://www.wikidata.org/entity/Q936", url end + + def test_build_dict_rejects_deprecated_and_third_party + data = [ + { "key" => "Key:example", "url" => "http://example.com/$1", "rank" => "deprecated", "source" => "osmwiki:P8" }, + { "key" => "Key:example2", "url" => "http://example2.com/$1", "rank" => "preferred", "source" => "wikidata:P3303" }, + { "key" => "Key:example3", "url" => "http://example3.com/$1", "rank" => "preferred", "source" => "osmwiki:P8" } + ] + dict = Tag2link.build_dict(data) + assert_not_includes dict, "example" + assert_not_includes dict, "example2" + assert_includes dict, "example3" + end + + def test_build_dict_chooses_single_preferred_item + data = [ + { "key" => "Key:example", "url" => "http://example2.com/$1", "rank" => "normal", "source" => "osmwiki:P8" }, + { "key" => "Key:example", "url" => "http://example.com/$1", "rank" => "preferred", "source" => "osmwiki:P8" } + ] + dict = Tag2link.build_dict(data) + assert_equal "http://example.com/$1", dict["example"] + + data = [ + { "key" => "Key:example", "url" => "http://example2.com/$1", "rank" => "preferred", "source" => "osmwiki:P8" } + ] + dict = Tag2link.build_dict(data) + assert_equal "http://example2.com/$1", dict["example"] + end + + def test_build_dict_deduplicates_urls + data = [ + { "key" => "Key:example", "url" => "http://example.com/$1", "rank" => "preferred", "source" => "osmwiki:P8" }, + { "key" => "Key:example", "url" => "http://example.com/$1", "rank" => "normal", "source" => "wikidata:P1630" } + ] + dict = Tag2link.build_dict(data) + assert_equal "http://example.com/$1", dict["example"] + end + + def test_build_dict_rejects_multiple_equally_preferred_items + data = [ + { "key" => "Key:example", "url" => "http://example1.com/$1", "rank" => "preferred", "source" => "osmwiki:P8" }, + { "key" => "Key:example", "url" => "http://example2.com/$1", "rank" => "preferred", "source" => "osmwiki:P8" } + ] + dict = Tag2link.build_dict(data) + assert_not_includes dict, "example" + + data = [ + { "key" => "Key:example", "url" => "http://example1.com/$1", "rank" => "normal", "source" => "osmwiki:P8" }, + { "key" => "Key:example", "url" => "http://example2.com/$1", "rank" => "normal", "source" => "osmwiki:P8" } + ] + dict = Tag2link.build_dict(data) + assert_not_includes dict, "example" + end end -- 2.39.5