]> git.openstreetmap.org Git - nominatim.git/commitdiff
initial flex import scripts
author Sarah Hoffmann <lonvia@denofr.de>
Thu, 3 Nov 2022 16:15:01 +0000 (17:15 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 10 Nov 2022 08:37:38 +0000 (09:37 +0100)
Only implements the extratags style for the moment. Tests pass
for the same behaviour as the gazetteer output. Updates still need
to be done.

CMakeLists.txt
nominatim/clicmd/args.py
nominatim/tools/exec_utils.py
settings/flex-base.lua [new file with mode: 0644]
settings/import-extratags.lua [new file with mode: 0644]
test/bdd/osm2pgsql/import/tags.feature
test/bdd/steps/steps_osm_data.py

index 036dda31b03418d575a763ae88705646bfd9a240..f5f776a171a0ddf9d6d8d6fcd9ed99a16cee7da9 100644 (file)
@@ -63,7 +63,6 @@ if (BUILD_IMPORTER AND BUILD_OSM2PGSQL)
     endif()
     set(BUILD_TESTS_SAVED "${BUILD_TESTS}")
     set(BUILD_TESTS off)
-    set(WITH_LUA off CACHE BOOL "")
     add_subdirectory(osm2pgsql)
     set(BUILD_TESTS ${BUILD_TESTS_SAVED})
 endif()
index 2f8273d602890be4971d59ac5495f7a54858cd48..b120ee7364f5a686d2e34563745274a4456ccd8e 100644 (file)
@@ -184,6 +184,7 @@ class NominatimArgs:
         return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.osm2pgsql_path,
                     osm2pgsql_cache=self.osm2pgsql_cache or default_cache,
                     osm2pgsql_style=self.config.get_import_style_file(),
+                    osm2pgsql_style_path=self.config.config_dir,
                     threads=self.threads or default_threads,
                     dsn=self.config.get_libpq_dsn(),
                     flatnode_file=str(self.config.get_path('FLATNODE_FILE') or ''),
index 610e2182d5436b8723b0bc3ebe2cb24fcd672f2c..675e070b3f30d00d8d9149c88454d2e61795279c 100644 (file)
@@ -10,6 +10,7 @@ Helper functions for executing external programs.
 from typing import Any, Union, Optional, Mapping, IO
 from pathlib import Path
 import logging
+import os
 import subprocess
 import urllib.request as urlrequest
 from urllib.parse import urlencode
@@ -120,9 +121,16 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
            '--log-progress', 'true',
            '--number-processes', str(options['threads']),
            '--cache', str(options['osm2pgsql_cache']),
-           '--output', 'gazetteer',
            '--style', str(options['osm2pgsql_style'])
           ]
+
+    if str(options['osm2pgsql_style']).endswith('.lua'):
+        env['LUA_PATH'] = ';'.join((str(options['osm2pgsql_style_path'] / 'flex-base.lua'),
+                                    os.environ.get('LUAPATH', ';')))
+        cmd.extend(('--output', 'flex'))
+    else:
+        cmd.extend(('--output', 'gazetteer'))
+
     if options['append']:
         cmd.append('--append')
     else:
diff --git a/settings/flex-base.lua b/settings/flex-base.lua
new file mode 100644 (file)
index 0000000..d91299a
--- /dev/null
@@ -0,0 +1,382 @@
+-- Core functions for Nominatim import flex style.
+--
+
+
+-- The single place table.
+-- Every row produced by this style is written here (see Place:write_row);
+-- an OSM object gets one row per main tag that is accepted for it.
+place_table = osm2pgsql.define_table{
+    name = "place",
+    -- NOTE(review): 'any' presumably allows nodes, ways and relations in the
+    -- same table, told apart by osm_type -- confirm against the osm2pgsql docs.
+    ids = { type = 'any', id_column = 'osm_id', type_column = 'osm_type' },
+    columns = {
+        -- class/type receive the key and value of the main tag of the row.
+        { column = 'class', type = 'text', not_null = true },
+        { column = 'type', type = 'text', not_null = true },
+        -- Place.new() normalises this to an integer in 1..15 (15 when the
+        -- tag is missing or invalid).
+        { column = 'admin_level', type = 'smallint' },
+        -- The three hstore columns are left NULL when the corresponding
+        -- collection of the Place is empty.
+        { column = 'name', type = 'hstore' },
+        { column = 'address', type = 'hstore' },
+        { column = 'extratags', type = 'hstore' },
+        { column = 'geometry', type = 'geometry', projection = 'WGS84', not_null = true },
+    }
+}
+
+------------- Place class ------------------------------------------
+
+-- Prototype table for Place objects; methods are looked up through __index.
+local Place = {}
+Place.__index = Place
+
+-- Create the working state for one OSM object.
+--
+-- object:    the OSM object handed in by osm2pgsql; its 'admin_level' tag
+--            is consumed via object:grab_tag().
+-- geom_func: function(object) that builds the geometry; it is stored here
+--            and only called once a row is actually written.
+--
+-- The admin level is forced to 15 unless the tag holds an integer
+-- in the range 1..15.
+function Place.new(object, geom_func)
+    local level = tonumber(object:grab_tag('admin_level'))
+    local usable = level ~= nil
+                   and level > 0 and level <= 15
+                   and math.floor(level) == level
+    if not usable then
+        level = 15
+    end
+
+    return setmetatable({
+        object = object,
+        geom_func = geom_func,
+        admin_level = level,
+        num_entries = 0,    -- number of rows written so far
+        has_name = false,   -- set once a tag that counts as a name was grabbed
+        names = {},
+        address = {},
+        extratags = {}
+    }, Place)
+end
+
+-- Drop every tag of the object for which data.match(key, value) is true.
+-- Does nothing when no match function is given.
+function Place:delete(data)
+    local matcher = data.match
+    if matcher == nil then
+        return
+    end
+
+    local tags = self.object.tags
+    -- Clearing existing keys while traversing with pairs() is permitted.
+    for key, value in pairs(tags) do
+        if matcher(key, value) then
+            tags[key] = nil
+        end
+    end
+end
+
+-- Move every tag accepted by data.match(key, value) from the object's tag
+-- list into the extratags collection.
+-- Returns the number of tags moved (0 when no match function is given).
+function Place:grab_extratags(data)
+    local matcher = data.match
+    if matcher == nil then
+        return 0
+    end
+
+    local tags = self.object.tags
+    local moved = 0
+    for key, value in pairs(tags) do
+        if matcher(key, value) then
+            tags[key] = nil
+            self.extratags[key] = value
+            moved = moved + 1
+        end
+    end
+
+    return moved
+end
+
+-- Move tags accepted by data.match(key, value) into the address collection.
+--
+-- Fields of `data`:
+--   match            function(key, value) -> boolean; nothing happens
+--                    when it is nil.
+--   include_on_name  when exactly true, a match also marks the place as
+--                    having a name.
+--   out_key          when set, the value is stored under this fixed key.
+--
+-- Returns the number of tags moved. With out_key set the function returns 1
+-- right after the first match; other matching tags stay in the tag list.
+-- NOTE(review): pairs() order is unspecified, so with several matching tags
+-- it is not defined which one wins in out_key mode -- confirm this is intended.
+function Place:grab_address(data)
+    local count = 0
+
+    if data.match ~= nil then
+        for k, v in pairs(self.object.tags) do
+            if data.match(k, v) then
+                self.object.tags[k] = nil
+
+                if data.include_on_name == true then
+                    self.has_name = true
+                end
+
+                if data.out_key ~= nil then
+                    self.address[data.out_key] = v
+                    return 1
+                end
+
+                -- Strip the 'addr:' / 'is_in:' prefix from the stored key.
+                if k:sub(1, 5) == 'addr:' then
+                    self.address[k:sub(6)] = v
+                elseif k:sub(1, 6) == 'is_in:' then
+                    self.address[k:sub(7)] = v
+                else
+                    self.address[k] = v
+                end
+                count = count + 1
+            end
+        end
+    end
+
+    return count
+end
+
+-- Store `value` under `key` in the address collection, replacing any
+-- previous entry. The object's tag list is not touched.
+function Place:set_address(key, value)
+    self.address[key] = value
+end
+
+-- Move every tag accepted by data.match(key, value) from the object's tag
+-- list into the names collection.
+-- A match marks the place as named unless data.include_on_name is
+-- explicitly false.
+-- Returns the number of tags moved (0 when no match function is given).
+function Place:grab_name(data)
+    local matcher = data.match
+    if matcher == nil then
+        return 0
+    end
+
+    local counts_as_name = data.include_on_name ~= false
+    local tags = self.object.tags
+    local found = 0
+
+    for key, value in pairs(tags) do
+        if matcher(key, value) then
+            tags[key] = nil
+            self.names[key] = value
+            if counts_as_name then
+                self.has_name = true
+            end
+            found = found + 1
+        end
+    end
+
+    return found
+end
+
+-- Forward to the grab_tag() method of the wrapped OSM object and return
+-- its result.
+-- NOTE(review): presumably this returns the value and removes the tag from
+-- the object -- confirm against the osm2pgsql flex documentation.
+function Place:grab_tag(key)
+    return self.object:grab_tag(key)
+end
+
+-- Return the tag table of the wrapped OSM object. This is the table itself,
+-- not a copy, so it only holds the tags that have not been grabbed or
+-- deleted yet.
+function Place:tags()
+    return self.object.tags
+end
+
+-- Decide whether the tag k=v qualifies as a main tag and, if so, write a row.
+--
+-- k:     tag key, used as the class of the row.
+-- v:     tag value; when nil it is looked up in the object's remaining tags.
+-- mtype: rule for the key. Either a string ('always', 'named',
+--        'named_with_key') or a table mapping tag values to such strings,
+--        with the entry at index 1 as default for unlisted values.
+--        nil means the key is not a main key.
+-- save_extra_mains: passed through to write_row().
+--
+-- Returns the result of write_row() when a row is attempted, 0 otherwise.
+function Place:write_place(k, v, mtype, save_extra_mains)
+    if mtype == nil then
+        return 0
+    end
+
+    v = v or self.object.tags[k]
+    if v == nil then
+        return 0
+    end
+
+    -- Resolve per-value rules to a single rule string.
+    if type(mtype) == 'table' then
+        mtype = mtype[v] or mtype[1]
+    end
+
+    if mtype == 'always' or (self.has_name and mtype == 'named') then
+        return self:write_row(k, v, save_extra_mains)
+    end
+
+    if mtype == 'named_with_key' then
+        -- Collect names that are specific to this key: '<k>:name' and
+        -- '<k>:name:<suffix>'. They are stored without the '<k>:' prefix.
+        local names = {}
+        local prefix = k .. ':name'
+        for namek, namev in pairs(self.object.tags) do
+            if namek:sub(1, #prefix) == prefix
+               and (#namek == #prefix
+                    or namek:sub(#prefix + 1, #prefix + 1) == ':') then
+                names[namek:sub(#k + 2)] = namev
+            end
+        end
+
+        if next(names) ~= nil then
+            -- Temporarily replace the general names with the key-specific
+            -- ones for this row only.
+            local saved_names = self.names
+            self.names = names
+
+            local results = self:write_row(k, v, save_extra_mains)
+
+            self.names = saved_names
+
+            return results
+        end
+    end
+
+    return 0
+end
+
+-- Insert one row with class k and type v into the place table.
+--
+-- The geometry is built on first use through geom_func and cached on the
+-- Place. When save_extra_mains is truthy, all remaining tags of the object
+-- except k are added to the extratags for this row only.
+--
+-- Returns 1 when a row was inserted, 0 when the geometry is null.
+function Place:write_row(k, v, save_extra_mains)
+    if self.geometry == nil then
+        self.geometry = self.geom_func(self.object)
+    end
+    if self.geometry:is_null() then
+        return 0
+    end
+
+    local tags = self.object.tags
+    local extras = self.extratags
+
+    if save_extra_mains then
+        for key, value in pairs(tags) do
+            if key ~= k then
+                extras[key] = value
+            end
+        end
+    end
+
+    -- `next(t) and t` yields nil for an empty table, so that the
+    -- column is left unset instead of receiving an empty collection.
+    place_table:insert{
+        class = k,
+        type = v,
+        admin_level = self.admin_level,
+        name = next(self.names) and self.names,
+        address = next(self.address) and self.address,
+        extratags = next(extras) and extras,
+        geometry = self.geometry
+    }
+
+    -- Take the temporarily added tags out again so they do not leak
+    -- into rows written later for the same object.
+    if save_extra_mains then
+        for key in pairs(tags) do
+            extras[key] = nil
+        end
+    end
+
+    self.num_entries = self.num_entries + 1
+
+    return 1
+end
+
+
+-- Build a predicate function(key, value) -> boolean from a description.
+--
+-- data.keys: list of key patterns. '*suffix' matches keys ending in the
+--            suffix, 'prefix*' matches keys starting with the prefix,
+--            anything else must match the key exactly. A lone '*' is ignored.
+-- data.tags: table mapping a key to the list of values to match.
+--
+-- The predicate is true when any of the patterns or key/value pairs matches.
+-- Returns nil when data is nil or an empty table.
+function tag_match(data)
+    if data == nil or next(data) == nil then
+        return nil
+    end
+
+    -- Turn a single key pattern into a test function (nil for a lone '*').
+    local function key_test(pattern)
+        if pattern:sub(1, 1) == '*' then
+            if #pattern == 1 then
+                return nil
+            end
+            local tail = pattern:sub(2)
+            return function (k, v)
+                return k:sub(-#tail) == tail
+            end
+        end
+
+        if pattern:sub(-1) == '*' then
+            local head = pattern:sub(1, #pattern - 1)
+            return function (k, v)
+                return k:sub(1, #head) == head
+            end
+        end
+
+        return function (k, v)
+            return k == pattern
+        end
+    end
+
+    local checks = {}
+
+    if data.keys ~= nil then
+        for _, pattern in pairs(data.keys) do
+            local check = key_test(pattern)
+            if check ~= nil then
+                checks[#checks + 1] = check
+            end
+        end
+    end
+
+    if data.tags ~= nil then
+        -- Convert the value lists into sets for direct lookup.
+        local lookup = {}
+        for key, values in pairs(data.tags) do
+            local set = {}
+            for _, value in pairs(values) do
+                set[value] = true
+            end
+            lookup[key] = set
+        end
+        checks[#checks + 1] = function (k, v)
+            local set = lookup[k]
+            return set ~= nil and set[v] ~= nil
+        end
+    end
+
+    return function (k, v)
+        for i = 1, #checks do
+            if checks[i](k, v) then
+                return true
+            end
+        end
+        return false
+    end
+end
+
+
+-- Process functions for all data types
+-- Node callback: process the tags of a node, using a point as geometry.
+function osm2pgsql.process_node(object)
+    local place = Place.new(object, function (node)
+        return node:as_point()
+    end)
+
+    process_tags(place)
+end
+
+-- Way callback: process the tags of a way. The geometry is the polygon
+-- built from the way or, when that is null, the linestring.
+function osm2pgsql.process_way(object)
+    local place = Place.new(object, function (way)
+        local area = way:as_polygon()
+        if not area:is_null() then
+            return area
+        end
+
+        return way:as_linestring()
+    end)
+
+    process_tags(place)
+end
+
+-- Geometry function for relations: returns the result of o:as_multipolygon().
+-- Meant to be used as a value in the RELATION_TYPES table of a style.
+function relation_as_multipolygon(o)
+    return o:as_multipolygon()
+end
+
+-- Geometry function for relations: builds the multilinestring of the
+-- relation and returns the result of calling line_merge() on it.
+-- Meant to be used as a value in the RELATION_TYPES table of a style.
+function relation_as_multiline(o)
+    local lines = o:as_multilinestring()
+    return lines:line_merge()
+end
+
+-- Relation callback: only relations whose 'type' tag has an entry in the
+-- style's RELATION_TYPES table are processed; the entry is the function
+-- that builds the geometry. All other relations are ignored.
+function osm2pgsql.process_relation(object)
+    local builder = RELATION_TYPES[object.tags.type]
+    if builder == nil then
+        return
+    end
+
+    process_tags(Place.new(object, builder))
+end
+
+-- Sort the tags of a Place into names, address parts and extratags and
+-- write one row for each main tag found.
+--
+-- o: a Place object (see Place.new).
+--
+-- The matchers and flags used here (PRE_DELETE, PRE_EXTRAS, COUNTRY_TAGS,
+-- HOUSENAME_TAGS, HOUSENUMBER_TAGS, POSTCODES, INTERPOLATION_TAGS,
+-- ADDRESS_TAGS, ADD_TIGER_COUNTY, NAMES, REFS, POST_DELETE, POST_EXTRAS,
+-- MAIN_KEYS, MAIN_FALLBACK_KEYS, SAVE_EXTRA_MAINS) are globals that the
+-- style file has to define.
+function process_tags(o)
+    -- class/type to use when the object has address information but no
+    -- main tag produced a row
+    local fallback
+
+    o:delete{match = PRE_DELETE}
+    o:grab_extratags{match = PRE_EXTRAS}
+
+    -- Exception for boundary/place double tagging: on administrative
+    -- boundaries the place tag becomes an extratag, unless its value
+    -- starts with 'isl'.
+    if o.object.tags.boundary == 'administrative' then
+        o:grab_extratags{match = function (k, v)
+            return k == 'place' and v:sub(1,3) ~= 'isl'
+        end}
+    end
+
+    -- address keys
+    -- Country tags are only accepted with a two-character value.
+    o:grab_address{match=function (k, v) return COUNTRY_TAGS(k, v) and #v == 2 end,
+                   out_key='country'}
+    if o:grab_name{match=HOUSENAME_TAGS} > 0 then
+        fallback = {'place', 'house'}
+    end
+    if o:grab_address{match=HOUSENUMBER_TAGS, include_on_name = true} > 0 and fallback == nil then
+        fallback = {'place', 'house'}
+    end
+    if o:grab_address{match=POSTCODES, out_key='postcode'} > 0 and fallback == nil then
+        fallback = {'place', 'postcode'}
+    end
+
+    local is_interpolation = o:grab_address{match=INTERPOLATION_TAGS} > 0
+
+    if ADD_TIGER_COUNTY then
+        local county = o:grab_tag('tiger:county')
+        if county ~= nil then
+            -- Replace everything from the first comma on with ' county';
+            -- values without a comma just get ' county' appended.
+            -- The substitution count must be a local, otherwise every
+            -- object with this tag would write to a global variable.
+            local replaced
+            county, replaced = county:gsub(',.*', ' county')
+            if replaced == 0 then
+                county = county .. ' county'
+            end
+            o:set_address('tiger:county', county)
+        end
+    end
+    o:grab_address{match=ADDRESS_TAGS}
+
+    -- Interpolations always produce exactly one place=houses row and
+    -- nothing else.
+    if is_interpolation then
+        o:write_place('place', 'houses', 'always', SAVE_EXTRA_MAINS)
+        return
+    end
+
+    -- name keys
+    o:grab_name{match = NAMES}
+    -- References are stored with the names but do not make the place 'named'.
+    o:grab_name{match = REFS, include_on_name = false}
+
+    o:delete{match = POST_DELETE}
+    o:grab_extratags{match = POST_EXTRAS}
+
+    -- collect main keys
+    local num_mains = 0
+    for k, v in pairs(o:tags()) do
+        num_mains = num_mains + o:write_place(k, v, MAIN_KEYS[k], SAVE_EXTRA_MAINS)
+    end
+
+    if num_mains == 0 then
+        -- No main tag matched: the first fallback key that yields a row wins.
+        for tag, mtype in pairs(MAIN_FALLBACK_KEYS) do
+            if o:write_place(tag, nil, mtype, SAVE_EXTRA_MAINS) > 0 then
+                return
+            end
+        end
+
+        -- Last resort: the class/type derived from the address tags above.
+        if fallback ~= nil then
+            o:write_place(fallback[1], fallback[2], 'always', SAVE_EXTRA_MAINS)
+        end
+    end
+end
+
+
diff --git a/settings/import-extratags.lua b/settings/import-extratags.lua
new file mode 100644 (file)
index 0000000..535af3c
--- /dev/null
@@ -0,0 +1,130 @@
+-- Load the shared processing code. It defines the helper functions used
+-- below (tag_match, relation_as_*) and reads the global tables and flags
+-- set in this file when objects are processed.
+require('flex-base')
+
+-- Relations are only imported when their 'type' tag is listed here.
+-- The value is the function that creates the geometry for the relation.
+RELATION_TYPES = {
+    multipolygon = relation_as_multipolygon,
+    boundary = relation_as_multipolygon,
+    waterway = relation_as_multiline
+}
+
+-- Keys that may create a row in the place table, with the rule that decides
+-- whether a row is written:
+--   'always'         - write the row unconditionally
+--   'named'          - write the row only when the object has a name
+--   'named_with_key' - write the row only when the object has a name that is
+--                      specific to the key ('<key>:name', '<key>:name:*')
+-- A table gives rules per tag value; the first (unnamed) entry is the
+-- default for all values that are not listed.
+MAIN_KEYS = {
+    emergency = 'always',
+    historic = 'always',
+    military = 'always',
+    natural = 'named',
+    landuse = 'named',
+    highway = {'always',
+               street_lamp = 'named',
+               traffic_signals = 'named',
+               service = 'named',
+               cycleway = 'named',
+               path = 'named',
+               footway = 'named',
+               steps = 'named',
+               bridleway = 'named',
+               track = 'named',
+               motorway_link = 'named',
+               trunk_link = 'named',
+               primary_link = 'named',
+               secondary_link = 'named',
+               tertiary_link = 'named'},
+    railway = 'named',
+    man_made = 'always',
+    aerialway = 'always',
+    boundary = {'named',
+                postal_code = 'named'},
+    aeroway = 'always',
+    amenity = 'always',
+    club = 'always',
+    craft = 'always',
+    leisure = 'always',
+    office = 'always',
+    mountain_pass = 'always',
+    shop = 'always',
+    tourism = 'always',
+    bridge = 'named_with_key',
+    tunnel = 'named_with_key',
+    waterway = 'named',
+    place = 'always'
+}
+
+-- Keys that are only tried when none of the MAIN_KEYS produced a row.
+-- Processing stops at the first key that yields a row; the table is
+-- traversed with pairs(), so the order in which keys are tried is not defined.
+MAIN_FALLBACK_KEYS = {
+    building = 'named',
+    landuse = 'named',
+    junction = 'named',
+    healthcare = 'named'
+}
+
+
+-- Tags that are dropped before any other processing.
+-- 'keys' removes tags by key (a leading or trailing '*' is a wildcard),
+-- 'tags' removes specific key/value combinations.
+PRE_DELETE = tag_match{keys = {'note', 'note:*', 'source', 'source*', 'attribution',
+                               'comment', 'fixme', 'FIXME', 'created_by', 'NHD:*',
+                               'nhd:*', 'gnis:*', 'geobase:*', 'KSJ2:*', 'yh:*',
+                               'osak:*', 'naptan:*', 'CLC:*', 'import', 'it:fvg:*',
+                               'type', 'lacounty:*', 'ref:ruian:*', 'building:ruian:type',
+                               'ref:linz:*', 'is_in:postcode'},
+                       tags = {emergency = {'yes', 'no', 'fire_hydrant'},
+                               historic = {'yes', 'no'},
+                               military = {'yes', 'no'},
+                               natural = {'yes', 'no', 'coastline'},
+                               highway = {'no', 'turning_circle', 'mini_roundabout',
+                                          'noexit', 'crossing', 'give_way', 'stop'},
+                               railway = {'level_crossing', 'no', 'rail'},
+                               man_made = {'survey_point', 'cutline'},
+                               aerialway = {'pylon', 'no'},
+                               aeroway = {'no'},
+                               amenity = {'no'},
+                               club = {'no'},
+                               craft = {'no'},
+                               leisure = {'no'},
+                               office = {'no'},
+                               mountain_pass = {'no'},
+                               shop = {'no'},
+                               tourism = {'yes', 'no'},
+                               bridge = {'no'},
+                               tunnel = {'no'},
+                               waterway = {'riverbank'},
+                               building = {'no'},
+                               boundary = {'place'}}
+                      }
+
+-- Tags that are dropped after address and name tags have been extracted.
+POST_DELETE = tag_match{keys = {'tiger:*'}}
+
+-- Tags that are moved to the extratags before address and name tags
+-- are extracted.
+PRE_EXTRAS = tag_match{keys = {'*:prefix', '*:suffix', 'name:prefix:*', 'name:suffix:*',
+                               'name:etymology', 'name:signed', 'name:botanical',
+                               'wikidata', '*:wikidata',
+                               'addr:street:name', 'addr:street:type'}
+                      }
+
+
+-- Tags that go into the name column and mark the object as named.
+NAMES = tag_match{keys = {'name', 'name:*',
+                          'int_name', 'int_name:*',
+                          'nat_name', 'nat_name:*',
+                          'reg_name', 'reg_name:*',
+                          'loc_name', 'loc_name:*',
+                          'old_name', 'old_name:*',
+                          'alt_name', 'alt_name:*', 'alt_name_*',
+                          'official_name', 'official_name:*',
+                          'place_name', 'place_name:*',
+                          'short_name', 'short_name:*', 'brand'}}
+
+-- Tags that go into the name column without marking the object as named.
+REFS = tag_match{keys = {'ref', 'int_ref', 'nat_ref', 'reg_ref', 'loc_ref', 'old_ref',
+                         'iata', 'icao', 'pcode', 'pcode:*', 'ISO3166-2'}}
+
+-- Tags saved as address part 'postcode'.
+POSTCODES = tag_match{keys = {'postal_code', 'postcode', 'addr:postcode',
+                              'tiger:zip_left', 'tiger:zip_right'}}
+
+-- Tags saved as address part 'country'. Only values with exactly two
+-- characters are accepted.
+COUNTRY_TAGS = tag_match{keys = {'country_code', 'ISO3166-1',
+                                 'addr:country_code', 'is_in:country_code',
+                                 'addr:country', 'is_in:country'}}
+
+-- Tags saved as name; objects with such a tag fall back to place=house.
+HOUSENAME_TAGS = tag_match{keys = {'addr:housename'}}
+
+-- Tags saved as address part which also mark the object as named; objects
+-- with such a tag fall back to place=house.
+HOUSENUMBER_TAGS = tag_match{keys = {'addr:housenumber', 'addr:conscriptionnumber',
+                                     'addr:streetnumber'}}
+
+-- Objects with one of these tags are written as a single place=houses row.
+INTERPOLATION_TAGS = tag_match{keys = {'addr:interpolation'}}
+
+-- All remaining address tags; the 'addr:'/'is_in:' prefix is removed
+-- from the key when saving.
+ADDRESS_TAGS = tag_match{keys = {'addr:*', 'is_in:*'}}
+-- When true, the tiger:county tag is saved as address part 'tiger:county'
+-- with the part after the first comma replaced by ' county'.
+ADD_TIGER_COUNTY = true
+
+-- When true, all tags still left on the object when a row is written are
+-- added to the extratags of that row (except the main tag itself).
+SAVE_EXTRA_MAINS = true
+
index 8c3d5c139a1ac90d60ff26bd4b9d83fc4a2e4965..83d7fe52e7d48309e825ff36e991c253e236fb33 100644 (file)
@@ -106,10 +106,10 @@ Feature: Tag evaluation
             n7002 Thighway=primary,bridge=yes,bridge:name=1
             """
         Then place contains exactly
-            | object        | class   | type    | name        | extratags         |
-            | N7001         | highway | primary | 'name': '1' | -                 |
-            | N7002:highway | highway | primary | -           | 'bridge:name': '1'|
-            | N7002:bridge  | bridge  | yes     | 'name': '1' | 'bridge:name': '1'|
+            | object        | class   | type    | name        | extratags+bridge:name |
+            | N7001         | highway | primary | 'name': '1' | -                     |
+            | N7002:highway | highway | primary | -           | 1                     |
+            | N7002:bridge  | bridge  | yes     | 'name': '1' | 1                     |
 
 
     Scenario: Global fallback and skipping
@@ -153,13 +153,15 @@ Feature: Tag evaluation
             n10002 Tboundary=natural,place=city,name=B
             n10003 Tboundary=administrative,place=island,name=C
             """
-        Then place contains exactly
+        Then place contains
             | object          | class    | type           | extratags       |
             | N10001          | boundary | administrative | 'place': 'city' |
-            | N10002:boundary | boundary | natural        | - |
-            | N10002:place    | place    | city           | - |
-            | N10003:boundary | boundary | administrative | - |
-            | N10003:place    | place    | island         | - |
+        And place contains
+            | object          | class    | type           |
+            | N10002:boundary | boundary | natural        |
+            | N10002:place    | place    | city           |
+            | N10003:boundary | boundary | administrative |
+            | N10003:place    | place    | island         |
 
 
     Scenario: Shorten tiger:county tags
@@ -200,6 +202,6 @@ Feature: Tag evaluation
             n13002 Taddr:interpolation=even,place=city
             """
         Then place contains exactly
-            | object | class | type   | extratags       | address                 |
-            | N13001 | place | houses | -               | 'interpolation': 'odd'  |
-            | N13002 | place | houses | 'place': 'city' | 'interpolation': 'even' |
+            | object | class | type   | address                 |
+            | N13001 | place | houses | 'interpolation': 'odd'  |
+            | N13002 | place | houses | 'interpolation': 'even' |
index 6271f6b827dae37c9fb8423a0af016af7d19288e..0082bd081e3056445ea3dd6383fb3db4cea9611c 100644 (file)
@@ -18,6 +18,7 @@ def get_osm2pgsql_options(nominatim_env, fname, append):
                 osm2pgsql=str(nominatim_env.build_dir / 'osm2pgsql' / 'osm2pgsql'),
                 osm2pgsql_cache=50,
                 osm2pgsql_style=str(nominatim_env.get_test_config().get_import_style_file()),
+                osm2pgsql_style_path=nominatim_env.get_test_config().config_dir,
                 threads=1,
                 dsn=nominatim_env.get_libpq_dsn(),
                 flatnode_file='',