From: Sarah Hoffmann Date: Mon, 5 Apr 2021 13:48:22 +0000 (+0200) Subject: Merge pull request #2250 from lonvia/save-transliterated-housenumbers X-Git-Tag: v3.7.0~3 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/5d69c7ade1938818484bb81f36ec12c64e79bf9d?hp=c0f0b665091dfacd29727efed5efbe111c5900bf Merge pull request #2250 from lonvia/save-transliterated-housenumbers Switch to saving transliterated housenumbers in placex --- diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index 2b39443f..dd205502 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -621,7 +621,7 @@ class SearchDescription $aOrder[0] .= ' SELECT place_id'; $aOrder[0] .= ' FROM placex'; $aOrder[0] .= ' WHERE parent_place_id = search_name.place_id'; - $aOrder[0] .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'"; + $aOrder[0] .= " AND housenumber ~* E'".$sHouseNumberRegex."'"; $aOrder[0] .= ' LIMIT 1'; $aOrder[0] .= ') '; // also housenumbers from interpolation lines table are needed @@ -751,7 +751,7 @@ class SearchDescription $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M'; $sSQL = 'SELECT place_id FROM placex '; $sSQL .= 'WHERE parent_place_id in ('.$sPlaceIDs.')'; - $sSQL .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'"; + $sSQL .= " AND housenumber ~* E'".$sHouseNumberRegex."'"; $sSQL .= $this->oContext->excludeSQL(' AND place_id'); Debug::printSQL($sSQL); diff --git a/lib-sql/functions/address_lookup.sql b/lib-sql/functions/address_lookup.sql index f49bc93e..5ec977d1 100644 --- a/lib-sql/functions/address_lookup.sql +++ b/lib-sql/functions/address_lookup.sql @@ -164,7 +164,10 @@ BEGIN -- POI objects in the placex table IF place IS NULL THEN SELECT parent_place_id as place_id, country_code, - housenumber, postcode, + coalesce(address->'housenumber', + address->'streetnumber', + address->'conscriptionnumber')::text as housenumber, + postcode, class, type, name, address, centroid @@ -178,7 +181,7 @@ BEGIN -- place we should be using instead. IF place IS NULL THEN select coalesce(linked_place_id, place_id) as place_id, country_code, - housenumber, postcode, + null::text as housenumber, postcode, class, type, null as name, address, null as centroid diff --git a/lib-sql/functions/normalization.sql b/lib-sql/functions/normalization.sql index 6fcdf552..f283f916 100644 --- a/lib-sql/functions/normalization.sql +++ b/lib-sql/functions/normalization.sql @@ -47,6 +47,25 @@ END; $$ LANGUAGE plpgsql; +-- Create housenumber tokens from an OSM addr:housenumber. +-- The housnumber is split at comma and semicolon as necessary. +-- The function returns the normalized form of the housenumber suitable +-- for comparison. +CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT) + RETURNS TEXT + AS $$ +DECLARE + normtext TEXT; +BEGIN + SELECT array_to_string(array_agg(trans), ';') + INTO normtext + FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) + FROM (SELECT make_standard_name(h) as lookup_word + FROM regexp_split_to_table(housenumber, '[,;]') h) x) y; + + return normtext; +END; +$$ LANGUAGE plpgsql STABLE STRICT; CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT) RETURNS INTEGER diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql index 086ba930..6998224e 100644 --- a/lib-sql/functions/placex_triggers.sql +++ b/lib-sql/functions/placex_triggers.sql @@ -666,20 +666,17 @@ BEGIN NEW.housenumber := NULL; IF NEW.address is not NULL THEN IF NEW.address ? 'conscriptionnumber' THEN - i := getorcreate_housenumber_id(make_standard_name(NEW.address->'conscriptionnumber')); IF NEW.address ? 'streetnumber' THEN - i := getorcreate_housenumber_id(make_standard_name(NEW.address->'streetnumber')); NEW.housenumber := (NEW.address->'conscriptionnumber') || '/' || (NEW.address->'streetnumber'); ELSE NEW.housenumber := NEW.address->'conscriptionnumber'; END IF; ELSEIF NEW.address ? 'streetnumber' THEN NEW.housenumber := NEW.address->'streetnumber'; - i := getorcreate_housenumber_id(make_standard_name(NEW.address->'streetnumber')); ELSEIF NEW.address ? 'housenumber' THEN NEW.housenumber := NEW.address->'housenumber'; - i := getorcreate_housenumber_id(make_standard_name(NEW.housenumber)); END IF; + NEW.housenumber := create_housenumber_id(NEW.housenumber); addr_street := NEW.address->'street'; addr_place := NEW.address->'place'; diff --git a/lib-sql/words.sql b/lib-sql/words.sql index ac379221..8be17814 100644 --- a/lib-sql/words.sql +++ b/lib-sql/words.sql @@ -5,7 +5,7 @@ CREATE TABLE word_frequencies AS GROUP BY id); select count(getorcreate_postcode_id(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null; -select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w; +select count(create_housenumber_id(v)) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w; -- copy the word frequencies update word set search_name_count = count from word_frequencies wf where wf.id = word.word_id; diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index 756a1d3a..b5f0b80e 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -130,3 +130,29 @@ def add_nominatim_property_table(conn, config, **_): value TEXT); GRANT SELECT ON TABLE nominatim_properties TO "{}"; """.format(config.DATABASE_WEBUSER)) + +@_migration(3, 6, 0, 0) +def change_housenumber_transliteration(conn, **_): + """ Transliterate housenumbers. + + The database schema switched from saving raw housenumbers in + placex.housenumber to saving transliterated ones. + """ + with conn.cursor() as cur: + cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT) + RETURNS TEXT AS $$ + DECLARE + normtext TEXT; + BEGIN + SELECT array_to_string(array_agg(trans), ';') + INTO normtext + FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) + FROM (SELECT make_standard_name(h) as lookup_word + FROM regexp_split_to_table(housenumber, '[,;]') h) x) y; + return normtext; + END; + $$ LANGUAGE plpgsql STABLE STRICT;""") + cur.execute("DELETE FROM word WHERE class = 'place' and type = 'house'") + cur.execute("""UPDATE placex + SET housenumber = create_housenumber_id(housenumber) + WHERE housenumber is not null""") diff --git a/nominatim/version.py b/nominatim/version.py index e7f31a12..352e6e55 100644 --- a/nominatim/version.py +++ b/nominatim/version.py @@ -10,7 +10,7 @@ Version information for Nominatim. # and must always be increased when there is a change to the database or code # that requires a migration. # Released versions always have a database patch level of 0. -NOMINATIM_VERSION = (3, 6, 0, 0) +NOMINATIM_VERSION = (3, 6, 0, 1) POSTGRESQL_REQUIRED_VERSION = (9, 3) POSTGIS_REQUIRED_VERSION = (2, 2) diff --git a/test/bdd/db/query/normalization.feature b/test/bdd/db/query/normalization.feature index 32052647..8a324a22 100644 --- a/test/bdd/db/query/normalization.feature +++ b/test/bdd/db/query/normalization.feature @@ -137,7 +137,7 @@ Feature: Import and search of names | ID | osm_type | osm_id | | 0 | R | 1 | - Scenario: Unprintable characters in postcodes are ignored + Scenario: Unprintable characters in postcodes are ignored Given the named places | osm | class | type | address | | N234 | amenity | prison | 'postcode' : u'1234\u200e' | @@ -146,3 +146,60 @@ Feature: Import and search of names Then results contain | ID | osm_type | | 0 | P | + + Scenario Outline: Housenumbers with special characters are found + Given the grid + | 1 | | | | 2 | + | | | 9 | | | + And the places + | osm | class | type | name | geometry | + | W1 | highway | primary | Main St | 1,2 | + And the places + | osm | class | type | housenr | geometry | + | N1 | building | yes | | 9 | + When importing + And searching for "Main St " + Then results contain + | osm_type | osm_id | name | + | N | 1 | , Main St | + + Examples: + | nr | + | 1 | + | 3456 | + | 1 a | + | 56b | + | 1 A | + | 2號 | + | 1Б | + | 1 к1 | + | 23-123 | + + Scenario Outline: Housenumbers in lists are found + Given the grid + | 1 | | | | 2 | + | | | 9 | | | + And the places + | osm | class | type | name | geometry | + | W1 | highway | primary | Main St | 1,2 | + And the places + | osm | class | type | housenr | geometry | + | N1 | building | yes | | 9 | + When importing + And searching for "Main St " + Then results contain + | osm_type | osm_id | name | + | N | 1 | , Main St | + + Examples: + | nr-list | nr | + | 1,2,3 | 1 | + | 1,2,3 | 2 | + | 1, 2, 3 | 3 | + | 45 ;67;3 | 45 | + | 45 ;67;3 | 67 | + | 1a;1k | 1a | + | 1a;1k | 1k | + | 34/678 | 34 | + | 34/678 | 678 | + | 34/678 | 34/678 |