]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
authorSarah Hoffmann <lonvia@denofr.de>
Mon, 5 Apr 2021 13:48:39 +0000 (15:48 +0200)
committerSarah Hoffmann <lonvia@denofr.de>
Mon, 5 Apr 2021 13:48:39 +0000 (15:48 +0200)
12 files changed:
docs/admin/Installation.md
lib-php/SearchDescription.php
lib-sql/functions/address_lookup.sql
lib-sql/functions/normalization.sql
lib-sql/functions/placex_triggers.sql
lib-sql/indices.sql
lib-sql/words.sql
nominatim/db/sql_preprocessor.py
nominatim/tools/migration.py
nominatim/version.py
test/bdd/db/query/normalization.feature
test/php/Nominatim/TokenListTest.php

index 6237a9d4a37b9026c4d1dd3938a6f3cb76e945b1..32fa8caad86d1e91a31f187766d66fc2a35a9f1e 100644 (file)
@@ -37,7 +37,7 @@ For compiling:
 
 For running Nominatim:
 
-  * [PostgreSQL](https://www.postgresql.org) (9.3+)
+  * [PostgreSQL](https://www.postgresql.org) (9.3+ will work, 11+ strongly recommended)
   * [PostGIS](https://postgis.net) (2.2+)
   * [Python 3](https://www.python.org/) (3.5+)
   * [Psycopg2](https://www.psycopg.org) (2.7+)
index 2b39443f6e79f1b86b1571520ac0c1d4369cf6bc..dd20550214325b952452ec98de3bf4a96351071e 100644 (file)
@@ -621,7 +621,7 @@ class SearchDescription
             $aOrder[0] .= '  SELECT place_id';
             $aOrder[0] .= '  FROM placex';
             $aOrder[0] .= '  WHERE parent_place_id = search_name.place_id';
-            $aOrder[0] .= "    AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'";
+            $aOrder[0] .= "    AND housenumber ~* E'".$sHouseNumberRegex."'";
             $aOrder[0] .= '  LIMIT 1';
             $aOrder[0] .= ') ';
             // also housenumbers from interpolation lines table are needed
@@ -751,7 +751,7 @@ class SearchDescription
         $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
         $sSQL = 'SELECT place_id FROM placex ';
         $sSQL .= 'WHERE parent_place_id in ('.$sPlaceIDs.')';
-        $sSQL .= "  AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'";
+        $sSQL .= "  AND housenumber ~* E'".$sHouseNumberRegex."'";
         $sSQL .= $this->oContext->excludeSQL(' AND place_id');
 
         Debug::printSQL($sSQL);
index 3206e8dbe4ba3f2f7832621a53f16168afb6946c..03b0ea54d9f68105027e22ab3725f936b74fe1c3 100644 (file)
@@ -164,7 +164,10 @@ BEGIN
   -- POI objects in the placex table
   IF place IS NULL THEN
     SELECT parent_place_id as place_id, country_code,
-           housenumber, postcode,
+           coalesce(address->'housenumber',
+                    address->'streetnumber',
+                    address->'conscriptionnumber')::text as housenumber,
+           postcode,
            class, type,
            name, address,
            centroid
@@ -178,7 +181,7 @@ BEGIN
   -- place we should be using instead.
   IF place IS NULL THEN
     select coalesce(linked_place_id, place_id) as place_id,  country_code,
-           housenumber, postcode,
+           null::text as housenumber, postcode,
            class, type,
            null as name, address,
            null as centroid
index 6fcdf55250511077b74366651f062aac45125c55..f283f9165d9e324590ecbdaaa5fc9fc872651e16 100644 (file)
@@ -47,6 +47,25 @@ END;
 $$
 LANGUAGE plpgsql;
 
+-- Create housenumber tokens from an OSM addr:housenumber.
+-- The housnumber is split at comma and semicolon as necessary.
+-- The function returns the normalized form of the housenumber suitable
+-- for comparison.
+CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
+  RETURNS TEXT
+  AS $$
+DECLARE
+  normtext TEXT;
+BEGIN
+  SELECT array_to_string(array_agg(trans), ';')
+    INTO normtext
+    FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word)
+          FROM (SELECT make_standard_name(h) as lookup_word
+                FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
+
+  return normtext;
+END;
+$$ LANGUAGE plpgsql STABLE STRICT;
 
 CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
   RETURNS INTEGER
index 086ba9300f8145d317b7e915894aa4c514f9864d..6998224e7b851893e590d9f63ca47ca6acd1b18e 100644 (file)
@@ -666,20 +666,17 @@ BEGIN
   NEW.housenumber := NULL;
   IF NEW.address is not NULL THEN
       IF NEW.address ? 'conscriptionnumber' THEN
-        i := getorcreate_housenumber_id(make_standard_name(NEW.address->'conscriptionnumber'));
         IF NEW.address ? 'streetnumber' THEN
-            i := getorcreate_housenumber_id(make_standard_name(NEW.address->'streetnumber'));
             NEW.housenumber := (NEW.address->'conscriptionnumber') || '/' || (NEW.address->'streetnumber');
         ELSE
             NEW.housenumber := NEW.address->'conscriptionnumber';
         END IF;
       ELSEIF NEW.address ? 'streetnumber' THEN
         NEW.housenumber := NEW.address->'streetnumber';
-        i := getorcreate_housenumber_id(make_standard_name(NEW.address->'streetnumber'));
       ELSEIF NEW.address ? 'housenumber' THEN
         NEW.housenumber := NEW.address->'housenumber';
-        i := getorcreate_housenumber_id(make_standard_name(NEW.housenumber));
       END IF;
+      NEW.housenumber := create_housenumber_id(NEW.housenumber);
 
       addr_street := NEW.address->'street';
       addr_place := NEW.address->'place';
index f8c9d2ce86511ef993231507883aa64998ef2eee..c121a9631b84b5d758dc552f9d45e84695bdc8be 100644 (file)
@@ -61,4 +61,11 @@ CREATE INDEX {{sql.if_index_not_exists}} idx_postcode_postcode
     ON search_name USING GIN (name_vector) WITH (fastupdate = off) {{db.tablespace.search_index}};
   CREATE INDEX {{sql.if_index_not_exists}} idx_search_name_centroid
     ON search_name USING GIST (centroid) {{db.tablespace.search_index}};
+
+  {% if postgres.has_index_non_key_column %}
+    CREATE INDEX {{sql.if_index_not_exists}} idx_placex_housenumber
+      ON placex USING btree (parent_place_id) INCLUDE (housenumber) WHERE housenumber is not null;
+    CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_osm_id_with_hnr
+      ON location_property_osmline USING btree(parent_place_id) INCLUDE (startnumber, endnumber);
+  {% endif %}
 {% endif %}
index ac379221c69def092639b340fbe5659b2332443e..8be178142f0d272b9d8bf6bc081b9f8341b10f7f 100644 (file)
@@ -5,7 +5,7 @@ CREATE TABLE word_frequencies AS
  GROUP BY id);
 
 select count(getorcreate_postcode_id(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null;
-select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
+select count(create_housenumber_id(v)) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
 
 -- copy the word frequencies
 update word set search_name_count = count from word_frequencies wf where wf.id = word.word_id;
index 852447525ab26fcb0dc80edf69bfc5096b9c2368..7ffe88818ba94a9538a9f8a61e7e8bb658e55e00 100644 (file)
@@ -49,12 +49,21 @@ def _setup_postgres_sql(conn):
     pg_version = conn.server_version_tuple()
     # CREATE INDEX IF NOT EXISTS was introduced in PG9.5.
     # Note that you need to ignore failures on older versions when
-    # unsing this construct.
+    # using this construct.
     out['if_index_not_exists'] = ' IF NOT EXISTS ' if pg_version >= (9, 5, 0) else ''
 
     return out
 
 
+def _setup_postgresql_features(conn):
+    """ Set up a dictionary with various optional Postgresql/Postgis features that
+        depend on the database version.
+    """
+    pg_version = conn.server_version_tuple()
+    return {
+        'has_index_non_key_column' : pg_version >= (11, 0, 0)
+    }
+
 class SQLPreprocessor: # pylint: disable=too-few-public-methods
     """ A environment for preprocessing SQL files from the
         lib-sql directory.
@@ -79,6 +88,7 @@ class SQLPreprocessor: # pylint: disable=too-few-public-methods
         self.env.globals['config'] = config
         self.env.globals['db'] = db_info
         self.env.globals['sql'] = _setup_postgres_sql(conn)
+        self.env.globals['postgres'] = _setup_postgresql_features(conn)
         self.env.globals['modulepath'] = config.DATABASE_MODULE_PATH or \
                                          str((config.project_dir / 'module').resolve())
 
index 756a1d3a903326a8540dcbee9ee3b545bb11d843..b5f0b80e7786328955c910e7b58ffa6b7190c71c 100644 (file)
@@ -130,3 +130,29 @@ def add_nominatim_property_table(conn, config, **_):
                                value TEXT);
                            GRANT SELECT ON TABLE nominatim_properties TO "{}";
                         """.format(config.DATABASE_WEBUSER))
+
+@_migration(3, 6, 0, 0)
+def change_housenumber_transliteration(conn, **_):
+    """ Transliterate housenumbers.
+
+        The database schema switched from saving raw housenumbers in
+        placex.housenumber to saving transliterated ones.
+    """
+    with conn.cursor() as cur:
+        cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
+                       RETURNS TEXT AS $$
+                       DECLARE
+                         normtext TEXT;
+                       BEGIN
+                         SELECT array_to_string(array_agg(trans), ';')
+                           INTO normtext
+                           FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word)
+                                 FROM (SELECT make_standard_name(h) as lookup_word
+                                       FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
+                         return normtext;
+                       END;
+                       $$ LANGUAGE plpgsql STABLE STRICT;""")
+        cur.execute("DELETE FROM word WHERE class = 'place' and type = 'house'")
+        cur.execute("""UPDATE placex
+                       SET housenumber = create_housenumber_id(housenumber)
+                       WHERE housenumber is not null""")
index e7f31a12b5ab4dcebcfed07e9a32effdb1e716c4..352e6e55d79c39e455db9d003cd863f2bc337ad9 100644 (file)
@@ -10,7 +10,7 @@ Version information for Nominatim.
 # and must always be increased when there is a change to the database or code
 # that requires a migration.
 # Released versions always have a database patch level of 0.
-NOMINATIM_VERSION = (3, 6, 0, 0)
+NOMINATIM_VERSION = (3, 6, 0, 1)
 
 POSTGRESQL_REQUIRED_VERSION = (9, 3)
 POSTGIS_REQUIRED_VERSION = (2, 2)
index 3205264753f1c67cef5a0beefb2af0e5425df293..8a324a229b86bfaa450a35d5d095bed647d483d3 100644 (file)
@@ -137,7 +137,7 @@ Feature: Import and search of names
          | ID | osm_type | osm_id |
          | 0  | R        | 1 |
 
-     Scenario: Unprintable characters in postcodes are ignored
+    Scenario: Unprintable characters in postcodes are ignored
         Given the named places
             | osm  | class   | type   | address |
             | N234 | amenity | prison | 'postcode' : u'1234\u200e' |
@@ -146,3 +146,60 @@ Feature: Import and search of names
         Then results contain
          | ID | osm_type |
          | 0  | P        |
+
+    Scenario Outline: Housenumbers with special characters are found
+        Given the grid
+            | 1 |  |   |  | 2 |
+            |   |  | 9 |  |   |
+        And the places
+            | osm | class   | type    | name    | geometry |
+            | W1  | highway | primary | Main St | 1,2      |
+        And the places
+            | osm | class    | type | housenr | geometry |
+            | N1  | building | yes  | <nr>    | 9        |
+        When importing
+        And searching for "Main St <nr>"
+        Then results contain
+         | osm_type | osm_id | name |
+         | N        | 1      | <nr>, Main St |
+
+    Examples:
+        | nr |
+        | 1  |
+        | 3456 |
+        | 1 a |
+        | 56b |
+        | 1 A |
+        | 2號 |
+        | 1Б  |
+        | 1 к1 |
+        | 23-123 |
+
+    Scenario Outline: Housenumbers in lists are found
+        Given the grid
+            | 1 |  |   |  | 2 |
+            |   |  | 9 |  |   |
+        And the places
+            | osm | class   | type    | name    | geometry |
+            | W1  | highway | primary | Main St | 1,2      |
+        And the places
+            | osm | class    | type | housenr   | geometry |
+            | N1  | building | yes  | <nr-list> | 9        |
+        When importing
+        And searching for "Main St <nr>"
+        Then results contain
+         | osm_type | osm_id | name |
+         | N        | 1      | <nr-list>, Main St |
+
+    Examples:
+        | nr-list    | nr |
+        | 1,2,3      | 1  |
+        | 1,2,3      | 2  |
+        | 1, 2, 3    | 3  |
+        | 45 ;67;3   | 45 |
+        | 45 ;67;3   | 67 |
+        | 1a;1k      | 1a |
+        | 1a;1k      | 1k |
+        | 34/678     | 34 |
+        | 34/678     | 678 |
+        | 34/678     | 34/678 |
index 3ef4d84d715df5bc4ecc8776a5986190dcff498d..14a595ea9f3c115da1e8063e3436739174699e18 100644 (file)
@@ -77,6 +77,15 @@ class TokenTest extends \PHPUnit\Framework\TestCase
                                                          'type' => 'house'
                                                         ));
                     }
+                    if (preg_match('/hauptstr/', $sql)) {
+                        $aResults[] = $this->wordResult(array(
+                                                         'word_id' => 999,
+                                                         'word_token' => 'hauptstr',
+                                                         'class' => 'place',
+                                                         'type' => 'street',
+                                                         'operator' => true
+                                                        ));
+                    }
                     if (preg_match('/64286/', $sql)) {
                         $aResults[] = $this->wordResult(array(
                                                          'word_id' => 999,
@@ -116,11 +125,12 @@ class TokenTest extends \PHPUnit\Framework\TestCase
 
         $TL = new TokenList;
         $TL->addTokensFromDB($oDbStub, $aTokens, $aCountryCodes, $sNormQuery, $this->oNormalizer);
-        $this->assertEquals(4, $TL->count());
+        $this->assertEquals(5, $TL->count());
 
         $this->assertEquals(array(new Token\HouseNumber(999, '1051')), $TL->get('1051'));
         $this->assertEquals(array(new Token\Country(999, 'de')), $TL->get('alemagne'));
         $this->assertEquals(array(new Token\Postcode(999, '64286')), $TL->get('64286'));
         $this->assertEquals(array(new Token\Word(999, true, 533, 0)), $TL->get('darmstadt'));
+        $this->assertEquals(array(new Token\SpecialTerm(999, 'place', 'street', true)), $TL->get('hauptstr'));
     }
 }