From: Sarah Hoffmann Date: Wed, 23 Sep 2020 07:21:09 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~213 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/b1b25d9a10ea7b97e12effc4313f140c17730c78?hp=61e3a053723a6a1131e234aa8ad1a37f18bbec56 Merge remote-tracking branch 'upstream/master' --- diff --git a/docs/admin/Installation.md b/docs/admin/Installation.md index b9705eb5..8ca41e8f 100644 --- a/docs/admin/Installation.md +++ b/docs/admin/Installation.md @@ -37,9 +37,9 @@ For compiling: For running Nominatim: * [PostgreSQL](https://www.postgresql.org) (9.3+) - * [PostGIS](https://postgis.org) (2.2+) + * [PostGIS](https://postgis.net) (2.2+) * [Python 3](https://www.python.org/) - * [Psycopg2](https://initd.org/psycopg) + * [Psycopg2](https://www.psycopg.org) * [PHP](https://php.net) (7.0 or later) * PHP-pgsql * PHP-intl (bundled with PHP) diff --git a/docs/develop/Development-Environment.md b/docs/develop/Development-Environment.md index 2ebaaedc..4f790641 100644 --- a/docs/develop/Development-Environment.md +++ b/docs/develop/Development-Environment.md @@ -28,8 +28,8 @@ following packages should get you started: The Nominatim tests suite consists of behavioural tests (using behave) and unit tests (using PHPUnit). It has the following additional requirements: -* [behave test framework](https://github.com/behave/behave) >= 1.2.5 -* [nose](https://nose.readthedocs.org) +* [behave test framework](https://behave.readthedocs.io) >= 1.2.5 +* [nose](https://nose.readthedocs.io) * [pytidylib](http://countergram.com/open-source/pytidylib) * [phpunit](https://phpunit.de) >= 7.3 * [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer) diff --git a/lib/Geocode.php b/lib/Geocode.php index 3d6838b6..7ab53d97 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -642,12 +642,6 @@ class Geocode $oValidTokens = new TokenList(); if (!empty($aTokens)) { - $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count'; - $sSQL .= ' FROM word '; - $sSQL .= ' WHERE word_token in ('.join(',', $this->oDB->getDBQuotedList($aTokens)).')'; - - Debug::printSQL($sSQL); - $oValidTokens->addTokensFromDB( $this->oDB, $aTokens, diff --git a/settings/import-address.style b/settings/import-address.style index 16f2e929..a2708086 100644 --- a/settings/import-address.style +++ b/settings/import-address.style @@ -5,7 +5,7 @@ "no" : "skip" } }, -{ "keys" : ["wikipedia", "wikipedia:*", "wikidata"], +{ "keys" : ["wikipedia", "wikipedia:*", "wikidata", "area"], "values" : { "" : "extra" } diff --git a/settings/import-extratags.style b/settings/import-extratags.style index b90a07b1..63c4a7ae 100644 --- a/settings/import-extratags.style +++ b/settings/import-extratags.style @@ -211,7 +211,7 @@ } }, { - "keys" : ["addr:*", "is_in:*", "tiger:county", "is_in"], + "keys" : ["addr:*", "is_in:*", "tiger:county"], "values" : { "" : "address" } diff --git a/settings/import-full.style b/settings/import-full.style index c57c2c3a..6728bd57 100644 --- a/settings/import-full.style +++ b/settings/import-full.style @@ -211,7 +211,7 @@ } }, { - "keys" : ["addr:*", "is_in:*", "tiger:county", "is_in"], + "keys" : ["addr:*", "is_in:*", "tiger:county"], "values" : { "" : "address" } @@ -237,7 +237,8 @@ "population", "description", "image", "attribution", "fax", "email", "url", "website", "phone", "real_ale", "smoking", "food", "camera", "brewery", "locality", "wikipedia", - "wikipedia:*", "access:*", "contact:*", "drink:*", "toll:*"], + "wikipedia:*", "access:*", "contact:*", "drink:*", "toll:*", + "area"], "values" : { "" : "extra" } diff --git a/settings/import-street.style b/settings/import-street.style index d2f0ae65..7f0c03b9 100644 --- a/settings/import-street.style +++ b/settings/import-street.style @@ -1,5 +1,5 @@ [ -{ "keys" : ["wikipedia", "wikipedia:*", "wikidata"], +{ "keys" : ["wikipedia", "wikipedia:*", "wikidata", "area"], "values" : { "" : "extra" } diff --git a/sql/functions/normalization.sql b/sql/functions/normalization.sql index 08087172..71b14ee5 100644 --- a/sql/functions/normalization.sql +++ b/sql/functions/normalization.sql @@ -412,3 +412,99 @@ BEGIN END; $$ LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION create_poi_search_terms(parent_place_id BIGINT, + address HSTORE, + housenumber TEXT, + initial_name_vector INTEGER[], + OUT name_vector INTEGER[], + OUT nameaddress_vector INTEGER[]) + AS $$ +DECLARE + parent_name_vector INTEGER[]; + parent_address_vector INTEGER[]; + addr_place_ids INTEGER[]; + + addr_item RECORD; +BEGIN + -- Compute all search terms from the addr: tags. + nameaddress_vector := '{}'::INTEGER[]; + + IF address IS NOT NULL THEN + FOR addr_item IN SELECT * FROM each(address) + LOOP + IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province', + 'district', 'region', 'county', 'municipality', + 'hamlet', 'village', 'subdistrict', 'town', + 'neighbourhood', 'quarter', 'parish') + THEN + nameaddress_vector := array_merge(nameaddress_vector, + addr_ids_from_name(addr_item.value)); + END IF; + END LOOP; + END IF; + + -- If the POI is named, simply mix in all address terms and be done. + IF array_length(initial_name_vector, 1) is not NULL THEN + -- Cheating here by not recomputing all terms but simply using the ones + -- from the parent object. + SELECT array_merge(s.name_vector, s.nameaddress_vector) + INTO parent_address_vector + FROM search_name s + WHERE s.place_id = parent_place_id; + + name_vector := initial_name_vector; + nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector); + + IF not address ? 'street' and address ? 'place' THEN + -- make sure addr:place terms are always searchable + nameaddress_vector := array_merge(nameaddress_vector, + addr_ids_from_name(address->'place')); + END IF; + + RETURN; + END IF; + + ----- unnamed POIS + + IF (array_length(nameaddress_vector, 1) is null + and (address ? 'street'or not address ? 'place')) + or housenumber is null + THEN + RETURN; + END IF; + + SELECT s.name_vector, s.nameaddress_vector + INTO parent_name_vector, parent_address_vector + FROM search_name s + WHERE s.place_id = parent_place_id; + + -- Check if the parent covers all address terms. + -- If not, create a search name entry with the house number as the name. + -- This is unusual for the search_name table but prevents that the place + -- is returned when we only search for the street/place. + + IF not nameaddress_vector <@ parent_address_vector THEN + name_vector := ARRAY[getorcreate_name_id(housenumber)]; + END IF; + + IF not address ? 'street' and address ? 'place' THEN + addr_place_ids := addr_ids_from_name(address->'place'); + IF not addr_place_ids <@ parent_name_vector THEN + -- addr:place tag exists without a corresponding place. Mix in addr:place + -- in the address and drop the name from the parent. This would only be + -- the street name of the nearest street. + nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids); + name_vector := ARRAY[getorcreate_name_id(housenumber)]; + END IF; + ELSE + nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector); + END IF; + + -- The address vector always gets merged in. + nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector); + +END; +$$ +LANGUAGE plpgsql; diff --git a/sql/functions/placex_triggers.sql b/sql/functions/placex_triggers.sql index a935b75a..2bc7efad 100644 --- a/sql/functions/placex_triggers.sql +++ b/sql/functions/placex_triggers.sql @@ -92,7 +92,16 @@ BEGIN END IF; IF fallback THEN - IF ST_Area(bbox) < 0.005 THEN + IF addr_street is null and addr_place is not null THEN + -- The address is attached to a place we don't know. Find the + -- nearest place instead. + FOR location IN + SELECT place_id FROM getNearFeatures(poi_partition, bbox, 26, '{}'::INTEGER[]) + ORDER BY rank_address DESC, isguess asc, distance LIMIT 1 + LOOP + parent_place_id := location.place_id; + END LOOP; + ELSEIF ST_Area(bbox) < 0.005 THEN -- for smaller features get the nearest road SELECT getNearestRoadPlaceId(poi_partition, bbox) INTO parent_place_id; --DEBUG: RAISE WARNING 'Checked for nearest way (%)', parent_place_id; @@ -224,7 +233,7 @@ LANGUAGE plpgsql STABLE; -- \param maxrank Rank of the place. All address features must have -- a search rank lower than the given rank. -- \param address Address terms for the place. --- \param geoemtry Geometry to which the address objects should be close. +-- \param geometry Geometry to which the address objects should be close. -- -- \retval parent_place_id Place_id of the address object that is the direct -- ancestor. @@ -281,23 +290,6 @@ BEGIN END IF; END IF; END LOOP; - - IF address ? 'is_in' THEN - -- is_in items need splitting - isin := regexp_split_to_array(address->'is_in', E'[;,]'); - IF array_upper(isin, 1) IS NOT NULL THEN - FOR i IN 1..array_upper(isin, 1) LOOP - isin_tokens := array_merge(isin_tokens, - word_ids_from_name(isin[i])); - - -- merge word into address vector - IF NOT %REVERSE-ONLY% THEN - nameaddress_vector := array_merge(nameaddress_vector, - addr_ids_from_name(isin[i])); - END IF; - END LOOP; - END IF; - END IF; END IF; IF NOT %REVERSE-ONLY% THEN nameaddress_vector := array_merge(nameaddress_vector, isin_tokens); @@ -417,7 +409,12 @@ BEGIN NEW.name := hstore('ref', NEW.address->'postcode'); - ELSEIF NEW.class = 'boundary' AND NOT is_area THEN + ELSEIF NEW.class = 'highway' AND is_area AND NEW.name is null + AND NEW.extratags ? 'area' AND NEW.extratags->'area' = 'yes' + THEN + RETURN NULL; + ELSEIF NEW.class = 'boundary' AND NOT is_area + THEN RETURN NULL; ELSEIF NEW.class = 'boundary' AND NEW.type = 'administrative' AND NEW.admin_level <= 4 AND NEW.osm_type = 'W' @@ -541,6 +538,9 @@ DECLARE name_vector INTEGER[]; nameaddress_vector INTEGER[]; + addr_nameaddress_vector INTEGER[]; + + inherited_address HSTORE; linked_node_id BIGINT; linked_importance FLOAT; @@ -643,7 +643,7 @@ BEGIN addr_street := NEW.address->'street'; addr_place := NEW.address->'place'; - IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(,|;)%' THEN + IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(:|,|;)%' THEN i := getorcreate_postcode_id(NEW.address->'postcode'); END IF; END IF; @@ -710,6 +710,7 @@ BEGIN -- if we have a POI and there is no address information, -- see if we can get it from a surrounding building + inherited_address := ''::HSTORE; IF NEW.osm_type = 'N' AND addr_street IS NULL AND addr_place IS NULL AND NEW.housenumber IS NULL THEN FOR location IN @@ -724,6 +725,7 @@ BEGIN NEW.housenumber := location.address->'housenumber'; addr_street := location.address->'street'; addr_place := location.address->'place'; + inherited_address := location.address; END LOOP; END IF; @@ -754,28 +756,26 @@ BEGIN NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry); END IF; - -- If there is no name it isn't searchable, don't bother to create a search record - IF NEW.name is NULL THEN - --DEBUG: RAISE WARNING 'Not a searchable place % %', NEW.osm_type, NEW.osm_id; - return NEW; - END IF; + IF NEW.name is not NULL THEN + NEW.name := add_default_place_name(NEW.country_code, NEW.name); + name_vector := make_keywords(NEW.name); - NEW.name := add_default_place_name(NEW.country_code, NEW.name); - name_vector := make_keywords(NEW.name); + IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, + name_vector, NEW.rank_search, NEW.rank_address, + upper(trim(NEW.address->'postcode')), NEW.geometry); + --DEBUG: RAISE WARNING 'Place added to location table'; + END IF; - -- Performance, it would be more acurate to do all the rest of the import - -- process but it takes too long - -- Just be happy with inheriting from parent road only - result := insertSearchName(NEW.partition, NEW.place_id, name_vector, - NEW.rank_search, NEW.rank_address, NEW.geometry); + END IF; IF NOT %REVERSE-ONLY% THEN - -- Merge address from parent - SELECT array_merge(s.name_vector, s.nameaddress_vector) - INTO nameaddress_vector - FROM search_name s - WHERE s.place_id = NEW.parent_place_id; + SELECT * INTO name_vector, nameaddress_vector + FROM create_poi_search_terms(NEW.parent_place_id, + inherited_address || NEW.address, + NEW.housenumber, name_vector); + IF array_length(name_vector, 1) is not NULL THEN INSERT INTO search_name (place_id, search_rank, address_rank, importance, country_code, name_vector, nameaddress_vector, centroid) @@ -784,8 +784,9 @@ BEGIN nameaddress_vector, NEW.centroid); --DEBUG: RAISE WARNING 'Place added to search table'; END IF; + END IF; - return NEW; + RETURN NEW; END IF; END IF; diff --git a/test/bdd/db/import/parenting.feature b/test/bdd/db/import/parenting.feature index 7974fe52..62d65cef 100644 --- a/test/bdd/db/import/parenting.feature +++ b/test/bdd/db/import/parenting.feature @@ -374,7 +374,7 @@ Feature: Parenting of objects | W1 | N4 | 3 | | N1 | W2 | None | | N2 | W3 | 4 | - | N3 | W2 | None | + | N3 | N4 | None | Scenario: POIs parent a road if they are attached to it Given the scene points-on-roads diff --git a/test/bdd/db/import/search_name.feature b/test/bdd/db/import/search_name.feature index 8006045f..3fda7ae8 100644 --- a/test/bdd/db/import/search_name.feature +++ b/test/bdd/db/import/search_name.feature @@ -2,7 +2,7 @@ Feature: Creation of search terms Tests that search_name table is filled correctly - Scenario: POIs without a name have no search entry + Scenario: Unnamed POIs have no search entry Given the scene roads-with-pois And the places | osm | class | type | geometry | @@ -13,6 +13,167 @@ Feature: Creation of search terms When importing Then search_name has no entry for N1 + Scenario: Unnamed POI has a search entry when it has unknown addr: tags + Given the scene roads-with-pois + And the places + | osm | class | type | housenr | addr+city | geometry | + | N1 | place | house | 23 | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + When importing + Then search_name contains + | object | name_vector | nameaddress_vector | + | N1 | #23 | Rose Street, Walltown | + When searching for "23 Rose Street, Walltown" + Then results contain + | osm_type | osm_id | + | N | 1 | + + Scenario: Unnamed POI has no search entry when it has known addr: tags + Given the scene roads-with-pois + And the places + | osm | class | type | housenr | addr+city | geometry | + | N1 | place | house | 23 | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | addr+city | geometry | + | W1 | highway | residential | Rose Street | Walltown | :w-north | + When importing + Then search_name has no entry for N1 + When searching for "23 Rose Street, Walltown" + Then results contain + | osm_type | osm_id | + | N | 1 | + + Scenario: Unnamed POI must have a house number to get a search entry + Given the scene roads-with-pois + And the places + | osm | class | type | addr+city | geometry | + | N1 | place | house | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + When importing + Then search_name has no entry for N1 + + Scenario: Unnamed POIs doesn't inherit parent name when unknown addr:place is present + Given the scene roads-with-pois + And the places + | osm | class | type | housenr | addr+place | geometry | + | N1 | place | house | 23 | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + | N2 | place | city | Strange Town | :p-N1 | + When importing + Then search_name contains + | object | name_vector | nameaddress_vector | + | N1 | #23 | Walltown | + When searching for "23 Rose Street" + Then exactly 1 results are returned + And results contain + | osm_type | osm_id | + | W | 1 | + When searching for "23 Walltown" + Then results contain + | osm_type | osm_id | + | N | 1 | + + Scenario: Unnamed POIs doesn't inherit parent name when addr:place is present only in parent address + Given the scene roads-with-pois + And the places + | osm | class | type | housenr | addr+place | geometry | + | N1 | place | house | 23 | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | addr+city | geometry | + | W1 | highway | residential | Rose Street | Walltown | :w-north | + | N2 | place | suburb | Strange Town | Walltown | :p-N1 | + When importing + Then search_name contains + | object | name_vector | nameaddress_vector | + | N1 | #23 | Walltown | + When searching for "23 Rose Street, Walltown" + Then exactly 1 result is returned + And results contain + | osm_type | osm_id | + | W | 1 | + When searching for "23 Walltown" + Then exactly 1 result is returned + And results contain + | osm_type | osm_id | + | N | 1 | + + Scenario: Unnamed POIs does inherit parent name when unknown addr:place and addr:street is present + Given the scene roads-with-pois + And the places + | osm | class | type | housenr | addr+place | addr+street | geometry | + | N1 | place | house | 23 | Walltown | Lily Street | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + When importing + Then search_name has no entry for N1 + When searching for "23 Rose Street" + Then results contain + | osm_type | osm_id | + | N | 1 | + When searching for "23 Lily Street" + Then exactly 0 results are returned + + Scenario: An unknown addr:street is ignored + Given the scene roads-with-pois + And the places + | osm | class | type | housenr | addr+street | geometry | + | N1 | place | house | 23 | Lily Street | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + When importing + Then search_name has no entry for N1 + When searching for "23 Rose Street" + Then results contain + | osm_type | osm_id | + | N | 1 | + When searching for "23 Lily Street" + Then exactly 0 results are returned + + Scenario: Named POIs have unknown address tags added in the search_name table + Given the scene roads-with-pois + And the places + | osm | class | type | name+name | addr+city | geometry | + | N1 | place | house | Green Moss | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + When importing + Then search_name contains + | object | name_vector | nameaddress_vector | + | N1 | #Green Moss | Rose Street, Walltown | + When searching for "Green Moss, Rose Street, Walltown" + Then results contain + | osm_type | osm_id | + | N | 1 | + + Scenario: Named POI doesn't inherit parent name when addr:place is present only in parent address + Given the scene roads-with-pois + And the places + | osm | class | type | name+name | addr+place | geometry | + | N1 | place | house | Green Moss | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + | N2 | place | suburb | Strange Town | :p-N1 | + When importing + Then search_name contains + | object | name_vector | nameaddress_vector | + | N1 | #Green Moss | Walltown | + When searching for "Green Moss, Rose Street, Walltown" + Then exactly 0 result is returned + When searching for "Green Moss, Walltown" + Then results contain + | osm_type | osm_id | + | N | 1 | + Scenario: Named POIs inherit address from parent Given the scene roads-with-pois And the places @@ -62,21 +223,6 @@ Feature: Creation of search terms | object | nameaddress_vector | | W1 | 12345 | - Scenario: is_in is split and added to the address search terms - Given the scene roads-with-pois - And the places - | osm | class | type | name | geometry | - | N1 | place | state | new york | 80 80 | - | N2 | place | city | bonn | 81 81 | - | N3 | place | suburb | smalltown| 80 81 | - And the named places - | osm | class | type | addr+is_in | geometry | - | W1 | highway | service | bonn, New York, Smalltown | :w-north | - When importing - Then search_name contains - | object | nameaddress_vector | - | W1 | bonn, new york, smalltown | - Scenario: a linked place does not show up in search name Given the named places | osm | class | type | admin | geometry | diff --git a/test/bdd/db/update/parenting.feature b/test/bdd/db/update/parenting.feature index 50a647ad..99199de4 100644 --- a/test/bdd/db/update/parenting.feature +++ b/test/bdd/db/update/parenting.feature @@ -9,8 +9,8 @@ Scenario: POI inside building inherits addr:street change | N2 | shop | bakery | :n-edge-NS | | N3 | shop | supermarket| :n-edge-WE | And the places - | osm | class | type | addr_place | housenr | geometry | - | W1 | building | yes | nowhere | 3 | :w-building | + | osm | class | type | street | housenr | geometry | + | W1 | building | yes | nowhere | 3 | :w-building | And the places | osm | class | type | name | geometry | | W2 | highway | primary | bar | :w-WE | @@ -34,5 +34,3 @@ Scenario: POI inside building inherits addr:street change | N1 | W3 | 3 | | N2 | W3 | 3 | | N3 | W3 | 3 | - - diff --git a/test/bdd/steps/db_ops.py b/test/bdd/steps/db_ops.py index 9604fd10..377c977d 100644 --- a/test/bdd/steps/db_ops.py +++ b/test/bdd/steps/db_ops.py @@ -470,12 +470,22 @@ def check_search_name_contents(context, exclude): for res in cur: for h in row.headings: if h in ('name_vector', 'nameaddress_vector'): - terms = [x.strip().replace('#', ' ') for x in row[h].split(',')] + terms = [x.strip() for x in row[h].split(',') if not x.strip().startswith('#')] + words = [x.strip()[1:] for x in row[h].split(',') if x.strip().startswith('#')] subcur = context.db.cursor() - subcur.execute("""SELECT word_id, word_token - FROM word, (SELECT unnest(%s) as term) t - WHERE word_token = make_standard_name(t.term)""", - (terms,)) + subcur.execute(""" SELECT word_id, word_token + FROM word, (SELECT unnest(%s::TEXT[]) as term) t + WHERE word_token = make_standard_name(t.term) + and class is null and country_code is null + and operator is null + UNION + SELECT word_id, word_token + FROM word, (SELECT unnest(%s::TEXT[]) as term) t + WHERE word_token = ' ' || make_standard_name(t.term) + and class is null and country_code is null + and operator is null + """, + (terms, words)) if not exclude: ok_(subcur.rowcount >= len(terms), "No word entry found for " + row[h])