From: Sarah Hoffmann Date: Wed, 25 Nov 2020 15:58:40 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~197 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/dc3b1abfe2d06c790317bb66e51abfd9334e08f8?hp=e871fabd5ffccb9b6db16a813c3addc89683d4e6 Merge remote-tracking branch 'upstream/master' --- diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 79830673..f1da4616 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -97,3 +97,9 @@ jobs: php ./utils/update.php --init-updates php ./utils/update.php --import-osmosis working-directory: build + + - name: Run reverse-only import + run : | + dropdb nominatim + php ./utils/setup.php --osm-file ../monaco-latest.osm.pbf --reverse-only --all + working-directory: build diff --git a/docs/api/Search.md b/docs/api/Search.md index c18655dc..9f73594d 100644 --- a/docs/api/Search.md +++ b/docs/api/Search.md @@ -116,7 +116,7 @@ Limit the number of returned results. (Default: 10, Maximum: 50) * `viewbox=,,,` The preferred area to find search results. Any two corner points of the box -are accepted in any order as long as they span a real box. `x` is longitude, +are accepted as long as they span a real box. `x` is longitude, `y` is latitude. diff --git a/lib/Geocode.php b/lib/Geocode.php index cce85f2b..6589f754 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -650,6 +650,8 @@ class Geocode $this->oNormalizer ); + $oCtx->setFullNameWords($oValidTokens->getFullWordIDs()); + // Try more interpretations for Tokens that could not be matched. foreach ($aTokens as $sToken) { if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) { diff --git a/lib/SearchContext.php b/lib/SearchContext.php index 3d399bdc..c2898d27 100644 --- a/lib/SearchContext.php +++ b/lib/SearchContext.php @@ -32,7 +32,18 @@ class SearchContext public $sqlCountryList = ''; /// List of place IDs to exclude (as SQL). private $sqlExcludeList = ''; + /// Subset of word ids of full words in the query. + private $aFullNameWords = array(); + public function setFullNameWords($aWordList) + { + $this->aFullNameWords = $aWordList; + } + + public function getFullNameTerms() + { + return $this->aFullNameWords; + } /** * Check if a reference point is defined. diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index 52d15976..1e1955c2 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -21,8 +21,6 @@ class SearchDescription private $bRareName = false; /// List of word ids making up the address of the object. private $aAddress = array(); - /// Subset of word ids of full words making up the address. - private $aFullNameAddress = array(); /// List of word ids that appear in the name but should be ignored. private $aNameNonSearch = array(); /// List of word ids that appear in the address but should be ignored. @@ -219,6 +217,9 @@ class SearchDescription ) { $oSearch = clone $this; $oSearch->iSearchRank++; + if (strlen($oSearchTerm->sPostcode) < 4) { + $oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode); + } $oSearch->sPostcode = $oSearchTerm->sPostcode; $aNewSearches[] = $oSearch; } @@ -247,6 +248,18 @@ class SearchDescription $oSearch->iSearchRank++; } $aNewSearches[] = $oSearch; + // Housenumbers may appear in the name when the place has its own + // address terms. + if ($oSearchTerm->iId !== null + && ($this->iNamePhrase >= 0 || empty($this->aName)) + && empty($this->aAddress) + ) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->aAddress = $this->aName; + $oSearch->aName = array($oSearchTerm->iId => $oSearchTerm->iId); + $aNewSearches[] = $oSearch; + } } } elseif ($sPhraseType == '' && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm') @@ -283,11 +296,9 @@ class SearchDescription if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) { if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) { $oSearch = clone $this; - $oSearch->iSearchRank += 5; + $oSearch->iSearchRank += 3 * $oSearchTerm->iTermCount; $oSearch->aAddress[$iWordID] = $iWordID; $aNewSearches[] = $oSearch; - } else { - $this->aFullNameAddress[$iWordID] = $iWordID; } } else { $oSearch = clone $this; @@ -333,16 +344,19 @@ class SearchDescription ) { if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { $oSearch = clone $this; - $oSearch->iSearchRank += 2; + $oSearch->iSearchRank += $oSearchTerm->iTermCount; + if (empty($this->aName)) { + $oSearch->iSearchRank++; + } + if (preg_match('#^[0-9]+$#', $sToken)) { + $oSearch->iSearchRank++; + } $oSearch->aAddress[$iWordID] = $iWordID; $aNewSearches[] = $oSearch; } else { $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->aAddressNonSearch[$iWordID] = $iWordID; - if (preg_match('#^[0-9]+$#', $sToken)) { - $oSearch->iSearchRank += 2; - } if (!empty($aFullTokens)) { $oSearch->iSearchRank++; } @@ -352,7 +366,7 @@ class SearchDescription foreach ($aFullTokens as $oSearchTermToken) { if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) { $oSearch = clone $this; - $oSearch->iSearchRank++; + $oSearch->iSearchRank += 3; $oSearch->aAddress[$oSearchTermToken->iId] = $oSearchTermToken->iId; $aNewSearches[] = $oSearch; @@ -691,10 +705,11 @@ class SearchDescription $sImportanceSQL .= $this->oContext->viewboxImportanceSQL('centroid'); $aOrder[] = "$sImportanceSQL DESC"; - if (!empty($this->aFullNameAddress)) { + $aFullNameAddress = $this->oContext->getFullNameTerms(); + if (!empty($aFullNameAddress)) { $sExactMatchSQL = ' ( '; $sExactMatchSQL .= ' SELECT count(*) FROM ( '; - $sExactMatchSQL .= ' SELECT unnest('.$oDB->getArraySQL($this->aFullNameAddress).')'; + $sExactMatchSQL .= ' SELECT unnest('.$oDB->getArraySQL($aFullNameAddress).')'; $sExactMatchSQL .= ' INTERSECT '; $sExactMatchSQL .= ' SELECT unnest(nameaddress_vector)'; $sExactMatchSQL .= ' ) s'; diff --git a/lib/TokenList.php b/lib/TokenList.php index fce5f940..1b6a1dcf 100644 --- a/lib/TokenList.php +++ b/lib/TokenList.php @@ -80,6 +80,21 @@ class TokenList return isset($this->aTokens[$sWord]) ? $this->aTokens[$sWord] : array(); } + public function getFullWordIDs() + { + $ids = array(); + + foreach ($this->aTokens as $aTokenList) { + foreach ($aTokenList as $oToken) { + if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) { + $ids[$oToken->iId] = $oToken->iId; + } + } + } + + return $ids; + } + /** * Add token information from the word table in the database. * @@ -151,7 +166,8 @@ class TokenList $oToken = new Token\Word( $iId, $aWord['word_token'][0] != ' ', - (int) $aWord['count'] + (int) $aWord['count'], + substr_count($aWord['word_token'], ' ') ); } diff --git a/lib/TokenWord.php b/lib/TokenWord.php index 54622cbc..fc28535d 100644 --- a/lib/TokenWord.php +++ b/lib/TokenWord.php @@ -13,12 +13,15 @@ class Word public $bPartial; /// Number of appearances in the database. public $iSearchNameCount; + /// Number of terms in the word. + public $iTermCount; - public function __construct($iId, $bPartial, $iSearchNameCount) + public function __construct($iId, $bPartial, $iSearchNameCount, $iTermCount) { $this->iId = $iId; $this->bPartial = $bPartial; $this->iSearchNameCount = $iSearchNameCount; + $this->iTermCount = $iTermCount; } public function debugInfo() diff --git a/settings/address-levels.json b/settings/address-levels.json index d41d6536..26ee6e70 100644 --- a/settings/address-levels.json +++ b/settings/address-levels.json @@ -164,6 +164,16 @@ "administrative8" : 14 } } +}, +{ "countries" : [ "nl" ], + "tags" : { + "boundary" : { + "administrative7" : [13, 0], + "administrative8" : 14, + "administrative9" : [15, 0], + "administrative10" : 16 + } + } } ] diff --git a/sql/functions/address_lookup.sql b/sql/functions/address_lookup.sql index de6fe1d1..2426a698 100644 --- a/sql/functions/address_lookup.sql +++ b/sql/functions/address_lookup.sql @@ -289,7 +289,7 @@ BEGIN IF search_unlisted_place is not null THEN RETURN NEXT ROW(null, null, null, hstore('name', search_unlisted_place), - 'place', 'locality', null, null, true, true, 26, 0)::addressline; + 'place', 'locality', null, null, true, true, 25, 0)::addressline; END IF; IF searchpostcode IS NOT NULL THEN diff --git a/sql/functions/normalization.sql b/sql/functions/normalization.sql index 4aea0e9b..8bb4915b 100644 --- a/sql/functions/normalization.sql +++ b/sql/functions/normalization.sql @@ -490,53 +490,34 @@ BEGIN END LOOP; END IF; - - -- If the POI is named, simply mix in all address terms and be done. - IF array_length(initial_name_vector, 1) is not NULL THEN - -- Cheating here by not recomputing all terms but simply using the ones - -- from the parent object. - name_vector := initial_name_vector; - nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector); - nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector); - - IF not address ? 'street' and address ? 'place' THEN - -- make sure addr:place terms are always searchable - nameaddress_vector := array_merge(nameaddress_vector, - addr_ids_from_name(address->'place')); - END IF; - - RETURN; - END IF; - - ----- unnamed POIS - - IF (array_length(nameaddress_vector, 1) is null - and (address ? 'street'or not address ? 'place')) - or housenumber is null - THEN - RETURN; - END IF; + name_vector := initial_name_vector; -- Check if the parent covers all address terms. -- If not, create a search name entry with the house number as the name. -- This is unusual for the search_name table but prevents that the place -- is returned when we only search for the street/place. - IF not nameaddress_vector <@ parent_address_vector THEN - name_vector := ARRAY[getorcreate_name_id(housenumber)]; + IF housenumber is not null and not nameaddress_vector <@ parent_address_vector THEN + name_vector := array_merge(name_vector, + ARRAY[getorcreate_housenumber_id(make_standard_name(housenumber))]); END IF; IF not address ? 'street' and address ? 'place' THEN addr_place_ids := addr_ids_from_name(address->'place'); IF not addr_place_ids <@ parent_name_vector THEN - -- addr:place tag exists without a corresponding place. Mix in addr:place - -- in the address. - name_vector := ARRAY[getorcreate_name_id(housenumber)]; + -- make sure addr:place terms are always searchable nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids); + -- If there is a housenumber, also add the place name as a name, + -- so we can search it by the usual housenumber+place algorithms. + IF housenumber is not null THEN + name_vector := array_merge(name_vector, + ARRAY[getorcreate_name_id(make_standard_name(address->'place'))]); + END IF; END IF; END IF; - -- Merge the parent name and address. + -- Cheating here by not recomputing all terms but simply using the ones + -- from the parent object. nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector); nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector); diff --git a/sql/functions/placex_triggers.sql b/sql/functions/placex_triggers.sql index 7d6352b4..6fd9e225 100644 --- a/sql/functions/placex_triggers.sql +++ b/sql/functions/placex_triggers.sql @@ -814,8 +814,8 @@ BEGIN END IF; - IF array_length(name_vector, 1) is not NULL - OR inherited_address is not NULL OR NEW.address is not NULL + IF not %REVERSE-ONLY% AND (array_length(name_vector, 1) is not NULL + OR inherited_address is not NULL OR NEW.address is not NULL) THEN SELECT * INTO name_vector, nameaddress_vector FROM create_poi_search_terms(NEW.place_id, @@ -824,7 +824,7 @@ BEGIN NEW.country_code, NEW.housenumber, name_vector, NEW.centroid); - IF not %REVERSE-ONLY% AND array_length(name_vector, 1) is not NULL THEN + IF array_length(name_vector, 1) is not NULL THEN INSERT INTO search_name (place_id, search_rank, address_rank, importance, country_code, name_vector, nameaddress_vector, centroid) diff --git a/sql/functions/ranking.sql b/sql/functions/ranking.sql index 51dcd0d0..1f236211 100644 --- a/sql/functions/ranking.sql +++ b/sql/functions/ranking.sql @@ -194,10 +194,10 @@ BEGIN FROM get_postcode_rank(country, postcode); ELSEIF extended_type = 'N' AND place_class = 'highway' THEN search_rank = 30; - address_rank = 0; + address_rank = 30; ELSEIF place_class = 'landuse' AND extended_type != 'A' THEN search_rank = 30; - address_rank = 0; + address_rank = 30; ELSE IF place_class = 'boundary' and place_type = 'administrative' THEN classtype = place_type || admin_level::TEXT; @@ -211,11 +211,8 @@ BEGIN AND l.class = place_class AND (l.type = classtype or l.type is NULL) ORDER BY l.country_code, l.class, l.type LIMIT 1; - IF search_rank is NULL THEN + IF search_rank is NULL OR address_rank is NULL THEN search_rank := 30; - END IF; - - IF address_rank is NULL THEN address_rank := 30; END IF; diff --git a/test/bdd/db/import/placex.feature b/test/bdd/db/import/placex.feature index a9e81c44..db36ab58 100644 --- a/test/bdd/db/import/placex.feature +++ b/test/bdd/db/import/placex.feature @@ -170,7 +170,7 @@ Feature: Import into placex When importing Then placex contains | object | rank_search | rank_address | - | N1 | 30 | 0 | + | N1 | 30 | 30 | | W1 | 26 | 26 | | W2 | 26 | 26 | | W3 | 26 | 26 | @@ -191,8 +191,8 @@ Feature: Import into placex When importing Then placex contains | object | rank_search | rank_address | - | N2 | 30 | 0 | - | W2 | 30 | 0 | + | N2 | 30 | 30 | + | W2 | 30 | 30 | | W4 | 22 | 22 | | R2 | 22 | 22 | | R3 | 22 | 0 | diff --git a/test/bdd/db/import/search_name.feature b/test/bdd/db/import/search_name.feature index 30c430a6..0e922e1d 100644 --- a/test/bdd/db/import/search_name.feature +++ b/test/bdd/db/import/search_name.feature @@ -23,12 +23,20 @@ Feature: Creation of search terms | W1 | highway | residential | Rose Street | :w-north | When importing Then search_name contains - | object | name_vector | nameaddress_vector | - | N1 | #23 | Rose Street, Walltown | + | object | nameaddress_vector | + | N1 | Rose, Street, Walltown | When searching for "23 Rose Street, Walltown" Then results contain | osm_type | osm_id | name | | N | 1 | 23, Rose Street | + When searching for "Walltown, Rose Street 23" + Then results contain + | osm_type | osm_id | name | + | N | 1 | 23, Rose Street | + When searching for "Rose Street 23, Walltown" + Then results contain + | osm_type | osm_id | name | + | N | 1 | 23, Rose Street | Scenario: Searching for unknown addr: tags also works for multiple words Given the scene roads-with-pois @@ -40,12 +48,20 @@ Feature: Creation of search terms | W1 | highway | residential | Rose Street | :w-north | When importing Then search_name contains - | object | name_vector | nameaddress_vector | - | N1 | #23 | Rose Street, Little, Big, Town | + | object | nameaddress_vector | + | N1 | Rose Street, Little, Big, Town | When searching for "23 Rose Street, Little Big Town" Then results contain | osm_type | osm_id | name | | N | 1 | 23, Rose Street | + When searching for "Rose Street 23, Little Big Town" + Then results contain + | osm_type | osm_id | name | + | N | 1 | 23, Rose Street | + When searching for "Little big Town, Rose Street 23" + Then results contain + | osm_type | osm_id | name | + | N | 1 | 23, Rose Street | Scenario: Unnamed POI has no search entry when it has known addr: tags Given the scene roads-with-pois @@ -88,7 +104,7 @@ Feature: Creation of search terms | N1 | N2 | Then search_name contains | object | name_vector | nameaddress_vector | - | N1 | #23 | Walltown, Strange, Town | + | N1 | #Walltown | Strange, Town | When searching for "23 Rose Street" Then exactly 1 results are returned And results contain @@ -98,6 +114,70 @@ Feature: Creation of search terms Then results contain | osm_type | osm_id | name | | N | 1 | 23, Walltown, Strange Town | + When searching for "Walltown 23, Strange Town" + Then results contain + | osm_type | osm_id | name | + | N | 1 | 23, Walltown, Strange Town | + When searching for "Strange Town, Walltown 23" + Then results contain + | osm_type | osm_id | name | + | N | 1 | 23, Walltown, Strange Town | + + Scenario: Named POIs can be searched by housenumber when unknown addr:place is present + Given the scene roads-with-pois + And the places + | osm | class | type | name | housenr | addr+place | geometry | + | N1 | place | house | Blue house | 23 | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + | N2 | place | city | Strange Town | :p-N1 | + When importing + Then search_name contains + | object | name_vector | nameaddress_vector | + | N1 | #Walltown, #Blue house | Walltown, Strange, Town | + When searching for "23 Walltown, Strange Town" + Then results contain + | osm_type | osm_id | name | + | N | 1 | Blue house, 23, Walltown, Strange Town | + When searching for "Walltown 23, Strange Town" + Then results contain + | osm_type | osm_id | name | + | N | 1 | Blue house, 23, Walltown, Strange Town | + When searching for "Strange Town, Walltown 23" + Then results contain + | osm_type | osm_id | name | + | N | 1 | Blue house, 23, Walltown, Strange Town | + When searching for "Strange Town, Walltown 23, Blue house" + Then results contain + | osm_type | osm_id | name | + | N | 1 | Blue house, 23, Walltown, Strange Town | + When searching for "Strange Town, Walltown, Blue house" + Then results contain + | osm_type | osm_id | name | + | N | 1 | Blue house, 23, Walltown, Strange Town | + + Scenario: Named POIs can be found when unknown multi-word addr:place is present + Given the scene roads-with-pois + And the places + | osm | class | type | name | housenr | addr+place | geometry | + | N1 | place | house | Blue house | 23 | Moon sun | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + | N2 | place | city | Strange Town | :p-N1 | + When importing + Then search_name contains + | object | name_vector | nameaddress_vector | + | N1 | #Moon sun, #Blue house | Moon, Sun, Strange, Town | + When searching for "23 Moon Sun, Strange Town" + Then results contain + | osm_type | osm_id | name | + | N | 1 | Blue house, 23, Moon sun, Strange Town | + When searching for "Blue house, Moon Sun, Strange Town" + Then results contain + | osm_type | osm_id | name | + | N | 1 | Blue house, 23, Moon sun, Strange Town | Scenario: Unnamed POIs doesn't inherit parent name when addr:place is present only in parent address Given the scene roads-with-pois @@ -111,7 +191,7 @@ Feature: Creation of search terms When importing Then search_name contains | object | name_vector | nameaddress_vector | - | N1 | #23 | Walltown | + | N1 | #Walltown | Strange, Town | When searching for "23 Rose Street, Walltown" Then exactly 1 result is returned And results contain @@ -157,22 +237,38 @@ Feature: Creation of search terms When searching for "23 Lily Street" Then exactly 0 results are returned - Scenario: Named POIs have unknown address tags added in the search_name table + Scenario: Named POIs get unknown address tags added in the search_name table Given the scene roads-with-pois And the places - | osm | class | type | name+name | addr+city | geometry | - | N1 | place | house | Green Moss | Walltown | :p-N1 | + | osm | class | type | name+name | housenr | addr+city | geometry | + | N1 | place | house | Green Moss | 26 | Walltown | :p-N1 | And the places | osm | class | type | name+name | geometry | | W1 | highway | residential | Rose Street | :w-north | When importing Then search_name contains | object | name_vector | nameaddress_vector | - | N1 | #Green Moss | Rose Street, Walltown | + | N1 | #Green Moss | Rose, Street, Walltown | When searching for "Green Moss, Rose Street, Walltown" Then results contain | osm_type | osm_id | name | - | N | 1 | Green Moss, Rose Street | + | N | 1 | Green Moss, 26, Rose Street | + When searching for "Green Moss, 26, Rose Street, Walltown" + Then results contain + | osm_type | osm_id | name | + | N | 1 | Green Moss, 26, Rose Street | + When searching for "26, Rose Street, Walltown" + Then results contain + | osm_type | osm_id | name | + | N | 1 | Green Moss, 26, Rose Street | + When searching for "Rose Street 26, Walltown" + Then results contain + | osm_type | osm_id | name | + | N | 1 | Green Moss, 26, Rose Street | + When searching for "Walltown, Rose Street 26" + Then results contain + | osm_type | osm_id | name | + | N | 1 | Green Moss, 26, Rose Street | Scenario: Named POI doesn't inherit parent name when addr:place is present only in parent address Given the scene roads-with-pois diff --git a/test/bdd/steps/db_ops.py b/test/bdd/steps/db_ops.py index 377c977d..2b012c41 100644 --- a/test/bdd/steps/db_ops.py +++ b/test/bdd/steps/db_ops.py @@ -487,8 +487,8 @@ def check_search_name_contents(context, exclude): """, (terms, words)) if not exclude: - ok_(subcur.rowcount >= len(terms), - "No word entry found for " + row[h]) + ok_(subcur.rowcount >= len(terms) + len(words), + "No word entry found for " + row[h] + ". Entries found: " + str(subcur.rowcount)) for wid in subcur: if exclude: assert_not_in(wid[0], res[h], diff --git a/test/php/Nominatim/TokenListTest.php b/test/php/Nominatim/TokenListTest.php index 191a09dc..ca43aabb 100644 --- a/test/php/Nominatim/TokenListTest.php +++ b/test/php/Nominatim/TokenListTest.php @@ -121,6 +121,6 @@ class TokenTest extends \PHPUnit\Framework\TestCase $this->assertEquals(array(new Token\HouseNumber(999, '1051')), $TL->get('1051')); $this->assertEquals(array(new Token\Country(999, 'de')), $TL->get('alemagne')); $this->assertEquals(array(new Token\Postcode(999, '64286')), $TL->get('64286')); - $this->assertEquals(array(new Token\Word(999, true, 533)), $TL->get('darmstadt')); + $this->assertEquals(array(new Token\Word(999, true, 533, 0)), $TL->get('darmstadt')); } } diff --git a/utils/query.php b/utils/query.php index 956bb566..6068c7c0 100644 --- a/utils/query.php +++ b/utils/query.php @@ -58,6 +58,7 @@ if (!$oParams->hasSetAny($aSearchParams)) { $oGeocode = new Nominatim\Geocode($oDB); $oGeocode->setLanguagePreference($oParams->getPreferredLanguages(false)); +$oGeocode->setReverseInPlan(true); $oGeocode->loadParamArray($oParams); if ($oParams->getBool('search')) {