From: Sarah Hoffmann Date: Thu, 13 Aug 2020 07:13:41 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~221 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/d3425c4fa916369116dec8a3f20d73b7ee9bd512?hp=1cce464f866bb56b9427b29a2230321a695394a8 Merge remote-tracking branch 'upstream/master' --- diff --git a/docs/develop/Ranking.md b/docs/develop/Ranking.md index 776de8f5..278039db 100644 --- a/docs/develop/Ranking.md +++ b/docs/develop/Ranking.md @@ -14,17 +14,59 @@ search rank will be appear higher in the result list. Search ranks are not so important these days because many well-known places use the Wikipedia importance ranking instead. +The following table gives an overview of the kind of features that Nominatim +expects for each rank: + +rank | typical place types | extent +-------|---------------------------------|------- +1-3 | oceans, continents | - +4 | countries | - +5-9 | states, regions, provinces | - +10-12 | counties | - +13-16 | cities, municipalities, islands | 7.5 km +17-18 | towns, boroughs | 4 km +19 | villages, suburbs | 2 km +20 | hamlets, farms, neighbourhoods | 1 km +21-25 | isolated dwellings, city blocks | 500 m + +The extent column describes how far a feature is assumed to reach when it +is mapped only as a point. Larger features like countries and states are usually +available with their exact area in the OpenStreetMap data. That is why no extent +is given. + ## Address rank The address rank describes where a place shows up in an address hierarchy. Usually only administrative boundaries and place nodes and areas are -eligible to be part of an address. All other objects have an address rank -of 0. +eligible to be part of an address. Places that should not appear in the +address must have an address rank of 0. 
+ +The following table gives an overview of how ranks are mapped to address parts: + + rank | address part +-------------|------------- + 1-3 | _unused_ + 4 | country + 5-9 | state + 10-12 | county + 13-16 | city + 17-21 | suburb + 22-25 | neighbourhood + 26-27 | street + 28-30 | POI/house number + +The country rank 4 usually doesn't show up in the address parts of an object. +The country is determined indirectly from the country code. + +Ranks 5-25 can be assigned more or less freely. They make up the major part +of the address. + +The street ranks 26 and 27 are handled slightly differently. Only one object +from these ranks shows up in an address. -Note that the search rank of a place plays a role in the address computation -as well. When collecting the places that should make up the address parts -then only places are taken into account that have a lower address rank than -the search rank of the base object. +For POI level objects like shops, buildings or house numbers, always use rank 30. +Rank 28 is reserved for house number interpolations. 29 is for internal use +only. ## Rank configuration @@ -84,7 +126,7 @@ Then the rank is used when no more specific value is found for the given key. Countries and key/value combination may appear in multiple definitions. Just -make sure that each combination of counrty/key/value appears only once per +make sure that each combination of country/key/value appears only once per file. Otherwise the import will fail with a UNIQUE INDEX constraint violation on import. 
diff --git a/docs/extra.css b/docs/extra.css index a8c3a1c8..136c59a6 100644 --- a/docs/extra.css +++ b/docs/extra.css @@ -1,3 +1,15 @@ .toctree-l3 { display: none!important } + +table { + margin-bottom: 12pt +} + +th, td { + padding: 1pt 12pt; +} + +th { + background-color: #eee; +} diff --git a/sql/functions/placex_triggers.sql b/sql/functions/placex_triggers.sql index 7542edfd..f9d5b60f 100644 --- a/sql/functions/placex_triggers.sql +++ b/sql/functions/placex_triggers.sql @@ -31,7 +31,6 @@ BEGIN LOOP FOR i IN 1..array_upper(location.members, 1) BY 2 LOOP IF location.members[i+1] = 'street' THEN - --DEBUG: RAISE WARNING 'node in relation %',relation; FOR parent IN SELECT place_id from placex WHERE osm_type = 'W' and osm_id = substring(location.members[i],2)::bigint @@ -535,6 +534,7 @@ DECLARE centroid GEOMETRY; parent_address_level SMALLINT; + place_address_level SMALLINT; addr_street TEXT; addr_place TEXT; @@ -592,7 +592,11 @@ BEGIN IF NEW.class = 'boundary' and NEW.type = 'administrative' THEN parent_address_level := get_parent_address_level(NEW.geometry, NEW.admin_level); IF parent_address_level >= NEW.rank_address THEN - NEW.rank_address := parent_address_level + 2; + IF parent_address_level >= 24 THEN + NEW.rank_address := 25; + ELSE + NEW.rank_address := parent_address_level + 2; + END IF; END IF; ELSE parent_address_level := 3; @@ -707,7 +711,6 @@ BEGIN NEW.housenumber := location.address->'housenumber'; addr_street := location.address->'street'; addr_place := location.address->'place'; - --DEBUG: RAISE WARNING 'Found surrounding building % %', location.osm_type, location.osm_id; END LOOP; END IF; @@ -830,6 +833,18 @@ BEGIN THEN NEW.importance = linked_importance; END IF; + ELSE + -- No linked place? As a last resort check if the boundary is tagged with + -- a place type and adapt the rank address. + IF NEW.rank_address > 0 and NEW.extratags ? 
'place' THEN + SELECT address_rank INTO place_address_level + FROM compute_place_rank(NEW.country_code, 'A', 'place', + NEW.extratags->'place', 0::SMALLINT, False, null); + IF place_address_level > parent_address_level and + place_address_level < 26 THEN + NEW.rank_address := place_address_level; + END IF; + END IF; END IF; -- Initialise the name vector using our name @@ -846,7 +861,9 @@ BEGIN END IF; SELECT * FROM insert_addresslines(NEW.place_id, NEW.partition, - NEW.rank_search, NEW.address, + CASE WHEN NEW.rank_address = 0 + THEN NEW.rank_search ELSE NEW.rank_address END, + NEW.address, CASE WHEN NEW.rank_search >= 26 AND NEW.rank_search < 30 THEN NEW.geometry ELSE NEW.centroid END) diff --git a/sql/functions/utils.sql b/sql/functions/utils.sql index 33ae340a..a8c4638c 100644 --- a/sql/functions/utils.sql +++ b/sql/functions/utils.sql @@ -237,7 +237,7 @@ BEGIN IF word_ids is not null THEN parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, word_ids); IF parent_place_id is not null THEN - --DEBUG: RAISE WARNING 'Get parent form addr:street: %', parent.place_id; + --DEBUG: RAISE WARNING 'Get parent form addr:street: %', parent_place_id; RETURN parent_place_id; END IF; END IF; @@ -249,7 +249,7 @@ BEGIN IF word_ids is not null THEN parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, word_ids); IF parent_place_id is not null THEN - --DEBUG: RAISE WARNING 'Get parent form addr:place: %', parent.place_id; + --DEBUG: RAISE WARNING 'Get parent form addr:place: %', parent_place_id; RETURN parent_place_id; END IF; END IF; @@ -322,7 +322,7 @@ BEGIN ELSEIF ST_GeometryType(geometry) = 'ST_Point' THEN radius := near_feature_rank_distance(rank_search); - --DEBUG: RAISE WARNING 'adding % diameter %', place_id, diameter; + --DEBUG: RAISE WARNING 'adding % radius %', place_id, radius; -- Create a bounding box with an extent computed from the radius (in meters). 
secgeo := ST_Envelope(ST_Collect( diff --git a/test/bdd/db/import/placex.feature b/test/bdd/db/import/placex.feature index 531483b3..594f3538 100644 --- a/test/bdd/db/import/placex.feature +++ b/test/bdd/db/import/placex.feature @@ -125,73 +125,6 @@ Feature: Import into placex | N8 | ca | 25 | 0 | | N9 | ca | 25 | 0 | - Scenario: search and address ranks for places are correctly assigned - Given the named places - | osm | class | type | - | N1 | foo | bar | - | N11 | place | Continent | - | N12 | place | continent | - | N13 | place | sea | - | N14 | place | country | - | N15 | place | state | - | N16 | place | region | - | N17 | place | county | - | N18 | place | city | - | N19 | place | island | - | N20 | place | town | - | N21 | place | village | - | N22 | place | hamlet | - | N23 | place | municipality | - | N24 | place | district | - | N26 | place | borough | - | N27 | place | suburb | - | N28 | place | croft | - | N29 | place | subdivision | - | N30 | place | isolated_dwelling | - | N31 | place | farm | - | N32 | place | locality | - | N33 | place | islet | - | N34 | place | mountain_pass | - | N35 | place | neighbourhood | - | N36 | place | house | - | N37 | place | building | - | N38 | place | houses | - And the named places - | osm | class | type | extra+capital | - | N101 | place | city | yes | - When importing - Then placex contains - | object | rank_search | rank_address | - | N1 | 30 | 30 | - | N11 | 30 | 30 | - | N12 | 2 | 0 | - | N13 | 2 | 0 | - | N14 | 4 | 0 | - | N15 | 8 | 0 | - | N16 | 18 | 0 | - | N17 | 12 | 12 | - | N18 | 16 | 16 | - | N19 | 17 | 0 | - | N20 | 18 | 16 | - | N21 | 19 | 16 | - | N22 | 20 | 20 | - | N23 | 14 | 14 | - | N24 | 19 | 16 | - | N26 | 18 | 18 | - | N27 | 19 | 20 | - | N28 | 20 | 20 | - | N29 | 20 | 20 | - | N30 | 22 | 20 | - | N31 | 20 | 0 | - | N32 | 20 | 0 | - | N33 | 20 | 0 | - | N34 | 20 | 0 | - | N101 | 15 | 16 | - | N35 | 20 | 22 | - | N36 | 30 | 30 | - | N37 | 30 | 30 | - | N38 | 28 | 0 | - Scenario: search and 
address ranks for boundaries are correctly assigned Given the named places | osm | class | type | diff --git a/test/bdd/db/import/rank_computation.feature b/test/bdd/db/import/rank_computation.feature new file mode 100644 index 00000000..38d15e96 --- /dev/null +++ b/test/bdd/db/import/rank_computation.feature @@ -0,0 +1,115 @@ +@DB +Feature: Rank assignment + Tests for assignment of search and address ranks. + + Scenario: Ranks for place nodes are assigned according to their type + Given the named places + | osm | class | type | + | N1 | foo | bar | + | N11 | place | Continent | + | N12 | place | continent | + | N13 | place | sea | + | N14 | place | country | + | N15 | place | state | + | N16 | place | region | + | N17 | place | county | + | N18 | place | city | + | N19 | place | island | + | N36 | place | house | + | N38 | place | houses | + And the named places + | osm | class | type | extra+capital | + | N101 | place | city | yes | + When importing + Then placex contains + | object | rank_search | rank_address | + | N1 | 30 | 30 | + | N11 | 30 | 30 | + | N12 | 2 | 0 | + | N13 | 2 | 0 | + | N14 | 4 | 0 | + | N15 | 8 | 0 | + | N16 | 18 | 0 | + | N17 | 12 | 12 | + | N18 | 16 | 16 | + | N19 | 17 | 0 | + | N101 | 15 | 16 | + | N36 | 30 | 30 | + | N38 | 28 | 0 | + + Scenario: Ranks for boundaries are assigned according to admin level + Given the named places + | osm | class | type | admin | geometry | + | R20 | boundary | administrative | 2 | (1 1, 2 2, 1 2, 1 1) | + | R21 | boundary | administrative | 32 | (3 3, 4 4, 3 4, 3 3) | + | R22 | boundary | administrative | 6 | (0 0, 1 0, 0 1, 0 0) | + | R23 | boundary | administrative | 10 | (0 0, 1 1, 1 0, 0 0) | + When importing + Then placex contains + | object | rank_search | rank_address | + | R20 | 4 | 4 | + | R21 | 30 | 30 | + | R22 | 12 | 12 | + | R23 | 20 | 20 | + + Scenario: Ranks for boundaries with place assignment go with place address ranks if available + Given the named places + | osm | class | type | admin | 
extra+place | geometry | + | R20 | boundary | administrative | 3 | state | (1 1, 2 2, 1 2, 1 1) | + | R21 | boundary | administrative | 32 | suburb | (3 3, 4 4, 3 4, 3 3) | + | R22 | boundary | administrative | 6 | town | (0 0, 1 0, 0 1, 0 0) | + | R23 | boundary | administrative | 10 | village | (0 0, 1 1, 1 0, 0 0) | + When importing + Then placex contains + | object | rank_search | rank_address | + | R20 | 6 | 6 | + | R21 | 30 | 20 | + | R22 | 12 | 16 | + | R23 | 20 | 16 | + + Scenario: Place address ranks cannot overtake a parent address rank + Given the named places + | osm | class | type | admin | extra+place | geometry | + | R20 | boundary | administrative | 8 | town | (0 0, 0 2, 2 2, 2 0, 0 0) | + | R21 | boundary | administrative | 9 | municipality | (0 0, 0 1, 1 1, 1 0, 0 0) | + | R22 | boundary | administrative | 9 | suburb | (0 0, 0 1, 1 1, 1 0, 0 0) | + When importing + Then place_addressline contains + | object | address | cached_rank_address | + | R21 | R20 | 16 | + | R22 | R20 | 16 | + Then placex contains + | object | rank_search | rank_address | + | R20 | 16 | 16 | + | R21 | 18 | 18 | + | R22 | 18 | 20 | + + Scenario: Admin levels cannot overtake each other due to place address ranks + Given the named places + | osm | class | type | admin | extra+place | geometry | + | R20 | boundary | administrative | 6 | town | (0 0, 0 2, 2 2, 2 0, 0 0) | + | R21 | boundary | administrative | 8 | | (0 0, 0 1, 1 1, 1 0, 0 0) | + | R22 | boundary | administrative | 8 | suburb | (0 0, 0 1, 1 1, 1 0, 0 0) | + When importing + Then placex contains + | object | rank_search | rank_address | + | R20 | 12 | 16 | + | R21 | 16 | 18 | + | R22 | 16 | 20 | + Then place_addressline contains + | object | address | cached_rank_address | + | R21 | R20 | 16 | + | R22 | R20 | 16 | + + Scenario: Admin levels must not be larger than 25 + Given the named places + | osm | class | type | admin | extra+place | geometry | + | R20 | boundary | administrative | 6 | neighbourhood | (0 0, 0 
2, 2 2, 2 0, 0 0) | + | R21 | boundary | administrative | 7 | | (0 0, 0 1, 1 1, 1 0, 0 0) | + | R22 | boundary | administrative | 8 | | (0 0, 0 0.5, 0.5 0.5, 0.5 0, 0 0) | + When importing + Then placex contains + | object | rank_search | rank_address | + | R20 | 12 | 22 | + | R21 | 14 | 24 | + | R22 | 16 | 25 | diff --git a/utils/setup.php b/utils/setup.php index 23b7b3a6..7f476d5c 100644 --- a/utils/setup.php +++ b/utils/setup.php @@ -38,13 +38,12 @@ $aCMDOptions array('disable-token-precalc', '', 0, 1, 0, 0, 'bool', 'Disable name precalculation (EXPERT)'), array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'), array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'), - array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'), array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'), array('index-noanalyse', '', 0, 1, 0, 0, 'bool', 'Do not perform analyse operations during index (EXPERT)'), array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'), array('create-country-names', '', 0, 1, 0, 0, 'bool', 'Create default list of searchable country names'), array('drop', '', 0, 1, 0, 0, 'bool', 'Drop tables needed for updates, making the database readonly (EXPERIMENTAL)'), - array('setup-website', '', 0, 1, 0, 0, 'bool', 'Used to compile environment variables for the website (EXPERIMENTAL)'), + array('setup-website', '', 0, 1, 0, 0, 'bool', 'Used to compile environment variables for the website'), ); // $aCMDOptions passed to getCmdOpt by reference @@ -60,12 +59,6 @@ if ($aCMDResult['import-data'] || $aCMDResult['all']) { checkInFile($aCMDResult['osm-file']); } -// osmosis init is no longer supported -if ($aCMDResult['osmosis-init']) { - $bDidSomething = true; - echo "Command 'osmosis-init' no longer available, please use utils/update.php --init-updates.\n"; -} - // 
****************************************************** // instantiate Setup class $oSetup = new SetupFunctions($aCMDResult); diff --git a/utils/update.php b/utils/update.php index 30c4ae46..db280a62 100644 --- a/utils/update.php +++ b/utils/update.php @@ -39,11 +39,9 @@ $aCMDOptions array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'), array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'), - array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'), array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'), array('update-address-levels', '', 0, 1, 0, 0, 'bool', 'Reimport address level configuration (EXPERT)'), - array('recompute-importance', '', 0, 1, 0, 0, 'bool', 'Recompute place importances'), - array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolete)'), + array('recompute-importance', '', 0, 1, 0, 0, 'bool', 'Recompute place importances') ); getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true); @@ -273,78 +271,6 @@ if ($bHaveDiff) { } } -if ($aResult['deduplicate']) { - $oDB = new Nominatim\DB(); - $oDB->connect(); - - if ($oDB->getPostgresVersion() < 9.3) { - fail('ERROR: deduplicate is only currently supported in postgresql 9.3'); - } - - $sSQL = 'select partition from country_name order by country_code'; - $aPartitions = $oDB->getCol($sSQL); - $aPartitions[] = 0; - - // we don't care about empty search_name_* partitions, they can't contain mentions of duplicates - foreach ($aPartitions as $i => $sPartition) { - $sSQL = 'select count(*) from search_name_'.$sPartition; - $nEntries = $oDB->getOne($sSQL); - if ($nEntries == 0) { - unset($aPartitions[$i]); - } - } - - $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' '"; - $sSQL .= ' and class is null and type is null and country_code is null'; - $sSQL .= ' group by word_token having count(*) > 1 order by word_token'; - $aDuplicateTokens = $oDB->getAll($sSQL); - 
foreach ($aDuplicateTokens as $aToken) { - if (trim($aToken['word_token']) == '' || trim($aToken['word_token']) == '-') continue; - echo 'Deduping '.$aToken['word_token']."\n"; - $sSQL = 'select word_id,'; - $sSQL .= ' (select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num'; - $sSQL .= " from word where word_token = '".$aToken['word_token']; - $sSQL .= "' and class is null and type is null and country_code is null order by num desc"; - $aTokenSet = $oDB->getAll($sSQL); - - $aKeep = array_shift($aTokenSet); - $iKeepID = $aKeep['word_id']; - - foreach ($aTokenSet as $aRemove) { - $sSQL = 'update search_name set'; - $sSQL .= ' name_vector = array_replace(name_vector,'.$aRemove['word_id'].','.$iKeepID.'),'; - $sSQL .= ' nameaddress_vector = array_replace(nameaddress_vector,'.$aRemove['word_id'].','.$iKeepID.')'; - $sSQL .= ' where name_vector @> ARRAY['.$aRemove['word_id'].']'; - $oDB->exec($sSQL); - - $sSQL = 'update search_name set'; - $sSQL .= ' nameaddress_vector = array_replace(nameaddress_vector,'.$aRemove['word_id'].','.$iKeepID.')'; - $sSQL .= ' where nameaddress_vector @> ARRAY['.$aRemove['word_id'].']'; - $oDB->exec($sSQL); - - $sSQL = 'update location_area_country set'; - $sSQL .= ' keywords = array_replace(keywords,'.$aRemove['word_id'].','.$iKeepID.')'; - $sSQL .= ' where keywords @> ARRAY['.$aRemove['word_id'].']'; - $oDB->exec($sSQL); - - foreach ($aPartitions as $sPartition) { - $sSQL = 'update search_name_'.$sPartition.' 
set'; - $sSQL .= ' name_vector = array_replace(name_vector,'.$aRemove['word_id'].','.$iKeepID.')'; - $sSQL .= ' where name_vector @> ARRAY['.$aRemove['word_id'].']'; - $oDB->exec($sSQL); - - $sSQL = 'update location_area_country set'; - $sSQL .= ' keywords = array_replace(keywords,'.$aRemove['word_id'].','.$iKeepID.')'; - $sSQL .= ' where keywords @> ARRAY['.$aRemove['word_id'].']'; - $oDB->exec($sSQL); - } - - $sSQL = 'delete from word where word_id = '.$aRemove['word_id']; - $oDB->exec($sSQL); - } - } -} - if ($aResult['recompute-word-counts']) { info('Recompute frequency of full-word search terms'); $sTemplate = file_get_contents(CONST_BasePath.'/sql/words_from_search_name.sql');