From e10d11c6c7a5a96f645b9155a55e56f874ed6042 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 23 Nov 2018 23:02:32 +0100 Subject: [PATCH] Make rank assignments configurable The initial search and address ranks are saved in a table that is set up from a JSON configuration file. Ranks may be assigned on a country level according to class and type of the object. Special handling that depends on the geometry or OSM type is still hard-coded in placex insert. The new default config file mimics the current assignment as closely as possible. A couple of exceptions have been removed, most notably the exception for Irish townlands. --- data/address-levels.json | 86 ++++++++++++++ lib/setup/AddressLevelParser.php | 98 ++++++++++++++++ lib/setup/SetupClass.php | 5 + settings/defaults.php | 1 + sql/functions.sql | 180 +++++++++--------------------- test/bdd/db/import/placex.feature | 28 ++--- test/bdd/db/update/simple.feature | 4 +- 7 files changed, 256 insertions(+), 146 deletions(-) create mode 100644 data/address-levels.json create mode 100644 lib/setup/AddressLevelParser.php diff --git a/data/address-levels.json b/data/address-levels.json new file mode 100644 index 00000000..b06c67ea --- /dev/null +++ b/data/address-levels.json @@ -0,0 +1,86 @@ +[ +{ "tags" : { + "place" : { + "sea" : [2, 0], + "continent" : [2, 0], + "country" : [4, 4], + "state" : [8, 8], + "region" : [18, 0], + "county" : 12, + "city" : 16, + "island" : [17, 0], + "town" : [18, 16], + "village" : [19, 16], + "hamlet" : [19, 16], + "municipality" : [19, 16], + "district" : [19, 16], + "unincorporated_area" : [19, 16], + "borough" : [19, 16], + "suburb" : 20, + "croft" : 20, + "subdivision" : 20, + "isolated_dwelling" : 20, + "farm" : [20, 0], + "locality" : [20, 0], + "islet" : [20, 0], + "mountain_pass" : [20, 0], + "neighbourhood" : 22, + "houses" : [28, 0] + }, + "boundary" : { + "administrative2" : 4, + "administrative3" : 6, + "administrative4" : 8, + "administrative5" : 10, + "administrative6" 
: 12, + "administrative7" : 14, + "administrative8" : 16, + "administrative9" : 18, + "administrative10" : 20, + "administrative11" : 22, + "administrative12" : 24 + }, + "landuse" : { + "residential" : 22, + "farm" : 22, + "farmyard" : 22, + "industrial" : 22, + "commercial" : 22, + "allotments" : 22, + "retail" : 22, + "" : [22, 0] + }, + "leisure" : { + "park" : [24, 0] + }, + "natural" : { + "peak" : [18, 0], + "volcano" : [18, 0], + "mountain_range" : [18, 0], + "sea" : [4, 0], + "" : [22, 0] + }, + "waterway" : { + "" : [17, 0] + }, + "highway" : { + "" : 26, + "service" : 27, + "cycleway" : 27, + "path" : 27, + "footway" : 27, + "steps" : 27, + "bridleway" : 27, + "motorway_link" : 27, + "primary_link" : 27, + "trunk_link" : 27, + "secondary_link" : 27, + "tertiary_link" : 27 + }, + "mountain_pass" : { + "" : [20, 0] + } + } +} +] + diff --git a/lib/setup/AddressLevelParser.php b/lib/setup/AddressLevelParser.php new file mode 100644 index 00000000..5bc17d63 --- /dev/null +++ b/lib/setup/AddressLevelParser.php @@ -0,0 +1,98 @@ +aLevels = json_decode($sJson, true); + if (!$this->aLevels) { + switch (json_last_error()) { + case JSON_ERROR_NONE: + break; + case JSON_ERROR_DEPTH: + fail('JSON error - Maximum stack depth exceeded'); + break; + case JSON_ERROR_STATE_MISMATCH: + fail('JSON error - Underflow or the modes mismatch'); + break; + case JSON_ERROR_CTRL_CHAR: + fail('JSON error - Unexpected control character found'); + break; + case JSON_ERROR_SYNTAX: + fail('JSON error - Syntax error, malformed JSON'); + break; + case JSON_ERROR_UTF8: + fail('JSON error - Malformed UTF-8 characters, possibly incorrectly encoded'); + break; + default: + fail('JSON error - Unknown error'); + break; + } + } + } + + /** + * Dump the description into a database table. + * + * @param object $oDB Database connection to use. + * @param string $sTable Name of table to create. + * + * @return null + * + * A new table is created. Any previously existing table is dropped. 
+ * The table has the following columns: + * country_code, class, type, rank_search, rank_address. An entry without a 'countries' list gets a NULL country_code, an empty type key is stored as NULL, and a single rank value fills both rank columns. + */ + public function createTable($oDB, $sTable) + { + chksql($oDB->query('DROP TABLE IF EXISTS '.$sTable)); + $sSql = 'CREATE TABLE '.$sTable; + $sSql .= '(country_code varchar(2), class TEXT, type TEXT,'; + $sSql .= ' rank_search SMALLINT, rank_address SMALLINT)'; + chksql($oDB->query($sSql)); + + $sSql = 'CREATE UNIQUE INDEX ON '.$sTable.'(country_code, class, type)'; + chksql($oDB->query($sSql)); + + $sSql = 'INSERT INTO '.$sTable.' VALUES '; + foreach ($this->aLevels as $aLevel) { + $aCountries = array(); + if (isset($aLevel['countries'])) { + foreach ($aLevel['countries'] as $sCountry) { + $aCountries[$sCountry] = getDBQuoted($sCountry); + } + } else { + $aCountries['NULL'] = 'NULL'; + } + foreach ($aLevel['tags'] as $sKey => $aValues) { + foreach ($aValues as $sValue => $mRanks) { + $aFields = array( + getDBQuoted($sKey), + $sValue ? getDBQuoted($sValue) : 'NULL' + ); + if (is_array($mRanks)) { + $aFields[] = (string) $mRanks[0]; + $aFields[] = (string) $mRanks[1]; + } else { + $aFields[] = (string) $mRanks; + $aFields[] = (string) $mRanks; + } + $sLine = ','.join(',', $aFields).'),'; + + foreach ($aCountries as $sCountries) { + $sSql .= '('.$sCountries.$sLine; + } + } + } + } + chksql($oDB->query(rtrim($sSql, ','))); + } +} diff --git a/lib/setup/SetupClass.php b/lib/setup/SetupClass.php index 5c6d69e4..0f96bc8b 100755 --- a/lib/setup/SetupClass.php +++ b/lib/setup/SetupClass.php @@ -2,6 +2,8 @@ namespace Nominatim\Setup; +require_once(CONST_BasePath.'/lib/setup/AddressLevelParser.php'); + class SetupFunctions { protected $iCacheMemory; @@ -272,6 +274,9 @@ class SetupFunctions if ($bReverseOnly) { $this->pgExec('DROP TABLE search_name'); } + + $oAlParser = new AddressLevelParser(CONST_Address_Level_Config); + $oAlParser->createTable($this->oDB, 'address_levels'); } public function createPartitionTables() diff --git a/settings/defaults.php b/settings/defaults.php 
index 8cdbcb5a..8a286b11 100644 --- a/settings/defaults.php +++ b/settings/defaults.php @@ -49,6 +49,7 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true); @define('CONST_Pyosmium_Binary', '@PYOSMIUM_PATH@'); @define('CONST_Tiger_Data_Path', CONST_ExtraDataPath.'/tiger'); @define('CONST_Wikipedia_Data_Path', CONST_ExtraDataPath); +@define('CONST_Address_Level_Config', CONST_ExtraDataPath.'/address-levels.json'); // osm2pgsql settings @define('CONST_Osm2pgsql_Flatnode_File', null); diff --git a/sql/functions.sql b/sql/functions.sql index f17976ad..8ce36c54 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -817,11 +817,12 @@ DECLARE i INTEGER; postcode TEXT; result BOOLEAN; + is_area BOOLEAN; country_code VARCHAR(2); default_language VARCHAR(10); diameter FLOAT; classtable TEXT; - line RECORD; + classtype TEXT; BEGIN --DEBUG: RAISE WARNING '% % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type; @@ -848,148 +849,71 @@ BEGIN IF NEW.osm_type = 'X' THEN -- E'X'ternal records should already be in the right format so do nothing ELSE - NEW.rank_search := 30; - NEW.rank_address := NEW.rank_search; + is_area := ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon'); - -- By doing in postgres we have the country available to us - currently only used for postcode - IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN + IF NEW.class in ('place','boundary') + AND NEW.type in ('postcode','postal_code') THEN - IF NEW.address IS NULL OR NOT NEW.address ? 'postcode' THEN - -- most likely just a part of a multipolygon postcode boundary, throw it away - RETURN NULL; - END IF; - - NEW.name := hstore('ref', NEW.address->'postcode'); + IF NEW.address IS NULL OR NOT NEW.address ? 
'postcode' THEN + -- most likely just a part of a multipolygon postcode boundary, throw it away + RETURN NULL; + END IF; - SELECT * FROM get_postcode_rank(NEW.country_code, NEW.address->'postcode') - INTO NEW.rank_search, NEW.rank_address; + NEW.name := hstore('ref', NEW.address->'postcode'); - IF NOT ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon') THEN - NEW.rank_address := 0; - END IF; + SELECT * FROM get_postcode_rank(NEW.country_code, NEW.address->'postcode') + INTO NEW.rank_search, NEW.rank_address; - ELSEIF NEW.class = 'place' THEN - IF NEW.type in ('continent', 'sea') THEN - NEW.rank_search := 2; - NEW.rank_address := 0; - NEW.country_code := NULL; - ELSEIF NEW.type in ('country') THEN - NEW.rank_search := 4; - IF ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon') THEN - NEW.rank_address := NEW.rank_search; - ELSE - NEW.rank_address := 0; - END IF; - ELSEIF NEW.type in ('state') THEN - NEW.rank_search := 8; - IF ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon') THEN - NEW.rank_address := NEW.rank_search; - ELSE - NEW.rank_address := 0; - END IF; - ELSEIF NEW.type in ('region') THEN - NEW.rank_search := 18; -- dropped from previous value of 10 - NEW.rank_address := 0; -- So badly miss-used that better to just drop it! 
- ELSEIF NEW.type in ('county') THEN - NEW.rank_search := 12; - NEW.rank_address := NEW.rank_search; - ELSEIF NEW.type in ('city') THEN - NEW.rank_search := 16; - NEW.rank_address := NEW.rank_search; - ELSEIF NEW.type in ('island') THEN - NEW.rank_search := 17; - NEW.rank_address := 0; - ELSEIF NEW.type in ('town') THEN - NEW.rank_search := 18; - NEW.rank_address := 16; - ELSEIF NEW.type in ('village','hamlet','municipality','district','unincorporated_area','borough') THEN - NEW.rank_search := 19; - NEW.rank_address := 16; - ELSEIF NEW.type in ('suburb','croft','subdivision','isolated_dwelling') THEN - NEW.rank_search := 20; - NEW.rank_address := NEW.rank_search; - ELSEIF NEW.type in ('farm','locality','islet','mountain_pass') THEN - NEW.rank_search := 20; - NEW.rank_address := 0; - -- Irish townlands, tagged as place=locality and locality=townland - IF (NEW.extratags -> 'locality') = 'townland' THEN - NEW.rank_address := 20; - END IF; - ELSEIF NEW.type in ('neighbourhood') THEN - NEW.rank_search := 22; - NEW.rank_address := 22; - ELSEIF NEW.type in ('house','building') THEN - NEW.rank_search := 30; - NEW.rank_address := NEW.rank_search; - ELSEIF NEW.type in ('houses') THEN - -- can't guarantee all required nodes loaded yet due to caching in osm2pgsql - NEW.rank_search := 28; - NEW.rank_address := 0; + IF NOT is_area THEN + NEW.rank_address := 0; END IF; - - ELSEIF NEW.class = 'boundary' THEN - IF ST_GeometryType(NEW.geometry) NOT IN ('ST_Polygon','ST_MultiPolygon') THEN --- RAISE WARNING 'invalid boundary %',NEW.osm_id; + ELSEIF NEW.class = 'boundary' AND NOT is_area THEN return NULL; - END IF; - NEW.rank_search := NEW.admin_level * 2; - IF NEW.type = 'administrative' THEN - NEW.rank_address := NEW.rank_search; + ELSEIF NEW.class = 'railway' AND NEW.type in ('rail') THEN + return NULL; + ELSEIF NEW.osm_type = 'N' AND NEW.class = 'highway' THEN + NEW.rank_search = 30; + NEW.rank_address = 0; + ELSEIF NEW.class = 'landuse' AND NOT is_area THEN + NEW.rank_search = 
30; + NEW.rank_address = 0; + ELSE + -- do table lookup stuff + IF NEW.class = 'boundary' and NEW.type = 'administrative' THEN + classtype = NEW.type || NEW.admin_level::TEXT; ELSE - NEW.rank_address := 0; + classtype = NEW.type; END IF; - ELSEIF NEW.class = 'landuse' AND ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') THEN - NEW.rank_search := 22; - IF NEW.type in ('residential', 'farm', 'farmyard', 'industrial', 'commercial', 'allotments', 'retail') THEN - NEW.rank_address := NEW.rank_search; - ELSE - NEW.rank_address := 0; + SELECT l.rank_search, l.rank_address FROM address_levels l + WHERE (l.country_code = NEW.country_code or l.country_code is NULL) + AND l.class = NEW.class AND (l.type = classtype or l.type is NULL) + ORDER BY l.country_code, l.class, l.type LIMIT 1 + INTO NEW.rank_search, NEW.rank_address; + + IF NEW.rank_search is NULL THEN + NEW.rank_search := 30; END IF; - ELSEIF NEW.class = 'leisure' and NEW.type in ('park') THEN - NEW.rank_search := 24; - NEW.rank_address := 0; - ELSEIF NEW.class = 'natural' and NEW.type in ('peak','volcano','mountain_range') THEN - NEW.rank_search := 18; - NEW.rank_address := 0; - ELSEIF NEW.class = 'natural' and NEW.type = 'sea' THEN - NEW.rank_search := 4; - NEW.rank_address := NEW.rank_search; - -- any feature more than 5 square miles is probably worth indexing - ELSEIF ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_Area(NEW.geometry) > 0.1 THEN - NEW.rank_search := 22; - NEW.rank_address := 0; - ELSEIF NEW.class = 'railway' AND NEW.type in ('rail') THEN - RETURN NULL; - ELSEIF NEW.class = 'waterway' THEN - IF NEW.osm_type = 'R' THEN - NEW.rank_search := 16; - ELSE - NEW.rank_search := 17; + + IF NEW.rank_address is NULL THEN + NEW.rank_address := 30; END IF; - NEW.rank_address := 0; - ELSEIF NEW.class = 'highway' AND NEW.osm_type != 'N' AND NEW.type in 
('service','cycleway','path','footway','steps','bridleway','motorway_link','primary_link','trunk_link','secondary_link','tertiary_link') THEN - NEW.rank_search := 27; - NEW.rank_address := NEW.rank_search; - ELSEIF NEW.class = 'highway' AND NEW.osm_type != 'N' THEN - NEW.rank_search := 26; - NEW.rank_address := NEW.rank_search; - ELSEIF NEW.class = 'mountain_pass' THEN - NEW.rank_search := 20; - NEW.rank_address := 0; END IF; - END IF; - - IF NEW.rank_search > 30 THEN - NEW.rank_search := 30; - END IF; + -- some postcorrections + IF NEW.class = 'place' THEN + IF NEW.type in ('continent', 'sea', 'country', 'state') AND NEW.osm_type = 'N' THEN + NEW.rank_address := 0; + END IF; + ELSEIF NEW.class = 'waterway' AND NEW.osm_type = 'R' THEN + -- Slightly promote waterway relations so that they are processed + -- before their members. + NEW.rank_search := NEW.rank_search - 1; + END IF; - IF NEW.rank_address > 30 THEN - NEW.rank_address := 30; - END IF; + IF (NEW.extratags -> 'capital') = 'yes' THEN + NEW.rank_search := NEW.rank_search - 1; + END IF; - IF (NEW.extratags -> 'capital') = 'yes' THEN - NEW.rank_search := NEW.rank_search - 1; END IF; -- a country code make no sense below rank 4 (country) diff --git a/test/bdd/db/import/placex.feature b/test/bdd/db/import/placex.feature index 08ba9cbd..46827c43 100644 --- a/test/bdd/db/import/placex.feature +++ b/test/bdd/db/import/placex.feature @@ -26,8 +26,8 @@ Feature: Import into placex | R1 | boundary | administrative | 2 | de | (-100 40, -101 40, -101 41, -100 41, -100 40) | When importing Then placex contains - | object | addr+country | country_code | - | R1 | de | de | + | object | rank_search| addr+country | country_code | + | R1 | 4 | de | de | Scenario: Illegal country code tag for countries is ignored Given the named places @@ -157,9 +157,6 @@ Feature: Import into placex | N36 | place | house | | N37 | place | building | | N38 | place | houses | - And the named places - | osm | class | type | extra+locality | - | 
N100 | place | locality | townland | And the named places | osm | class | type | extra+capital | | N101 | place | city | yes | @@ -191,7 +188,6 @@ Feature: Import into placex | N32 | 20 | 0 | | N33 | 20 | 0 | | N34 | 20 | 0 | - | N100 | 20 | 20 | | N101 | 15 | 16 | | N35 | 22 | 22 | | N36 | 30 | 30 | @@ -222,8 +218,8 @@ Feature: Import into placex | object | rank_search | rank_address | | R20 | 4 | 4 | | R21 | 30 | 30 | - | R22 | 12 | 0 | - | R23 | 20 | 0 | + | R22 | 30 | 30 | + | R23 | 30 | 30 | | R40 | 4 | 4 | | R41 | 8 | 8 | @@ -243,7 +239,7 @@ Feature: Import into placex When importing Then placex contains | object | rank_search | rank_address | - | N1 | 30 | 30 | + | N1 | 30 | 0 | | W1 | 26 | 26 | | W2 | 26 | 26 | | W3 | 26 | 26 | @@ -264,11 +260,11 @@ Feature: Import into placex When importing Then placex contains | object | rank_search | rank_address | - | N2 | 30 | 30 | - | W2 | 30 | 30 | + | N2 | 30 | 0 | + | W2 | 30 | 0 | | W4 | 22 | 22 | | R2 | 22 | 22 | - | R3 | 22 | 0 | + | R3 | 22 | 0 | Scenario: rank and inclusion of naturals Given the named places @@ -289,11 +285,11 @@ Feature: Import into placex | object | rank_search | rank_address | | N2 | 18 | 0 | | N4 | 18 | 0 | - | N5 | 30 | 30 | + | N5 | 22 | 0 | | W2 | 18 | 0 | | R3 | 18 | 0 | | R4 | 22 | 0 | - | R5 | 4 | 4 | - | R6 | 4 | 4 | - | W3 | 30 | 30 | + | R5 | 4 | 0 | + | R6 | 4 | 0 | + | W3 | 22 | 0 | diff --git a/test/bdd/db/update/simple.feature b/test/bdd/db/update/simple.feature index d48e97fa..195d101e 100644 --- a/test/bdd/db/update/simple.feature +++ b/test/bdd/db/update/simple.feature @@ -34,7 +34,7 @@ Feature: Update of simple objects When importing Then placex contains | object | rank_address | - | R1 | 0 | + | R1 | 30 | | W1 | 30 | When marking for delete R1,W1 Then placex has no entry for W1 @@ -103,4 +103,4 @@ Feature: Update of simple objects | W1 | boundary | historic | Haha | 5 | (1, 2, 4, 3, 1) | Then placex contains | object | rank_address | - | W1 | 0 | + | W1 | 30 | -- 2.43.2