From: Sarah Hoffmann Date: Sat, 22 Apr 2017 16:14:05 +0000 (+0200) Subject: Merge branch 'roques-tetris' X-Git-Tag: v3.0.0~36 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/86d5209118bdbacaac817b8f8a6ab7a6c769bdc7?hp=c62957323038f53d3b1ed6cd23b0ce473e46650e Merge branch 'roques-tetris' --- diff --git a/sql/functions.sql b/sql/functions.sql index f4be01e2..7b36ac50 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -1246,7 +1246,8 @@ BEGIN END IF; -- Adding ourselves to the list simplifies address calculations later - INSERT INTO place_addressline VALUES (NEW.place_id, NEW.place_id, true, true, 0, NEW.rank_address); + INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) + VALUES (NEW.place_id, NEW.place_id, true, true, 0, NEW.rank_address); -- What level are we searching from search_maxrank := NEW.rank_search; @@ -1734,7 +1735,8 @@ BEGIN IF location.rank_search > 4 THEN nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); END IF; - INSERT INTO place_addressline VALUES (NEW.place_id, location.place_id, true, location_isaddress, location.distance, location.rank_address); + INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) + VALUES (NEW.place_id, location.place_id, true, location_isaddress, location.distance, location.rank_address); IF location_isaddress THEN @@ -1768,7 +1770,8 @@ BEGIN IF location.rank_search > 4 THEN nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); - INSERT INTO place_addressline VALUES (NEW.place_id, location.place_id, false, NOT address_havelevel[location.rank_address], location.distance, location.rank_address); + INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) + VALUES (NEW.place_id, location.place_id, false, NOT address_havelevel[location.rank_address], 
location.distance, location.rank_address); address_havelevel[location.rank_address] := true; IF location.rank_address > parent_place_id_rank THEN @@ -1800,7 +1803,8 @@ BEGIN -- Add it to the list of search terms nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); - INSERT INTO place_addressline VALUES (NEW.place_id, location.place_id, true, false, location.distance, location.rank_address); + INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) + VALUES (NEW.place_id, location.place_id, true, false, location.distance, location.rank_address); END IF; @@ -1912,7 +1916,7 @@ BEGIN IF st_area(OLD.geometry) > 2 and st_isvalid(OLD.geometry) THEN SELECT bool_or(not (rank_address = 0 or rank_address > 26)) as ranked FROM placex WHERE osm_type = OLD.osm_type and osm_id = OLD.osm_id and class = OLD.class and type = OLD.type INTO has_rank; IF has_rank THEN - insert into import_polygon_delete values (OLD.osm_type,OLD.osm_id,OLD.class,OLD.type); + insert into import_polygon_delete (osm_type, osm_id, class, type) values (OLD.osm_type,OLD.osm_id,OLD.class,OLD.type); RETURN NULL; END IF; END IF; @@ -1948,9 +1952,8 @@ BEGIN --DEBUG: RAISE WARNING 'place_insert: % % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,st_area(NEW.geometry); -- filter wrong tupels IF ST_IsEmpty(NEW.geometry) OR NOT ST_IsValid(NEW.geometry) OR ST_X(ST_Centroid(NEW.geometry))::text in ('NaN','Infinity','-Infinity') OR ST_Y(ST_Centroid(NEW.geometry))::text in ('NaN','Infinity','-Infinity') THEN - INSERT INTO import_polygon_error values (NEW.osm_type, NEW.osm_id, NEW.class, NEW.type, - NEW.name, NEW.address->'country', - now(), ST_IsValidReason(NEW.geometry), null, NEW.geometry); + INSERT INTO import_polygon_error (osm_type, osm_id, class, type, name, country_code, updated, errormessage, prevgeometry, newgeometry) + VALUES (NEW.osm_type, NEW.osm_id, NEW.class, NEW.type, NEW.name, NEW.address->'country', now(), 
ST_IsValidReason(NEW.geometry), null, NEW.geometry); -- RAISE WARNING 'Invalid Geometry: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type; RETURN null; END IF; @@ -2039,7 +2042,8 @@ BEGIN AND ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND st_area(NEW.geometry) < st_area(existing.geometry)*0.5 THEN - INSERT INTO import_polygon_error values (NEW.osm_type, NEW.osm_id, NEW.class, NEW.type, NEW.name, NEW.country_code, now(), + INSERT INTO import_polygon_error (osm_type, osm_id, class, type, name, country_code, updated, errormessage, prevgeometry, newgeometry) + VALUES (NEW.osm_type, NEW.osm_id, NEW.class, NEW.type, NEW.name, NEW.country_code, now(), 'Area reduced from '||st_area(existing.geometry)||' to '||st_area(NEW.geometry), existing.geometry, NEW.geometry); RETURN null; END IF; diff --git a/sql/loaddata.sql b/sql/loaddata.sql index 7bef299c..a3c8914a 100644 --- a/sql/loaddata.sql +++ b/sql/loaddata.sql @@ -6,8 +6,11 @@ TRUNCATE location_area; DROP SEQUENCE seq_place; CREATE SEQUENCE seq_place start 100000; -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) select * from place where osm_type = 'N'; -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) select * from place where osm_type = 'W'; -insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) select * from place where osm_type = 'R'; +insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) + select osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry from place where osm_type = 'N'; +insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, 
street, isin, postcode, country_code, extratags, geometry) + select osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry from place where osm_type = 'W'; +insert into placex (osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry) + select osm_type, osm_id, class, type, name, admin_level, housenumber, street, isin, postcode, country_code, extratags, geometry from place where osm_type = 'R'; --select count(*) from (select create_interpolation(osm_id, housenumber) from placex where indexed=false and class='place' and type='houses') as x; diff --git a/sql/partition-functions.src.sql b/sql/partition-functions.src.sql index 46dc6137..7d4f7cbe 100644 --- a/sql/partition-functions.src.sql +++ b/sql/partition-functions.src.sql @@ -65,13 +65,15 @@ BEGIN END IF; IF in_rank_search <= 4 THEN - INSERT INTO location_area_country values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); + INSERT INTO location_area_country (partition, place_id, country_code, keywords, rank_search, rank_address, isguess, centroid, geometry) + values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); RETURN TRUE; END IF; -- start IF in_partition = -partition- THEN - INSERT INTO location_area_large_-partition- values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); + INSERT INTO location_area_large_-partition- (partition, place_id, country_code, keywords, rank_search, rank_address, isguess, centroid, geometry) + values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry); RETURN TRUE; END IF; -- end @@ -203,14 +205,14 @@ DECLARE BEGIN DELETE FROM 
search_name WHERE place_id = in_place_id; - INSERT INTO search_name values (in_place_id, in_rank_search, in_rank_address, in_importance, in_country_code, - in_name_vector, in_nameaddress_vector, in_centroid); + INSERT INTO search_name (place_id, search_rank, address_rank, importance, country_code, name_vector, nameaddress_vector, centroid) + values (in_place_id, in_rank_search, in_rank_address, in_importance, in_country_code, in_name_vector, in_nameaddress_vector, in_centroid); IF in_rank_search <= 4 THEN DELETE FROM search_name_country WHERE place_id = in_place_id; IF in_rank_address > 0 THEN - INSERT INTO search_name_country values (in_place_id, in_rank_search, in_rank_address, - in_name_vector, in_geometry); + INSERT INTO search_name_country (place_id, search_rank, address_rank, name_vector, centroid) + values (in_place_id, in_rank_search, in_rank_address, in_name_vector, in_geometry); END IF; RETURN TRUE; END IF; @@ -219,8 +221,8 @@ BEGIN IF in_partition = -partition- THEN DELETE FROM search_name_-partition- values WHERE place_id = in_place_id; IF in_rank_address > 0 THEN - INSERT INTO search_name_-partition- values (in_place_id, in_rank_search, in_rank_address, - in_name_vector, in_geometry); + INSERT INTO search_name_-partition- (place_id, search_rank, address_rank, name_vector, centroid) + values (in_place_id, in_rank_search, in_rank_address, in_name_vector, in_geometry); END IF; RETURN TRUE; END IF; @@ -261,7 +263,8 @@ BEGIN -- start IF in_partition = -partition- THEN DELETE FROM location_road_-partition- where place_id = in_place_id; - INSERT INTO location_road_-partition- values (in_partition, in_place_id, in_country_code, in_geometry); + INSERT INTO location_road_-partition- (partition, place_id, country_code, geometry) + values (in_partition, in_place_id, in_country_code, in_geometry); RETURN TRUE; END IF; -- end diff --git a/sql/partition-tables.src.sql b/sql/partition-tables.src.sql index 3e32474f..6f799bcf 100644 --- a/sql/partition-tables.src.sql 
+++ b/sql/partition-tables.src.sql @@ -7,8 +7,8 @@ drop type if exists nearfeature cascade; create type nearfeature as ( place_id BIGINT, keywords int[], - rank_address integer, - rank_search integer, + rank_address smallint, + rank_search smallint, distance float, isguess boolean ); @@ -17,8 +17,8 @@ drop type if exists nearfeaturecentr cascade; create type nearfeaturecentr as ( place_id BIGINT, keywords int[], - rank_address integer, - rank_search integer, + rank_address smallint, + rank_search smallint, distance float, isguess boolean, centroid GEOMETRY @@ -27,8 +27,8 @@ create type nearfeaturecentr as ( drop table IF EXISTS search_name_blank CASCADE; CREATE TABLE search_name_blank ( place_id BIGINT, - search_rank integer, - address_rank integer, + search_rank smallint, + address_rank smallint, name_vector integer[] ); SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2); @@ -52,8 +52,8 @@ CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USI CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off) {ts:address-index}; CREATE TABLE location_road_-partition- ( - partition integer, place_id BIGINT, + partition SMALLINT, country_code VARCHAR(2) ) {ts:address-data}; SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2); diff --git a/sql/tables.sql b/sql/tables.sql index 8be7c39e..6263025c 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -52,12 +52,12 @@ CREATE SEQUENCE seq_word start 1; drop table IF EXISTS location_area CASCADE; CREATE TABLE location_area ( - partition integer, place_id BIGINT, - country_code VARCHAR(2), keywords INTEGER[], - rank_search INTEGER NOT NULL, - rank_address INTEGER NOT NULL, + partition SMALLINT, + rank_search SMALLINT NOT NULL, + rank_address SMALLINT NOT NULL, + country_code VARCHAR(2), isguess BOOL ); SELECT AddGeometryColumn('location_area', 'centroid', 4326, 'POINT', 2); @@ 
-68,8 +68,8 @@ CREATE TABLE location_area_large () INHERITS (location_area); drop table IF EXISTS location_property CASCADE; CREATE TABLE location_property ( place_id BIGINT, - partition integer, parent_place_id BIGINT, + partition SMALLINT, housenumber TEXT, postcode TEXT ); @@ -81,25 +81,34 @@ CREATE INDEX idx_location_property_aux_parent_place_id ON location_property_aux CREATE INDEX idx_location_property_aux_housenumber_parent_place_id ON location_property_aux USING BTREE (parent_place_id, housenumber); GRANT SELECT ON location_property_aux TO "{www-user}"; -CREATE TABLE location_property_tiger (linegeo GEOMETRY, place_id BIGINT, partition INTEGER, parent_place_id BIGINT, startnumber INTEGER, endnumber INTEGER, interpolationtype TEXT, postcode TEXT); +CREATE TABLE location_property_tiger ( + place_id BIGINT, + parent_place_id BIGINT, + startnumber INTEGER, + endnumber INTEGER, + partition SMALLINT, + linegeo GEOMETRY, + interpolationtype TEXT, + postcode TEXT); GRANT SELECT ON location_property_tiger TO "{www-user}"; drop table if exists location_property_osmline; CREATE TABLE location_property_osmline ( - linegeo GEOMETRY, place_id BIGINT NOT NULL, - partition INTEGER, osm_id BIGINT, parent_place_id BIGINT, + geometry_sector INTEGER, + indexed_date TIMESTAMP, startnumber INTEGER, endnumber INTEGER, + partition SMALLINT, + indexed_status SMALLINT, + linegeo GEOMETRY, interpolationtype TEXT, address HSTORE, postcode TEXT, - country_code VARCHAR(2), - geometry_sector INTEGER, - indexed_status INTEGER, - indexed_date TIMESTAMP){ts:search-data}; + country_code VARCHAR(2) + ){ts:search-data}; CREATE UNIQUE INDEX idx_osmline_place_id ON location_property_osmline USING BTREE (place_id) {ts:search-index}; CREATE INDEX idx_osmline_geometry_sector ON location_property_osmline USING BTREE (geometry_sector) {ts:address-index}; CREATE INDEX idx_osmline_linegeo ON location_property_osmline USING GIST (linegeo) {ts:search-index}; @@ -108,12 +117,12 @@ GRANT SELECT ON 
location_property_osmline TO "{www-user}"; drop table IF EXISTS search_name; CREATE TABLE search_name ( place_id BIGINT, - search_rank integer, - address_rank integer, importance FLOAT, - country_code varchar(2), + search_rank SMALLINT, + address_rank SMALLINT, name_vector integer[], - nameaddress_vector integer[] + nameaddress_vector integer[], + country_code varchar(2) ) {ts:search-data}; SELECT AddGeometryColumn('search_name', 'centroid', 4326, 'GEOMETRY', 2); CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) {ts:search-index}; @@ -122,27 +131,27 @@ drop table IF EXISTS place_addressline; CREATE TABLE place_addressline ( place_id BIGINT, address_place_id BIGINT, + distance FLOAT, + cached_rank_address SMALLINT, fromarea boolean, - isaddress boolean, - distance float, - cached_rank_address integer + isaddress boolean ) {ts:search-data}; CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) {ts:search-index}; drop table if exists placex; CREATE TABLE placex ( place_id BIGINT NOT NULL, - partition integer, - LIKE place INCLUDING CONSTRAINTS, parent_place_id BIGINT, linked_place_id BIGINT, - rank_address INTEGER, - rank_search INTEGER, importance FLOAT, - indexed_status INTEGER, indexed_date TIMESTAMP, - wikipedia TEXT, -- calculated wikipedia article name (language:title) geometry_sector INTEGER, + rank_address SMALLINT, + rank_search SMALLINT, + partition SMALLINT, + indexed_status SMALLINT, + LIKE place INCLUDING CONSTRAINTS, + wikipedia TEXT, -- calculated wikipedia article name (language:title) country_code varchar(2), housenumber TEXT, postcode TEXT @@ -150,7 +159,7 @@ CREATE TABLE placex ( SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2); CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {ts:search-index}; CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {ts:search-index}; -CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE 
(linked_place_id) {ts:address-index}; +CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {ts:address-index} WHERE linked_place_id IS NOT NULL; CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {ts:address-index}; CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {ts:search-index}; CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) {ts:address-index} WHERE osm_type='N' and rank_search < 26; @@ -190,8 +199,8 @@ CREATE SEQUENCE seq_postcodes start 1; DROP TABLE IF EXISTS import_polygon_error; CREATE TABLE import_polygon_error ( - osm_type char(1), - osm_id INTEGER, + osm_id BIGINT, + osm_type CHAR(1), class TEXT NOT NULL, type TEXT NOT NULL, name HSTORE, @@ -206,8 +215,8 @@ GRANT SELECT ON import_polygon_error TO "{www-user}"; DROP TABLE IF EXISTS import_polygon_delete; CREATE TABLE import_polygon_delete ( - osm_type char(1), - osm_id INTEGER, + osm_id BIGINT, + osm_type CHAR(1), class TEXT NOT NULL, type TEXT NOT NULL ); diff --git a/test/bdd/steps/db_ops.py b/test/bdd/steps/db_ops.py index 30c216a5..48abfd13 100644 --- a/test/bdd/steps/db_ops.py +++ b/test/bdd/steps/db_ops.py @@ -278,9 +278,9 @@ def import_and_index_data_from_place_table(context): context.nominatim.run_setup_script('create-functions', 'create-partition-functions') cur = context.db.cursor() cur.execute( - """insert into placex (osm_type, osm_id, class, type, name, admin_level, - address, extratags, geometry) - select * from place where not (class='place' and type='houses' and osm_type='W')""") + """insert into placex (osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry) + select osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry + from place where not (class='place' and type='houses' and osm_type='W')""") cur.execute( """insert into location_property_osmline (osm_id, address, linegeo) SELECT osm_id, address, 
geometry from place diff --git a/test/bdd/steps/osm_data.py b/test/bdd/steps/osm_data.py index f56a106a..122b2461 100644 --- a/test/bdd/steps/osm_data.py +++ b/test/bdd/steps/osm_data.py @@ -78,8 +78,8 @@ def update_from_osm_file(context): context.nominatim.run_setup_script('create-functions', 'create-partition-functions') cur = context.db.cursor() - cur.execute("""insert into placex (osm_type, osm_id, class, type, name, - admin_level, address, extratags, geometry) select * from place""") + cur.execute("""insert into placex (osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry) + select osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry from place""") cur.execute( """insert into location_property_osmline (osm_id, address, linegeo) SELECT osm_id, address, geometry from place diff --git a/utils/setup.php b/utils/setup.php index 5ec41000..6f1ce41d 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -360,13 +360,13 @@ if ($aCMDResult['load-data'] || $aCMDResult['all']) { } echo "Load Data\n"; + $sColumns = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'; + $aDBInstances = array(); $iLoadThreads = max(1, $iInstances - 1); for ($i = 0; $i < $iLoadThreads; $i++) { $aDBInstances[$i] =& getDB(true); - $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, '; - $sSQL .= ' address, extratags, geometry) '; - $sSQL .= 'select * from place where osm_id % '.$iLoadThreads.' 
= '.$i; + $sSQL = "INSERT INTO placex ($sColumns) SELECT $sColumns FROM place WHERE osm_id % $iLoadThreads = $i"; $sSQL .= " and not (class='place' and type='houses' and osm_type='W'"; $sSQL .= " and ST_GeometryType(geometry) = 'ST_LineString')"; $sSQL .= " and ST_IsValid(geometry)"; diff --git a/utils/update.php b/utils/update.php index 232a23c9..d8ac134a 100755 --- a/utils/update.php +++ b/utils/update.php @@ -156,16 +156,25 @@ if ($bHaveDiff) { } if ($aResult['deduplicate']) { - // - if (getPostgresVersion() < 9.3) { + $oDB =& getDB(); + + if (getPostgresVersion($oDB) < 9.3) { fail("ERROR: deduplicate is only currently supported in postgresql 9.3"); } - $oDB =& getDB(); $sSQL = 'select partition from country_name order by country_code'; $aPartitions = chksql($oDB->getCol($sSQL)); $aPartitions[] = 0; + // we don't care about empty search_name_* partitions, they can't contain mentions of duplicates + foreach ($aPartitions as $i => $sPartition) { + $sSQL = "select count(*) from search_name_".$sPartition; + $nEntries = chksql($oDB->getOne($sSQL)); + if ($nEntries == 0) { + unset($aPartitions[$i]); + } + } + $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' '"; $sSQL .= " and class is null and type is null and country_code is null"; $sSQL .= " group by word_token having count(*) > 1 order by word_token"; diff --git a/utils/warm.php b/utils/warm.php index d707b9cb..68fa751c 100755 --- a/utils/warm.php +++ b/utils/warm.php @@ -11,7 +11,7 @@ $aCMDOptions = array( array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'), array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'), array('reverse-only', '', 0, 1, 0, 0, 'bool', 'Warm reverse only'), - array('search-only', '', 0, 1, 0, 0, 'bool', 'Warm reverse only'), + array('search-only', '', 0, 1, 0, 0, 'bool', 'Warm search only'), ); getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true); @@ -53,7 +53,7 @@ if (!$aResult['search-only']) { } if (!$aResult['reverse-only']) { - 
$oGeocode =& new Nominatim\Geocode($oDB); + $oGeocode = new Nominatim\Geocode($oDB); echo "Warm search: "; if ($bVerbose) echo "\n";