From c48fb88e6b4020e960ccb656cb0741ab5f2fec74 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 26 Feb 2017 12:58:07 +0100 Subject: [PATCH] move interpolaton computation into indexing step The interpolaton computation needs information from the osm2pgsql slim tables which may not be available when the data is inserted. Insertion now only adds a line with basic address information to location_property_osmline. The line is then split during the indexing, leading to more lines (which are complete in that case) being inserted. Fixes #598. --- lib/Geocode.php | 5 +- lib/ReverseGeocode.php | 2 +- sql/functions.sql | 237 ++++++++++++++++++++----------------- sql/tables.sql | 2 + test/bdd/steps/db_ops.py | 13 +- test/bdd/steps/osm_data.py | 10 +- utils/setup.php | 10 +- 7 files changed, 155 insertions(+), 124 deletions(-) diff --git a/lib/Geocode.php b/lib/Geocode.php index 4809575d..c6341829 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -1326,6 +1326,7 @@ class Geocode $aOrder[0] .= " SELECT place_id "; $aOrder[0] .= " FROM location_property_osmline "; $aOrder[0] .= " WHERE parent_place_id = search_name.place_id"; + $aOrder[0] .= " AND startnumber is not NULL"; $aOrder[0] .= " AND ".intval($aSearch['sHouseNumber']).">=startnumber "; $aOrder[0] .= " AND ".intval($aSearch['sHouseNumber'])."<=endnumber "; $aOrder[0] .= " LIMIT 1"; @@ -1462,7 +1463,7 @@ class Geocode // do we need to use transliteration and the regex for housenumbers??? //new query for lines, not housenumbers anymore $sSQL = "SELECT distinct place_id FROM location_property_osmline"; - $sSQL .= " WHERE parent_place_id in (".$sPlaceIDs.") and ("; + $sSQL .= " WHERE startnumber is not NULL and parent_place_id in (".$sPlaceIDs.") and ("; if ($searchedHousenumber%2 == 0) { //if housenumber is even, look for housenumber in streets with interpolationtype even or all $sSQL .= "interpolationtype='even'"; @@ -1681,7 +1682,7 @@ class Geocode $sSQL .= " SELECT place_id "; $sSQL .= " FROM location_property_osmline "; $sSQL .= " WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).")"; - $sSQL .= " AND (30 between $this->iMinAddressRank and $this->iMaxAddressRank)"; + $sSQL .= " AND startnumber is not NULL AND (30 between $this->iMinAddressRank and $this->iMaxAddressRank)"; if (CONST_Debug) var_dump($sSQL); $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL)); $tempIDs = array(); diff --git a/lib/ReverseGeocode.php b/lib/ReverseGeocode.php index 8fc22f69..d4e13875 100644 --- a/lib/ReverseGeocode.php +++ b/lib/ReverseGeocode.php @@ -57,7 +57,7 @@ class ReverseGeocode $sSQL .= ' , ST_Distance(linegeo,'.$sPointSQL.') as distance'; $sSQL .= ' FROM location_property_osmline'; $sSQL .= ' WHERE ST_DWithin('.$sPointSQL.', linegeo, '.$fSearchDiam.')'; - $sSQL .= ' and indexed_status = 0 '; + $sSQL .= ' and indexed_status = 0 and startnumber is not NULL '; $sSQL .= ' ORDER BY ST_distance('.$sPointSQL.', linegeo) ASC limit 1'; return chksql( diff --git a/sql/functions.sql b/sql/functions.sql index f9a89b51..61228c42 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -598,7 +598,7 @@ BEGIN IF addr_street is null and addr_place is null THEN select nodes from planet_osm_ways where id = wayid INTO waynodes; - FOR location IN SELECT placex.street, placex.addr_place from placex + FOR location IN SELECT placex.street, placex.addr_place from placex where osm_type = 'N' and osm_id = ANY(waynodes) and (placex.street is not null or placex.addr_place is not null) and indexed_status < 100 @@ -647,101 +647,27 @@ $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION insert_osmline(wayid BIGINT, interpolationtype TEXT, - street TEXT, addr_place TEXT, - defpostalcode TEXT, country_code TEXT, - geom GEOMETRY) -RETURNS INTEGER AS $$ -DECLARE - - newpoints INTEGER; - waynodes BIGINT[]; - nodeid BIGINT; - prevnode RECORD; - nextnode RECORD; - startnumber INTEGER; - endnumber INTEGER; - housenum INTEGER; - linegeo GEOMETRY; - splitline GEOMETRY; - sectiongeo GEOMETRY; - pointgeo GEOMETRY; - place_centroid GEOMETRY; - calculated_country_code VARCHAR(2); - partition INTEGER; - geometry_sector INTEGER; - +CREATE OR REPLACE FUNCTION osmline_insert() RETURNS TRIGGER + AS $$ BEGIN - place_centroid := ST_PointOnSurface(geom); - calculated_country_code := lower(get_country_code(place_centroid)); - partition := get_partition(calculated_country_code); - geometry_sector := geometry_sector(partition, place_centroid); - - IF interpolationtype != 'odd' AND interpolationtype != 'even' AND interpolationtype!='all' THEN - -- other interpolation types than odd/even/all (e.g. numeric ones) are not supported - RETURN 0; - END IF; - - select nodes from planet_osm_ways where id = wayid INTO waynodes; - - IF array_upper(waynodes, 1) IS NULL THEN - RETURN 0; - END IF; - - linegeo := geom; - startnumber := NULL; - - FOR nodeidpos in 1..array_upper(waynodes, 1) LOOP + NEW.place_id := nextval('seq_place'); + NEW.indexed_date := now(); - select * from place where osm_type = 'N' and osm_id = waynodes[nodeidpos]::BIGINT - and housenumber is not NULL limit 1 INTO nextnode; - --RAISE NOTICE 'Nextnode.place_id: %s', nextnode.place_id; - IF nextnode.osm_id IS NOT NULL THEN - --RAISE NOTICE 'place_id is not null'; - IF nodeidpos > 1 and nodeidpos < array_upper(waynodes, 1) THEN - -- Make sure that the point is actually on the line. That might - -- be a bit paranoid but ensures that the algorithm still works - -- should osm2pgsql attempt to repair geometries. - splitline := split_line_on_node(linegeo, nextnode.geometry); - sectiongeo := ST_GeometryN(splitline, 1); - linegeo := ST_GeometryN(splitline, 2); - ELSE - sectiongeo = linegeo; + IF NEW.indexed_status IS NULL THEN + IF NEW.interpolationtype NOT IN ('odd', 'even', 'all') THEN + -- other interpolation types than odd/even/all (e.g. numeric ones) are not supported + RETURN NULL; END IF; - endnumber := substring(nextnode.housenumber,'[0-9]+')::integer; - - IF startnumber IS NOT NULL AND endnumber IS NOT NULL - AND startnumber != endnumber - AND ST_GeometryType(sectiongeo) = 'ST_LineString' THEN - IF (startnumber > endnumber) THEN - housenum := endnumber; - endnumber := startnumber; - startnumber := housenum; - sectiongeo := ST_Reverse(sectiongeo); - END IF; - - insert into location_property_osmline - values (sectiongeo, nextval('seq_place'), partition, wayid, NULL, - startnumber, endnumber, interpolationtype, - coalesce(street, prevnode.street, nextnode.street), - coalesce(addr_place, prevnode.addr_place, nextnode.addr_place), - coalesce(defpostalcode, prevnode.postcode, nextnode.postcode), - calculated_country_code, geometry_sector, 2, now()); - END IF; + NEW.indexed_status := 1; --STATUS_NEW - -- early break if we are out of line string, - -- might happen when a line string loops back on itself - IF ST_GeometryType(linegeo) != 'ST_LineString' THEN - RETURN 0; - END IF; + NEW.calculated_country_code := lower(get_country_code(NEW.linegeo)); - startnumber := substring(nextnode.housenumber,'[0-9]+')::integer; - prevnode := nextnode; - END IF; - END LOOP; + NEW.partition := get_partition(NEW.calculated_country_code); + NEW.geometry_sector := geometry_sector(NEW.partition, NEW.linegeo); + END IF; - RETURN 1; + RETURN NEW; END; $$ LANGUAGE plpgsql; @@ -759,19 +685,6 @@ DECLARE classtable TEXT; line RECORD; BEGIN - --DEBUG: RAISE WARNING '% %',NEW.osm_type,NEW.osm_id; - - -- ignore interpolated addresses, not necessary anymore, cause interpolated addresses are now in location_property_osmline - IF NEW.class = 'place' and NEW.type = 'address' THEN - RETURN NEW; - END IF; - - IF ST_IsEmpty(NEW.geometry) OR NOT ST_IsValid(NEW.geometry) OR ST_X(ST_Centroid(NEW.geometry))::text in ('NaN','Infinity','-Infinity') OR ST_Y(ST_Centroid(NEW.geometry))::text in ('NaN','Infinity','-Infinity') THEN - -- block all invalid geometary - just not worth the risk. seg faults are causing serious problems. - RAISE WARNING 'invalid geometry %',NEW.osm_id; - RETURN NULL; - END IF; - --DEBUG: RAISE WARNING '% % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type; NEW.place_id := nextval('seq_place'); @@ -1069,6 +982,18 @@ TRIGGER AS $$ DECLARE place_centroid GEOMETRY; + waynodes BIGINT[]; + prevnode RECORD; + nextnode RECORD; + startnumber INTEGER; + endnumber INTEGER; + housenum INTEGER; + linegeo GEOMETRY; + splitline GEOMETRY; + sectiongeo GEOMETRY; + street TEXT; + addr_place TEXT; + postcode TEXT; BEGIN -- deferred delete IF OLD.indexed_status = 100 THEN @@ -1080,19 +1005,106 @@ BEGIN RETURN NEW; END IF; - -- do the reparenting: (finally here, because ALL places in placex, that are needed for reparenting, need to be up to date) - -- (the osm interpolationline in location_property_osmline was marked for reparenting in placex_insert/placex_delete with index_status = 1 or 2 (1 inset, 2 delete) + -- do the reparenting: (finally here, because ALL places in placex, + -- that are needed for reparenting, need to be up to date) + -- (the osm interpolationline in location_property_osmline was marked for + -- reparenting in placex_insert/placex_delete with index_status = 1 or 2 (1 inset, 2 delete) -- => index.c: sets index_status back to 0 -- => triggers this function) place_centroid := ST_PointOnSurface(NEW.linegeo); - -- marking descendants for reparenting is not needed, because there are actually no descendants for interpolation lines + -- marking descendants for reparenting is not needed, because there are + -- actually no descendants for interpolation lines NEW.parent_place_id = get_interpolation_parent(NEW.osm_id, NEW.street, NEW.addr_place, NEW.partition, place_centroid, NEW.linegeo); - return NEW; + + -- if we are just updating then our work is done + IF OLD.indexed_status != 1 THEN + return NEW; + END IF; + + -- otherwise split the line as necessary + select nodes from planet_osm_ways where id = NEW.osm_id INTO waynodes; + + IF array_upper(waynodes, 1) IS NULL THEN + RETURN 0; + END IF; + + linegeo := NEW.linegeo; + startnumber := NULL; + street := NEW.street; + addr_place := NEW.addr_place; + postcode := NEW.postcode; + + FOR nodeidpos in 1..array_upper(waynodes, 1) LOOP + + select * from place where osm_type = 'N' and osm_id = waynodes[nodeidpos]::BIGINT + and housenumber is not NULL limit 1 INTO nextnode; + --RAISE NOTICE 'Nextnode.place_id: %s', nextnode.place_id; + IF nextnode.osm_id IS NOT NULL THEN + --RAISE NOTICE 'place_id is not null'; + IF nodeidpos > 1 and nodeidpos < array_upper(waynodes, 1) THEN + -- Make sure that the point is actually on the line. That might + -- be a bit paranoid but ensures that the algorithm still works + -- should osm2pgsql attempt to repair geometries. + splitline := split_line_on_node(linegeo, nextnode.geometry); + sectiongeo := ST_GeometryN(splitline, 1); + linegeo := ST_GeometryN(splitline, 2); + ELSE + sectiongeo = linegeo; + END IF; + endnumber := substring(nextnode.housenumber,'[0-9]+')::integer; + + IF startnumber IS NOT NULL AND endnumber IS NOT NULL + AND startnumber != endnumber + AND ST_GeometryType(sectiongeo) = 'ST_LineString' THEN + + IF (startnumber > endnumber) THEN + housenum := endnumber; + endnumber := startnumber; + startnumber := housenum; + sectiongeo := ST_Reverse(sectiongeo); + END IF; + + IF NEW.startnumber IS NULL THEN + NEW.startnumber := startnumber; + NEW.endnumber := endnumber; + NEW.linegeo := sectiongeo; + NEW.street := coalesce(street, prevnode.street, nextnode.street); + NEW.addr_place := coalesce(addr_place, prevnode.addr_place, nextnode.addr_place); + NEW.postcode := coalesce(postcode, prevnode.postcode, nextnode.postcode); + ELSE + insert into location_property_osmline + (linegeo, partition, osm_id, parent_place_id, + startnumber, endnumber, interpolationtype, + street, addr_place, postcode, calculated_country_code, + geometry_sector, indexed_status) + values (sectiongeo, NEW.partition, NEW.osm_id, NEW.parent_place_id, + startnumber, endnumber, NEW.interpolationtype, + coalesce(street, prevnode.street, nextnode.street), + coalesce(addr_place, prevnode.addr_place, nextnode.addr_place), + coalesce(postcode, prevnode.postcode, nextnode.postcode), + NEW.calculated_country_code, NEW.geometry_sector, 0); + END IF; + END IF; + + -- early break if we are out of line string, + -- might happen when a line string loops back on itself + IF ST_GeometryType(linegeo) != 'ST_LineString' THEN + RETURN NEW; + END IF; + + startnumber := substring(nextnode.housenumber,'[0-9]+')::integer; + prevnode := nextnode; + END IF; + END LOOP; + + RETURN NEW; END; $$ LANGUAGE plpgsql; + + CREATE OR REPLACE FUNCTION placex_update() RETURNS TRIGGER AS $$ @@ -1924,7 +1936,7 @@ BEGIN RETURN null; END IF; - -- decide, whether it is an osm interpolation line => insert_osmline, or else just insert into placex + -- decide, whether it is an osm interpolation line => insert intoosmline, or else just placex IF NEW.class='place' and NEW.type='houses' and NEW.osm_type='W' and ST_GeometryType(NEW.geometry) = 'ST_LineString' THEN -- Have we already done this place? select * from place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type INTO existing; @@ -1949,11 +1961,16 @@ BEGIN update placex p set indexed_status = 2 from planet_osm_ways w where w.id = NEW.osm_id and p.osm_type = 'N' and p.osm_id = any(w.nodes); - -- insert new line into location_property_osmline, use function insert_osmline + + + INSERT INTO location_property_osmline + (osm_id, interpolationtype, street, + addr_place, postcode, calculated_country_code, linegeo) + VALUES (NEW.osm_id, NEW.housenumber, NEW.street, + NEW.addr_place, NEW.postcode, NEW.country_code, NEW.geometry); IF existing.osm_type IS NULL THEN - i = insert_osmline(NEW.osm_id, NEW.housenumber, NEW.street, NEW.addr_place, NEW.postcode, NEW.country_code, NEW.geometry); return NEW; END IF; @@ -1978,8 +1995,6 @@ BEGIN admin_level = NEW.admin_level, geometry = NEW.geometry where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type; - - i = insert_osmline(NEW.osm_id, NEW.housenumber, NEW.street, NEW.addr_place, NEW.postcode, NEW.country_code, NEW.geometry); END IF; RETURN NULL; diff --git a/sql/tables.sql b/sql/tables.sql index 19398eb5..8cc2c831 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -167,6 +167,8 @@ GRANT SELECT on location_area to "{www-user}" ; -- insert creates the location tables, creates location indexes if indexed == true CREATE TRIGGER placex_before_insert BEFORE INSERT ON placex FOR EACH ROW EXECUTE PROCEDURE placex_insert(); +CREATE TRIGGER osmline_before_insert BEFORE INSERT ON location_property_osmline + FOR EACH ROW EXECUTE PROCEDURE osmline_insert(); -- update insert creates the location tables CREATE TRIGGER placex_before_update BEFORE UPDATE ON placex diff --git a/test/bdd/steps/db_ops.py b/test/bdd/steps/db_ops.py index 3c5c5632..56aa0c80 100644 --- a/test/bdd/steps/db_ops.py +++ b/test/bdd/steps/db_ops.py @@ -216,9 +216,13 @@ def import_and_index_data_from_place_table(context): geometry) select * from place where not (class='place' and type='houses' and osm_type='W')""") cur.execute( - """select insert_osmline (osm_id, housenumber, street, addr_place, - postcode, country_code, geometry) - from place where class='place' and type='houses' and osm_type='W'""") + """insert into location_property_osmline + (osm_id, interpolationtype, street, addr_place, + postcode, calculated_country_code, linegeo) + SELECT osm_id, housenumber, street, addr_place, + postcode, country_code, geometry from place + WHERE class='place' and type='houses' and osm_type='W' + and ST_GeometryType(geometry) = 'ST_LineString'""") context.db.commit() context.nominatim.run_setup_script('index', 'index-noanalyse') @@ -373,7 +377,8 @@ def check_location_property_osmline(context, oid, neg): eq_('W', nid.typ, "interpolation must be a way") cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt - FROM location_property_osmline WHERE osm_id = %s""", + FROM location_property_osmline + WHERE osm_id = %s AND startnumber IS NOT NULL""", (nid.oid, )) if neg: diff --git a/test/bdd/steps/osm_data.py b/test/bdd/steps/osm_data.py index 588dcefe..d0d8d892 100644 --- a/test/bdd/steps/osm_data.py +++ b/test/bdd/steps/osm_data.py @@ -82,9 +82,13 @@ def update_from_osm_file(context): admin_level, housenumber, street, addr_place, isin, postcode, country_code, extratags, geometry) select * from place""") cur.execute( - """select insert_osmline (osm_id, housenumber, street, addr_place, - postcode, country_code, geometry) - from place where class='place' and type='houses' and osm_type='W'""") + """insert into location_property_osmline + (osm_id, interpolationtype, street, addr_place, + postcode, calculated_country_code, linegeo) + SELECT osm_id, housenumber, street, addr_place, + postcode, country_code, geometry from place + WHERE class='place' and type='houses' and osm_type='W' + and ST_GeometryType(geometry) = 'ST_LineString'""") context.db.commit() context.nominatim.run_setup_script('index', 'index-noanalyse') context.nominatim.run_setup_script('create-functions', 'create-partition-functions', diff --git a/utils/setup.php b/utils/setup.php index e4e91674..87c1dd07 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -373,8 +373,11 @@ if ($aCMDResult['load-data'] || $aCMDResult['all']) { } // last thread for interpolation lines $aDBInstances[$iLoadThreads] =& getDB(true); - $sSQL = 'select insert_osmline (osm_id, housenumber, street, addr_place, postcode, country_code, '; - $sSQL .= 'geometry) from place where '; + $sSQL = 'insert into location_property_osmline'; + $sSQL .= ' (osm_id, interpolationtype, street, addr_place,'; + $sSQL .= ' postcode, calculated_country_code, linegeo)'; + $sSQL .= ' SELECT osm_id, housenumber, street, addr_place,'; + $sSQL .= ' postcode, country_code, geometry from place where '; $sSQL .= "class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString'"; if ($aCMDResult['verbose']) echo "$sSQL\n"; if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection)); @@ -475,7 +478,8 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,"; $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,"; $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y "; - $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x"; + $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x "; + $sSQL .= "where ST_Point(x,y) is not null"; if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); if (CONST_Use_Extra_US_Postcodes) { -- 2.45.2