From: Sarah Hoffmann Date: Thu, 26 Apr 2018 20:28:53 +0000 (+0200) Subject: Merge pull request #1024 from lonvia/reduce-address-search-terms X-Git-Tag: v3.2.0~79 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/080ba00956527cc0b2e95083b35aacaa62fb856e?hp=2613ebfa0143cdd1e3220577d48d9aa8341dbf38 Merge pull request #1024 from lonvia/reduce-address-search-terms Reduce address search terms --- diff --git a/sql/functions.sql b/sql/functions.sql index 1829c63d..16862acb 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -1234,6 +1234,7 @@ DECLARE relation_members TEXT[]; relMember RECORD; linkedplacex RECORD; + addr_item RECORD; search_diameter FLOAT; search_prevdiameter FLOAT; search_maxrank INTEGER; @@ -1766,44 +1767,43 @@ BEGIN parent_place_id_rank = 0; - -- convert isin to array of tokenids + -- convert address store to array of tokenids --DEBUG: RAISE WARNING 'Starting address search'; isin_tokens := '{}'::int[]; IF NEW.address IS NOT NULL THEN - isin := avals(NEW.address); - IF array_upper(isin, 1) IS NOT NULL THEN - FOR i IN 1..array_upper(isin, 1) LOOP - -- TODO further split terms with comma and semicolon - address_street_word_id := get_name_id(make_standard_name(isin[i])); + FOR addr_item IN SELECT * FROM each(NEW.address) + LOOP + IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province', 'district', 'region', 'county', 'municipality', 'hamlet', 'village', 'subdistrict', 'town', 'neighbourhood', 'quarter', 'parish') THEN + address_street_word_id := get_name_id(make_standard_name(addr_item.value)); IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN - nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); isin_tokens := isin_tokens || address_street_word_id; END IF; - - -- merge word into address vector - address_street_word_id := get_word_id(make_standard_name(isin[i])); + address_street_word_id := get_word_id(make_standard_name(addr_item.value)); IF address_street_word_id IS NOT NULL THEN nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); END IF; - END LOOP; - END IF; - END IF; + END IF; + IF addr_item.key = 'is_in' THEN + -- is_in items need splitting + isin := regexp_split_to_array(addr_item.value, E'[;,]'); + IF array_upper(isin, 1) IS NOT NULL THEN + FOR i IN 1..array_upper(isin, 1) LOOP + address_street_word_id := get_name_id(make_standard_name(isin[i])); + IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN + isin_tokens := isin_tokens || address_street_word_id; + END IF; - -- %NOTIGERDATA% IF 0 THEN - -- for the USA we have an additional address table. Merge in zip codes from there too - IF NEW.rank_search = 26 AND NEW.country_code = 'us' THEN - FOR location IN SELECT distinct postcode from location_property_tiger where parent_place_id = NEW.place_id LOOP - address_street_word_id := get_name_id(make_standard_name(location.postcode)); - nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); - isin_tokens := isin_tokens || address_street_word_id; - - -- also merge in the single word version - address_street_word_id := get_word_id(make_standard_name(location.postcode)); - nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); + -- merge word into address vector + address_street_word_id := get_word_id(make_standard_name(isin[i])); + IF address_street_word_id IS NOT NULL THEN + nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); + END IF; + END LOOP; + END IF; + END IF; END LOOP; END IF; - --DEBUG: RAISE WARNING 'Tiger postcodes collected'; - -- %NOTIGERDATA% END IF; + nameaddress_vector := array_merge(nameaddress_vector, isin_tokens); -- RAISE WARNING 'ISIN: %', isin_tokens; @@ -1884,40 +1884,6 @@ BEGIN END LOOP; --DEBUG: RAISE WARNING 'address computed'; - -- try using the isin value to find parent places - IF array_upper(isin_tokens, 1) IS NOT NULL THEN - FOR i IN 1..array_upper(isin_tokens, 1) LOOP ---RAISE WARNING ' getNearestNamedFeature: % % % %',NEW.partition, place_centroid, search_maxrank, isin_tokens[i]; - IF NOT ARRAY[isin_tokens[i]] <@ nameaddress_vector THEN - - FOR location IN SELECT * from getNearestNamedFeature(NEW.partition, place_centroid, search_maxrank, isin_tokens[i]) LOOP - ---RAISE WARNING ' ISIN: %',location; - - IF location.rank_search > 4 THEN - nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); - INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) - VALUES (NEW.place_id, location.place_id, false, NOT address_havelevel[location.rank_address], location.distance, location.rank_address); - IF NEW.postcode is null AND location.postcode is not null - AND NOT address_havelevel[location.rank_address] THEN - NEW.postcode := location.postcode; - END IF; - - address_havelevel[location.rank_address] := true; - - IF location.rank_address > parent_place_id_rank THEN - NEW.parent_place_id = location.place_id; - parent_place_id_rank = location.rank_address; - END IF; - END IF; - END LOOP; - - END IF; - - END LOOP; - END IF; - --DEBUG: RAISE WARNING 'isin tokens processed'; - -- for long ways we should add search terms for the entire length IF st_length(NEW.geometry) > 0.05 THEN diff --git a/sql/indices.src.sql b/sql/indices.src.sql index cf5c4bc1..61af8900 100644 --- a/sql/indices.src.sql +++ b/sql/indices.src.sql @@ -19,8 +19,6 @@ CREATE INDEX idx_location_area_country_place_id ON location_area_country USING B CREATE INDEX idx_osmline_parent_place_id ON location_property_osmline USING BTREE (parent_place_id) {ts:search-index}; -CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid) {ts:address-index}; - DROP INDEX IF EXISTS place_id_idx; CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type) {ts:address-index}; diff --git a/sql/partition-functions.src.sql b/sql/partition-functions.src.sql index 32b5f0a1..cc11cf96 100644 --- a/sql/partition-functions.src.sql +++ b/sql/partition-functions.src.sql @@ -84,38 +84,6 @@ END $$ LANGUAGE plpgsql; -create or replace function getNearestNamedFeature(in_partition INTEGER, point GEOMETRY, maxrank INTEGER, isin_token INTEGER) RETURNS setof nearfeature AS $$ -DECLARE - r nearfeature%rowtype; -BEGIN - --- start - IF in_partition = -partition- THEN - FOR r IN - SELECT place_id, name_vector, address_rank, search_rank, - ST_Distance(centroid, point) as distance, null as isguess - FROM search_name_-partition- - WHERE name_vector @> ARRAY[isin_token] - AND search_rank < maxrank - UNION ALL - SELECT place_id, name_vector, address_rank, search_rank, - ST_Distance(centroid, point) as distance, null as isguess - FROM search_name_country - WHERE name_vector @> ARRAY[isin_token] - AND search_rank < maxrank - ORDER BY distance ASC limit 1 - LOOP - RETURN NEXT r; - END LOOP; - RETURN; - END IF; --- end - - RAISE EXCEPTION 'Unknown partition %', in_partition; -END -$$ -LANGUAGE plpgsql; - create or replace function getNearestNamedRoadFeature(in_partition INTEGER, point GEOMETRY, isin_token INTEGER[]) RETURNS setof nearfeature AS $$ DECLARE @@ -185,15 +153,6 @@ BEGIN INSERT INTO search_name (place_id, search_rank, address_rank, importance, country_code, name_vector, nameaddress_vector, centroid) values (in_place_id, in_rank_search, in_rank_address, in_importance, in_country_code, in_name_vector, in_nameaddress_vector, in_centroid); - IF in_rank_search <= 4 THEN - DELETE FROM search_name_country WHERE place_id = in_place_id; - IF in_rank_address > 0 THEN - INSERT INTO search_name_country (place_id, search_rank, address_rank, name_vector, centroid) - values (in_place_id, in_rank_search, in_rank_address, in_name_vector, in_geometry); - END IF; - RETURN TRUE; - END IF; - -- start IF in_partition = -partition- THEN DELETE FROM search_name_-partition- values WHERE place_id = in_place_id; @@ -216,7 +175,6 @@ DECLARE BEGIN DELETE from search_name WHERE place_id = in_place_id; - DELETE from search_name_country WHERE place_id = in_place_id; -- start IF in_partition = -partition- THEN diff --git a/sql/partition-tables.src.sql b/sql/partition-tables.src.sql index d8f02e10..61ed5281 100644 --- a/sql/partition-tables.src.sql +++ b/sql/partition-tables.src.sql @@ -38,10 +38,6 @@ CREATE TABLE search_name_blank ( CREATE TABLE location_area_country () INHERITS (location_area_large) {ts:address-data}; CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) {ts:address-index}; -CREATE TABLE search_name_country () INHERITS (search_name_blank) {ts:address-data}; -CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id) {ts:address-index}; -CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off) {ts:address-index}; - -- start CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large) {ts:address-data}; CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) {ts:address-index}; diff --git a/test/bdd/db/import/search_name.feature b/test/bdd/db/import/search_name.feature index 86bdea9b..cf3ce4dd 100644 --- a/test/bdd/db/import/search_name.feature +++ b/test/bdd/db/import/search_name.feature @@ -23,3 +23,57 @@ Feature: Creation of search terms Then search_name contains | object | name_vector | nameaddress_vector | | N1 | foo | the road | + + Scenario: Some addr: tags are added to address when the name exists + Given the scene roads-with-pois + And the places + | osm | class | type | name | geometry | + | N1 | place | state | new york | 80 80 | + | N1 | place | city | bonn | 81 81 | + | N1 | place | suburb | smalltown| 80 81 | + And the named places + | osm | class | type | addr+city | addr+state | addr+suburb | geometry | + | W1 | highway | service | bonn | New York | Smalltown | :w-north | + When importing + Then search_name contains + | object | nameaddress_vector | + | W1 | bonn, new york, smalltown | + + Scenario: A known addr:* tag is not added if the name is unknown + Given the scene roads-with-pois + And the places + | osm | class | type | name | addr+city | geometry | + | W1 | highway | residential | Road | Nandu | :w-north | + When importing + Then search_name contains not + | object | nameaddress_vector | + | W1 | nandu | + + Scenario: addr:postcode is not added to the address terms + Given the scene roads-with-pois + And the places + | osm | class | type | name+ref | geometry | + | N1 | place | state | 12345 | 80 80 | + And the named places + | osm | class | type | addr+postcode | geometry | + | W1 | highway | residential | 12345 | :w-north | + When importing + Then search_name contains not + | object | nameaddress_vector | + | W1 | 12345 | + + Scenario: is_in is split and added to the address search terms + Given the scene roads-with-pois + And the places + | osm | class | type | name | geometry | + | N1 | place | state | new york | 80 80 | + | N1 | place | city | bonn | 81 81 | + | N1 | place | suburb | smalltown| 80 81 | + And the named places + | osm | class | type | addr+is_in | geometry | + | W1 | highway | service | bonn, New York, Smalltown | :w-north | + When importing + Then search_name contains + | object | nameaddress_vector | + | W1 | bonn, new york, smalltown | + diff --git a/test/bdd/steps/db_ops.py b/test/bdd/steps/db_ops.py index 87babdad..80f92222 100644 --- a/test/bdd/steps/db_ops.py +++ b/test/bdd/steps/db_ops.py @@ -427,8 +427,8 @@ def check_placex_contents(context, exact): context.db.commit() -@then("search_name contains") -def check_search_name_contents(context): +@then("search_name contains(?P not)?") +def check_search_name_contents(context, exclude): cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) for row in context.table: @@ -446,11 +446,16 @@ def check_search_name_contents(context): FROM word, (SELECT unnest(%s) as term) t WHERE word_token = make_standard_name(t.term)""", (terms,)) - ok_(subcur.rowcount >= len(terms), - "No word entry found for " + row[h]) + if not exclude: + ok_(subcur.rowcount >= len(terms), + "No word entry found for " + row[h]) for wid in subcur: - assert_in(wid[0], res[h], - "Missing term for %s/%s: %s" % (pid, h, wid[1])) + if exclude: + assert_not_in(wid[0], res[h], + "Found term for %s/%s: %s" % (pid, h, wid[1])) + else: + assert_in(wid[0], res[h], + "Missing term for %s/%s: %s" % (pid, h, wid[1])) else: assert_db_column(res, h, row[h], context)