]> git.openstreetmap.org Git - nominatim.git/commitdiff
lookup places for address tags for rank < 30
author Sarah Hoffmann <lonvia@denofr.de>
Mon, 9 Nov 2020 11:03:37 +0000 (12:03 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Mon, 16 Nov 2020 14:28:01 +0000 (15:28 +0100)
While previously the content of addr:* tags was only added
to the list of address search keywords, we now really look up
the matching place. This has the advantage that we pull in all
potential translations from the place, just like all the other
address terms that are looked up by neighbourhood search.

If no place can be found for a given name, the content of the
addr:* tag is still added to the search keywords as before.

sql/functions/placex_triggers.sql
sql/functions/ranking.sql
sql/partition-functions.src.sql
test/bdd/db/import/search_name.feature

index 6848140ad44e249fcacf41eb6768abfbb9b347c8..5e43b27afdb3999c3e7cb9e82c2886f5c63d6e91 100644 (file)
@@ -254,6 +254,7 @@ CREATE OR REPLACE FUNCTION insert_addresslines(obj_place_id BIGINT,
                                                maxrank SMALLINT,
                                                address HSTORE,
                                                geometry GEOMETRY,
+                                               country TEXT,
                                                OUT parent_place_id BIGINT,
                                                OUT postcode TEXT,
                                                OUT nameaddress_vector INT[])
@@ -265,45 +266,49 @@ DECLARE
   current_boundary GEOMETRY := NULL;
   current_node_area GEOMETRY := NULL;
 
-  location RECORD;
-  addr_item RECORD;
+  parent_place_rank INT := 0;
+  addr_place_ids BIGINT[];
 
-  isin_tokens INT[];
+  location RECORD;
 BEGIN
   parent_place_id := 0;
   nameaddress_vector := '{}'::int[];
-  isin_tokens := '{}'::int[];
 
-  ---- convert address store to array of tokenids
-  IF address IS NOT NULL THEN
-    FOR addr_item IN SELECT * FROM each(address)
-    LOOP
-      IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province',
-                           'district', 'region', 'county', 'municipality',
-                           'hamlet', 'village', 'subdistrict', 'town',
-                           'neighbourhood', 'quarter', 'parish')
-      THEN
-        isin_tokens := array_merge(isin_tokens,
-                                   word_ids_from_name(addr_item.value));
-        IF NOT %REVERSE-ONLY% THEN
-          nameaddress_vector := array_merge(nameaddress_vector,
-                                            addr_ids_from_name(addr_item.value));
+  address_havelevel := array_fill(false, ARRAY[maxrank]);
+
+  FOR location IN
+    SELECT * FROM get_places_for_addr_tags(partition, geometry,
+                                                   address, country)
+    ORDER BY rank_address, distance, isguess desc
+  LOOP
+    IF NOT %REVERSE-ONLY% THEN
+      nameaddress_vector := array_merge(nameaddress_vector,
+                                        location.keywords::int[]);
+    END IF;
+
+    IF location.place_id is not null THEN
+      location_isaddress := not address_havelevel[location.rank_address];
+      IF not address_havelevel[location.rank_address] THEN
+        address_havelevel[location.rank_address] := true;
+        IF parent_place_rank < location.rank_address THEN
+          parent_place_id := location.place_id;
+          parent_place_rank := location.rank_address;
         END IF;
       END IF;
-    END LOOP;
-  END IF;
-  IF NOT %REVERSE-ONLY% THEN
-    nameaddress_vector := array_merge(nameaddress_vector, isin_tokens);
-  END IF;
 
-  ---- now compute the address terms
-  FOR i IN 1..maxrank LOOP
-    address_havelevel[i] := false;
+      INSERT INTO place_addressline (place_id, address_place_id, fromarea,
+                                     isaddress, distance, cached_rank_address)
+        VALUES (obj_place_id, location.place_id, not location.isguess,
+                true, location.distance, location.rank_address);
+
+      addr_place_ids := array_append(addr_place_ids, location.place_id);
+    END IF;
   END LOOP;
 
   FOR location IN
     SELECT * FROM getNearFeatures(partition, geometry, maxrank)
-    ORDER BY rank_address, isin_tokens && keywords desc, isguess asc,
+    WHERE addr_place_ids is null or not addr_place_ids @> ARRAY[place_id]
+    ORDER BY rank_address, isguess asc,
              distance *
                CASE WHEN rank_address = 16 AND rank_search = 15 THEN 0.2
                     WHEN rank_address = 16 AND rank_search = 16 THEN 0.25
@@ -920,7 +925,8 @@ BEGIN
                                     NEW.address,
                                     CASE WHEN (NEW.rank_address = 0 or
                                                NEW.rank_search between 26 and 29)
-                                         THEN NEW.geometry ELSE NEW.centroid END)
+                                         THEN NEW.geometry ELSE NEW.centroid END,
+                                    NEW.country_code)
     INTO NEW.parent_place_id, NEW.postcode, nameaddress_vector;
 
   --DEBUG: RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector;
index a84269fe6730669b6c805a0bb632b645c87f1199..51dcd0d03dd76d937719fb1b45ce00ff17e89307 100644 (file)
@@ -233,3 +233,50 @@ BEGIN
 END;
 $$
 LANGUAGE plpgsql IMMUTABLE;
+
+-- Determine which address ranks a place named in an addr:<key> tag may have.
+--
+-- Looks up `key` as a type of class 'place' in address_levels. A row for
+-- `country` is preferred over the country-independent row (country_code
+-- NULL): the ascending ORDER BY sorts NULLs last and only the first row
+-- is kept.
+--
+-- Output:
+--   from_rank, to_rank - inclusive band of address ranks that contains the
+--                        rank_address of the selected row
+--   extent             - reverse_place_diameter() of the row's rank_search
+-- All three stay NULL when no row matches or when the selected row does
+-- not have a rank_address greater than 0.
+--
+-- Declared STABLE rather than IMMUTABLE because the result depends on the
+-- content of the address_levels table.
+CREATE OR REPLACE FUNCTION get_addr_tag_rank(key TEXT, country TEXT,
+                                             OUT from_rank SMALLINT,
+                                             OUT to_rank SMALLINT,
+                                             OUT extent FLOAT)
+  AS $$
+DECLARE
+  ranks RECORD;
+BEGIN
+  from_rank := null;
+
+  -- The inner query yields at most one row, so the loop body runs at most
+  -- once. The rank_address filter is applied after LIMIT 1 on purpose: a
+  -- country-specific row with rank_address 0 suppresses the default row.
+  FOR ranks IN
+    SELECT * FROM
+      (SELECT l.rank_search, l.rank_address FROM address_levels l
+        WHERE (l.country_code = country or l.country_code is NULL)
+               AND l.class = 'place' AND l.type = key
+        ORDER BY l.country_code LIMIT 1) r
+      WHERE rank_address > 0
+  LOOP
+    extent := reverse_place_diameter(ranks.rank_search);
+
+    -- Widen the exact rank to the band of ranks it belongs to.
+    IF ranks.rank_address <= 4 THEN
+        from_rank := 4;
+        to_rank := 4;
+    ELSEIF ranks.rank_address <= 9 THEN
+        from_rank := 5;
+        to_rank := 9;
+    ELSEIF ranks.rank_address <= 12 THEN
+        from_rank := 10;
+        to_rank := 12;
+    ELSEIF ranks.rank_address <= 16 THEN
+        from_rank := 13;
+        to_rank := 16;
+    ELSEIF ranks.rank_address <= 21 THEN
+        from_rank := 17;
+        to_rank := 21;
+    ELSEIF ranks.rank_address <= 24 THEN
+        from_rank := 22;
+        to_rank := 24;
+    ELSE
+        from_rank := 25;
+        to_rank := 25;
+    END IF;
+  END LOOP;
+END;
+$$
+LANGUAGE plpgsql STABLE;
index 97520f99b8441d5fec6f61248afa3572a4d3cb00..8e54868b664bbbd108d2e371001a3b55ce1c4ca5 100644 (file)
@@ -10,8 +10,8 @@ CREATE TYPE nearfeaturecentr AS (
   centroid GEOMETRY
 );
 
-        -- feature intersects geoemtry
-        -- for areas and linestrings they must touch at least along a line
+-- feature intersects geometry
+-- for areas and linestrings they must touch at least along a line
 CREATE OR REPLACE FUNCTION is_relevant_geometry(de9im TEXT, geom_type TEXT)
 RETURNS BOOLEAN
 AS $$
@@ -39,8 +39,10 @@ BEGIN
 
 -- start
   IF in_partition = -partition- THEN
-    FOR r IN 
-      SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, postcode, centroid
+    FOR r IN
+      SELECT place_id, keywords, rank_address, rank_search,
+             min(ST_Distance(feature, centroid)) as distance,
+             isguess, postcode, centroid
       FROM location_area_large_-partition-
       WHERE geometry && feature
         AND is_relevant_geometry(ST_Relate(geometry, feature), ST_GeometryType(feature))
@@ -58,6 +60,56 @@ END
 $$
 LANGUAGE plpgsql STABLE;
 
+-- Look up the places that the addr:* tags of an object refer to.
+--
+-- Every entry of `address` is considered except country, postcode and the
+-- house number variants. get_addr_tag_rank() supplies the band of address
+-- ranks and the search extent for the key; keys for which it returns no
+-- rank band are skipped. For each remaining tag exactly one row is
+-- returned:
+--   * the best matching place from the location_area_large table of the
+--     partition: a place intersecting `feature` is preferred, then the one
+--     with the smallest distance, or
+--   * when no place matches, a row with only `keywords` set (taken from
+--     addr_ids_from_name()), so that the term remains searchable.
+--
+-- Raises 'Unknown partition' when a tag has to be looked up and
+-- in_partition matches none of the partition branches.
+-- NOTE(review): the start/end marker lines and the -partition- placeholder
+-- are presumably expanded once per partition by the SQL preprocessor;
+-- they must stay unchanged.
+CREATE OR REPLACE FUNCTION get_places_for_addr_tags(in_partition SMALLINT,
+                                                    feature GEOMETRY,
+                                                    address HSTORE, country TEXT)
+  RETURNS SETOF nearfeaturecentr
+  AS $$
+DECLARE
+  r nearfeaturecentr%rowtype;
+  item RECORD;
+BEGIN
+  FOR item IN
+    SELECT (get_addr_tag_rank(key, country)).*, key, name FROM
+      (SELECT skeys(address) as key, svals(address) as name) x
+        WHERE key not in ('country', 'postcode', 'housenumber',
+                          'conscriptionnumber', 'streetnumber')
+  LOOP
+   -- Not an address key with a known rank: nothing to look up.
+   IF item.from_rank is null THEN
+     CONTINUE;
+   END IF;
+
+-- start
+    IF in_partition = -partition- THEN
+        SELECT place_id, keywords, rank_address, rank_search,
+               min(ST_Distance(feature, centroid)) as distance,
+               isguess, postcode, centroid INTO r
+        FROM location_area_large_-partition-
+        WHERE geometry && ST_Expand(feature, item.extent)
+          AND rank_address between item.from_rank and item.to_rank
+          AND word_ids_from_name(item.name) && keywords
+        GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid
+        ORDER BY ST_Intersects(ST_Collect(geometry), feature) DESC, distance LIMIT 1;
+      IF r.place_id is null THEN
+        -- If we cannot find a place for the term, just return the
+        -- search term for the given name. That ensures that the address
+        -- element can still be searched for, even though it will not be
+        -- displayed.
+        RETURN NEXT ROW(null, addr_ids_from_name(item.name), null, null,
+                        null, null, null, null)::nearfeaturecentr;
+      ELSE
+        RETURN NEXT r;
+      END IF;
+      CONTINUE;
+    END IF;
+-- end
+
+    RAISE EXCEPTION 'Unknown partition %', in_partition;
+  END LOOP;
+END;
+$$
+LANGUAGE plpgsql STABLE;
+
 create or replace function deleteLocationArea(in_partition INTEGER, in_place_id BIGINT, in_rank_search INTEGER) RETURNS BOOLEAN AS $$
 DECLARE
 BEGIN
@@ -153,7 +205,7 @@ BEGIN
       FROM search_name_-partition-
       WHERE name_vector && isin_token
             AND centroid && ST_Expand(point, 0.04)
-            AND search_rank between 16 and 25
+            AND address_rank between 16 and 25
       ORDER BY ST_Distance(centroid, point) ASC limit 1;
     RETURN parent;
   END IF;
@@ -164,7 +216,6 @@ END
 $$
 LANGUAGE plpgsql STABLE;
 
-
 create or replace function insertSearchName(
   in_partition INTEGER, in_place_id BIGINT, in_name_vector INTEGER[],
   in_rank_search INTEGER, in_rank_address INTEGER, in_geometry GEOMETRY)
index a0a53911f86d221579f1a2cf6ec515ddef5c4296..866a597da178bf021940bd44415a250795f4efd3 100644 (file)
@@ -185,16 +185,15 @@ Feature: Creation of search terms
          | object | name_vector | nameaddress_vector |
          | N1     | foo         | the road |
 
-    Scenario: Some addr: tags are added to address when the name exists
+    Scenario: Some addr: tags are added to address
         Given the scene roads-with-pois
         And the places
          | osm | class   | type        | name     | geometry |
-         | N1  | place   | state       | new york | 80 80 |
          | N2  | place   | city        | bonn     | 81 81 |
          | N3  | place   | suburb      | smalltown| 80 81 |
         And the named places
-         | osm | class   | type    | addr+city | addr+state | addr+suburb | geometry |
-         | W1  | highway | service | bonn      | New York   | Smalltown   | :w-north |
+         | osm | class   | type    | addr+city | addr+municipality | addr+suburb | geometry |
+         | W1  | highway | service | bonn      | New York          | Smalltown   | :w-north |
         When importing
         Then search_name contains
          | object | nameaddress_vector |