]> git.openstreetmap.org Git - nominatim.git/commitdiff
overhaul address computation
authorSarah Hoffmann <lonvia@denofr.de>
Tue, 13 Oct 2020 20:10:07 +0000 (22:10 +0200)
committerSarah Hoffmann <lonvia@denofr.de>
Tue, 13 Oct 2020 20:10:07 +0000 (22:10 +0200)
This is a complete rewrite of the selection of address parts to
be inserted into the place_addressline table.

The new algorithm selects for each rank:
* the boundary overlapping with the addressee and contained
  in the already selected boundaries of lower rank, or failing that
* the place node closest to the addressee that is contained in
  the already selected boundaries and in the influence radius
  of already selected place nodes of lower rank

Place nodes that are not contained in already selected boundaries
of lower rank are completely thrown away. All other candidates are
added as non-address parts.

sql/functions/placex_triggers.sql
sql/functions/utils.sql

index 9a5c67767245edcd84a8c0a4398db54729c76317..aac7cafcdca0cb24768ba2ec874b3ec0c61b9a22 100644 (file)
@@ -259,21 +259,16 @@ CREATE OR REPLACE FUNCTION insert_addresslines(obj_place_id BIGINT,
                                                OUT nameaddress_vector INT[])
   AS $$
 DECLARE
-  current_rank_address INTEGER := 0;
-  location_distance FLOAT := 0;
-  location_parent GEOMETRY := NULL;
-  parent_place_id_rank SMALLINT := 0;
+  address_havelevel BOOLEAN[];
 
   location_isaddress BOOLEAN;
-
-  address_havelevel BOOLEAN[];
-  location_keywords INT[];
+  current_boundary GEOMETRY := NULL;
+  current_node_area GEOMETRY := NULL;
 
   location RECORD;
   addr_item RECORD;
 
   isin_tokens INT[];
-  isin TEXT[];
 BEGIN
   parent_place_id := 0;
   nameaddress_vector := '{}'::int[];
@@ -302,7 +297,7 @@ BEGIN
   END IF;
 
   ---- now compute the address terms
-  FOR i IN 1..28 LOOP
+  FOR i IN 1..maxrank LOOP
     address_havelevel[i] := false;
   END LOOP;
 
@@ -315,70 +310,58 @@ BEGIN
                     WHEN rank_address = 16 AND rank_search = 18 THEN 0.5
                     ELSE 1 END ASC
   LOOP
-    IF location.rank_address != current_rank_address THEN
-      current_rank_address := location.rank_address;
-      IF location.isguess THEN
-        location_distance := location.distance * 1.5;
-      ELSE
-        IF location.rank_address <= 12 THEN
-          -- for county and above, if we have an area consider that exact
-          -- (It would be nice to relax the constraint for places close to
-          --  the boundary but we'd need the exact geometry for that. Too
-          --  expensive.)
-          location_distance = 0;
-        ELSE
-          -- Below county level remain slightly fuzzy.
-          location_distance := location.distance * 0.5;
-        END IF;
+    -- Ignore all place nodes that do not fit in a lower level boundary.
+    CONTINUE WHEN location.isguess
+                  and current_boundary is not NULL
+                  and not ST_Contains(current_boundary, location.centroid);
+
+    -- If this is the first item in the rank, then assume it is the address.
+    location_isaddress := not address_havelevel[location.rank_address];
+
+    -- Further sanity checks to ensure that the address forms a sane hierarchy.
+    IF location_isaddress THEN
+      IF location.isguess and current_node_area is not NULL THEN
+        location_isaddress := ST_Contains(current_node_area, location.centroid);
+      END IF;
+      IF not location.isguess and current_boundary is not NULL
+         and location.rank_address != 11 AND location.rank_address != 5 THEN
+        location_isaddress := ST_Contains(current_boundary, location.centroid);
       END IF;
-    ELSE
-      CONTINUE WHEN location.keywords <@ location_keywords;
     END IF;
 
-    IF location.distance < location_distance OR NOT location.isguess THEN
-      location_keywords := location.keywords;
+    IF location_isaddress THEN
+      address_havelevel[location.rank_address] := true;
+      parent_place_id := location.place_id;
 
-      location_isaddress := NOT address_havelevel[location.rank_address];
-      --DEBUG: RAISE WARNING 'should be address: %, is guess: %, rank: %', location_isaddress, location.isguess, location.rank_address;
-      IF location_isaddress AND location.isguess AND location_parent IS NOT NULL THEN
-          location_isaddress := ST_Contains(location_parent, location.centroid);
+      -- Set postcode if we have one.
+      -- (Returned will be the highest ranking one.)
+      IF location.postcode is not NULL THEN
+        postcode = location.postcode;
       END IF;
 
-      --DEBUG: RAISE WARNING '% isaddress: %', location.place_id, location_isaddress;
-      -- Add it to the list of search terms
-      IF NOT %REVERSE-ONLY% THEN
-          nameaddress_vector := array_merge(nameaddress_vector,
-                                            location.keywords::integer[]);
-      END IF;
-
-      INSERT INTO place_addressline (place_id, address_place_id, fromarea,
-                                     isaddress, distance, cached_rank_address)
-        VALUES (obj_place_id, location.place_id, true,
-                location_isaddress, location.distance, location.rank_address);
-
-      IF location_isaddress THEN
-        -- add postcode if we have one
-        -- (If multiple postcodes are available, we end up with the highest ranking one.)
-        IF location.postcode is not null THEN
-            postcode = location.postcode;
-        END IF;
-
-        address_havelevel[location.rank_address] := true;
-        -- add a hack against postcode ranks
-        IF NOT location.isguess
-           AND location.rank_address != 11 AND location.rank_address != 5
-        THEN
+      -- Recompute the areas we need for hierarchy sanity checks.
+      IF location.rank_address != 11 AND location.rank_address != 5 THEN
+        IF location.isguess THEN
+          current_node_area := place_node_fuzzy_area(location.centroid,
+                                                     location.rank_search);
+        ELSE
+          current_node_area := NULL;
           SELECT p.geometry FROM placex p
-            WHERE p.place_id = location.place_id INTO location_parent;
-        END IF;
-
-        IF location.rank_address > parent_place_id_rank THEN
-          parent_place_id = location.place_id;
-          parent_place_id_rank = location.rank_address;
+              WHERE p.place_id = location.place_id INTO current_boundary;
         END IF;
       END IF;
     END IF;
 
+    -- Add it to the list of search terms
+    IF NOT %REVERSE-ONLY% THEN
+      nameaddress_vector := array_merge(nameaddress_vector,
+                                        location.keywords::integer[]);
+    END IF;
+
+    INSERT INTO place_addressline (place_id, address_place_id, fromarea,
+                                     isaddress, distance, cached_rank_address)
+        VALUES (obj_place_id, location.place_id, true,
+                location_isaddress, location.distance, location.rank_address);
   END LOOP;
 END;
 $$
index ae841bf25d23346b9780a3a12e4ee93526877f95..08e82c5be671d4e8b37b89424d30b77fdc491d98 100644 (file)
@@ -272,21 +272,27 @@ END;
 $$
 LANGUAGE plpgsql;
 
-CREATE OR REPLACE FUNCTION near_feature_rank_distance(rank_search INTEGER)
-  RETURNS FLOAT
+-- Create a bounding box with an extent computed from the radius (in meters)
+-- which in turn is derived from the given search rank.
+CREATE OR REPLACE FUNCTION place_node_fuzzy_area(geom GEOMETRY, rank_search INTEGER)
+  RETURNS GEOMETRY
   AS $$
+DECLARE
+  radius FLOAT := 500;
 BEGIN
   IF rank_search <= 16 THEN -- city
-    RETURN 15000;
+    radius := 15000;
   ELSIF rank_search <= 18 THEN -- town
-    RETURN 4000;
+    radius := 4000;
   ELSIF rank_search <= 19 THEN -- village
-    RETURN 2000;
+    radius := 2000;
   ELSIF rank_search  <= 20 THEN -- hamlet
-    RETURN 1000;
+    radius := 1000;
   END IF;
 
-  RETURN 500;
+  RETURN ST_Envelope(ST_Collect(
+                     ST_Project(geom, radius, 0.785398)::geometry,
+                     ST_Project(geom, radius, 3.9269908)::geometry));
 END;
 $$
 LANGUAGE plpgsql IMMUTABLE;
@@ -301,7 +307,6 @@ CREATE OR REPLACE FUNCTION add_location(place_id BIGINT, country_code varchar(2)
 DECLARE
   locationid INTEGER;
   centroid GEOMETRY;
-  radius FLOAT;
   secgeo GEOMETRY;
   postcode TEXT;
 BEGIN
@@ -321,13 +326,7 @@ BEGIN
     END LOOP;
 
   ELSEIF ST_GeometryType(geometry) = 'ST_Point' THEN
-    radius := near_feature_rank_distance(rank_search);
-    --DEBUG: RAISE WARNING 'adding % radius %', place_id, radius;
-
-    -- Create a bounding box with an extent computed from the radius (in meters).
-    secgeo := ST_Envelope(ST_Collect(
-                            ST_Project(geometry, radius, 0.785398)::geometry,
-                            ST_Project(geometry, radius, 3.9269908)::geometry));
+    secgeo := place_node_fuzzy_area(geometry, rank_search);
     PERFORM insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, true, postcode, geometry, secgeo);
 
   END IF;