]> git.openstreetmap.org Git - nominatim.git/blobdiff - sql/functions/placex_triggers.sql
restrict size of features that get a full address search
[nominatim.git] / sql / functions / placex_triggers.sql
index 9a5c67767245edcd84a8c0a4398db54729c76317..e80f14cb6033f7582a59a007670d381c0c3bc882 100644 (file)
@@ -138,6 +138,7 @@ DECLARE
 BEGIN
   IF bnd.rank_search >= 26 or bnd.rank_address = 0
      or ST_GeometryType(bnd.geometry) NOT IN ('ST_Polygon','ST_MultiPolygon')
+     or bnd.type IN ('postcode', 'postal_code')
   THEN
     RETURN NULL;
   END IF;
@@ -254,131 +255,119 @@ CREATE OR REPLACE FUNCTION insert_addresslines(obj_place_id BIGINT,
                                                maxrank SMALLINT,
                                                address HSTORE,
                                                geometry GEOMETRY,
+                                               country TEXT,
                                                OUT parent_place_id BIGINT,
                                                OUT postcode TEXT,
                                                OUT nameaddress_vector INT[])
   AS $$
 DECLARE
-  current_rank_address INTEGER := 0;
-  location_distance FLOAT := 0;
-  location_parent GEOMETRY := NULL;
-  parent_place_id_rank SMALLINT := 0;
+  address_havelevel BOOLEAN[];
 
   location_isaddress BOOLEAN;
+  current_boundary GEOMETRY := NULL;
+  current_node_area GEOMETRY := NULL;
 
-  address_havelevel BOOLEAN[];
-  location_keywords INT[];
+  parent_place_rank INT := 0;
+  addr_place_ids BIGINT[];
 
   location RECORD;
-  addr_item RECORD;
-
-  isin_tokens INT[];
-  isin TEXT[];
 BEGIN
   parent_place_id := 0;
   nameaddress_vector := '{}'::int[];
-  isin_tokens := '{}'::int[];
 
-  ---- convert address store to array of tokenids
-  IF address IS NOT NULL THEN
-    FOR addr_item IN SELECT * FROM each(address)
-    LOOP
-      IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province',
-                           'district', 'region', 'county', 'municipality',
-                           'hamlet', 'village', 'subdistrict', 'town',
-                           'neighbourhood', 'quarter', 'parish')
-      THEN
-        isin_tokens := array_merge(isin_tokens,
-                                   word_ids_from_name(addr_item.value));
-        IF NOT %REVERSE-ONLY% THEN
-          nameaddress_vector := array_merge(nameaddress_vector,
-                                            addr_ids_from_name(addr_item.value));
+  address_havelevel := array_fill(false, ARRAY[maxrank]);
+
+  FOR location IN
+    SELECT * FROM get_places_for_addr_tags(partition, geometry,
+                                                   address, country)
+    ORDER BY rank_address, distance, isguess desc
+  LOOP
+    IF NOT %REVERSE-ONLY% THEN
+      nameaddress_vector := array_merge(nameaddress_vector,
+                                        location.keywords::int[]);
+    END IF;
+
+    IF location.place_id is not null THEN
+      location_isaddress := not address_havelevel[location.rank_address];
+      IF not address_havelevel[location.rank_address] THEN
+        address_havelevel[location.rank_address] := true;
+        IF parent_place_rank < location.rank_address THEN
+          parent_place_id := location.place_id;
+          parent_place_rank := location.rank_address;
         END IF;
       END IF;
-    END LOOP;
-  END IF;
-  IF NOT %REVERSE-ONLY% THEN
-    nameaddress_vector := array_merge(nameaddress_vector, isin_tokens);
-  END IF;
 
-  ---- now compute the address terms
-  FOR i IN 1..28 LOOP
-    address_havelevel[i] := false;
+      INSERT INTO place_addressline (place_id, address_place_id, fromarea,
+                                     isaddress, distance, cached_rank_address)
+        VALUES (obj_place_id, location.place_id, not location.isguess,
+                true, location.distance, location.rank_address);
+
+      addr_place_ids := array_append(addr_place_ids, location.place_id);
+    END IF;
   END LOOP;
 
   FOR location IN
     SELECT * FROM getNearFeatures(partition, geometry, maxrank)
-    ORDER BY rank_address, isin_tokens && keywords desc, isguess asc,
+    WHERE addr_place_ids is null or not addr_place_ids @> ARRAY[place_id]
+    ORDER BY rank_address, isguess asc,
              distance *
                CASE WHEN rank_address = 16 AND rank_search = 15 THEN 0.2
                     WHEN rank_address = 16 AND rank_search = 16 THEN 0.25
                     WHEN rank_address = 16 AND rank_search = 18 THEN 0.5
                     ELSE 1 END ASC
   LOOP
-    IF location.rank_address != current_rank_address THEN
-      current_rank_address := location.rank_address;
-      IF location.isguess THEN
-        location_distance := location.distance * 1.5;
-      ELSE
-        IF location.rank_address <= 12 THEN
-          -- for county and above, if we have an area consider that exact
-          -- (It would be nice to relax the constraint for places close to
-          --  the boundary but we'd need the exact geometry for that. Too
-          --  expensive.)
-          location_distance = 0;
-        ELSE
-          -- Below county level remain slightly fuzzy.
-          location_distance := location.distance * 0.5;
-        END IF;
+    -- Ignore all place nodes that do not fit in a lower level boundary.
+    CONTINUE WHEN location.isguess
+                  and current_boundary is not NULL
+                  and not ST_Contains(current_boundary, location.centroid);
+
+    -- If this is the first item in the rank, then assume it is the address.
+    location_isaddress := not address_havelevel[location.rank_address];
+
+    -- Further sanity checks to ensure that the address forms a sane hierarchy.
+    IF location_isaddress THEN
+      IF location.isguess and current_node_area is not NULL THEN
+        location_isaddress := ST_Contains(current_node_area, location.centroid);
+      END IF;
+      IF not location.isguess and current_boundary is not NULL
+         and location.rank_address != 11 AND location.rank_address != 5 THEN
+        location_isaddress := ST_Contains(current_boundary, location.centroid);
       END IF;
-    ELSE
-      CONTINUE WHEN location.keywords <@ location_keywords;
     END IF;
 
-    IF location.distance < location_distance OR NOT location.isguess THEN
-      location_keywords := location.keywords;
+    IF location_isaddress THEN
+      address_havelevel[location.rank_address] := true;
+      parent_place_id := location.place_id;
 
-      location_isaddress := NOT address_havelevel[location.rank_address];
-      --DEBUG: RAISE WARNING 'should be address: %, is guess: %, rank: %', location_isaddress, location.isguess, location.rank_address;
-      IF location_isaddress AND location.isguess AND location_parent IS NOT NULL THEN
-          location_isaddress := ST_Contains(location_parent, location.centroid);
+      -- Set postcode if we have one.
+      -- (Returned will be the highest ranking one.)
+      IF location.postcode is not NULL THEN
+        postcode = location.postcode;
       END IF;
 
-      --DEBUG: RAISE WARNING '% isaddress: %', location.place_id, location_isaddress;
-      -- Add it to the list of search terms
-      IF NOT %REVERSE-ONLY% THEN
-          nameaddress_vector := array_merge(nameaddress_vector,
-                                            location.keywords::integer[]);
-      END IF;
-
-      INSERT INTO place_addressline (place_id, address_place_id, fromarea,
-                                     isaddress, distance, cached_rank_address)
-        VALUES (obj_place_id, location.place_id, true,
-                location_isaddress, location.distance, location.rank_address);
-
-      IF location_isaddress THEN
-        -- add postcode if we have one
-        -- (If multiple postcodes are available, we end up with the highest ranking one.)
-        IF location.postcode is not null THEN
-            postcode = location.postcode;
-        END IF;
-
-        address_havelevel[location.rank_address] := true;
-        -- add a hack against postcode ranks
-        IF NOT location.isguess
-           AND location.rank_address != 11 AND location.rank_address != 5
-        THEN
+      -- Recompute the areas we need for hierarchy sanity checks.
+      IF location.rank_address != 11 AND location.rank_address != 5 THEN
+        IF location.isguess THEN
+          current_node_area := place_node_fuzzy_area(location.centroid,
+                                                     location.rank_search);
+        ELSE
+          current_node_area := NULL;
           SELECT p.geometry FROM placex p
-            WHERE p.place_id = location.place_id INTO location_parent;
-        END IF;
-
-        IF location.rank_address > parent_place_id_rank THEN
-          parent_place_id = location.place_id;
-          parent_place_id_rank = location.rank_address;
+              WHERE p.place_id = location.place_id INTO current_boundary;
         END IF;
       END IF;
     END IF;
 
+    -- Add it to the list of search terms
+    IF NOT %REVERSE-ONLY% THEN
+      nameaddress_vector := array_merge(nameaddress_vector,
+                                        location.keywords::integer[]);
+    END IF;
+
+    INSERT INTO place_addressline (place_id, address_place_id, fromarea,
+                                     isaddress, distance, cached_rank_address)
+        VALUES (obj_place_id, location.place_id, not location.isguess,
+                location_isaddress, location.distance, location.rank_address);
   END LOOP;
 END;
 $$
@@ -511,33 +500,6 @@ END;
 $$
 LANGUAGE plpgsql;
 
-CREATE OR REPLACE FUNCTION get_parent_address_level(geom GEOMETRY, in_level SMALLINT)
-  RETURNS SMALLINT
-  AS $$
-DECLARE
-  address_rank SMALLINT;
-BEGIN
-  IF in_level <= 3 or in_level > 15 THEN
-    address_rank := 3;
-  ELSE
-    SELECT rank_address INTO address_rank
-      FROM placex
-      WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative'
-            and admin_level < in_level
-            and geometry && geom and ST_Covers(geometry, geom)
-      ORDER BY admin_level desc LIMIT 1;
-  END IF;
-
-  IF address_rank is NULL or address_rank <= 3 THEN
-    RETURN 3;
-  END IF;
-
-  RETURN address_rank;
-END;
-$$
-LANGUAGE plpgsql;
-
-
 CREATE OR REPLACE FUNCTION placex_update()
   RETURNS TRIGGER
   AS $$
@@ -546,13 +508,15 @@ DECLARE
   location RECORD;
   relation_members TEXT[];
 
-  centroid GEOMETRY;
+  geom GEOMETRY;
   parent_address_level SMALLINT;
   place_address_level SMALLINT;
 
   addr_street TEXT;
   addr_place TEXT;
 
+  max_rank SMALLINT;
+
   name_vector INTEGER[];
   nameaddress_vector INTEGER[];
   addr_nameaddress_vector INTEGER[];
@@ -626,20 +590,58 @@ BEGIN
   IF NEW.class = 'boundary' and NEW.type = 'administrative'
      and NEW.osm_type = 'R' and NEW.rank_address > 0
   THEN
-    parent_address_level := get_parent_address_level(NEW.centroid, NEW.admin_level);
-    IF parent_address_level >= NEW.rank_address THEN
-      IF parent_address_level >= 24 THEN
-        NEW.rank_address := 25;
+    -- First, check that admin boundaries do not overtake each other rank-wise.
+    parent_address_level := 3;
+    FOR location IN
+      SELECT rank_address,
+             (CASE WHEN extratags ? 'wikidata' and NEW.extratags ? 'wikidata'
+                        and extratags->'wikidata' = NEW.extratags->'wikidata'
+                   THEN ST_Equals(geometry, NEW.geometry)
+                   ELSE false END) as is_same
+      FROM placex
+      WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative'
+            and admin_level < NEW.admin_level and admin_level > 3
+            and rank_address > 0
+            and geometry && NEW.centroid and _ST_Covers(geometry, NEW.centroid)
+      ORDER BY admin_level desc LIMIT 1
+    LOOP
+      IF location.is_same THEN
+        -- Looks like the same boundary is replicated on multiple admin_levels.
+        -- Usual tagging in Poland. Remove our boundary from addresses.
+        NEW.rank_address := 0;
       ELSE
-        NEW.rank_address := parent_address_level + 2;
+        parent_address_level := location.rank_address;
+        IF location.rank_address >= NEW.rank_address THEN
+          IF location.rank_address >= 24 THEN
+            NEW.rank_address := 25;
+          ELSE
+            NEW.rank_address := location.rank_address + 2;
+          END IF;
+        END IF;
       END IF;
+    END LOOP;
+
+    IF NEW.rank_address > 9 THEN
+        -- Second check that the boundary is not completely contained in a
+        -- place area with a higher address rank
+        FOR location IN
+          SELECT rank_address FROM placex
+          WHERE class = 'place' and rank_address < 24
+                and rank_address > NEW.rank_address
+                and geometry && NEW.geometry
+                and geometry ~ NEW.geometry -- needed because ST_Relate does not do bbox cover test
+                and ST_Relate(geometry, NEW.geometry, 'T*T***FF*') -- contains but not equal
+          ORDER BY rank_address desc LIMIT 1
+        LOOP
+          NEW.rank_address := location.rank_address + 2;
+        END LOOP;
     END IF;
-  -- If a place node is contained in a admin boundary with the same address level
-  -- and has not been linked, then make the node a subpart by increasing the
-  -- address rank (city level and above).
   ELSEIF NEW.class = 'place' and NEW.osm_type = 'N'
      and NEW.rank_address between 16 and 23
   THEN
+    -- If a place node is contained in a admin boundary with the same address level
+    -- and has not been linked, then make the node a subpart by increasing the
+    -- address rank (city level and above).
     FOR location IN
         SELECT rank_address FROM placex
         WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative'
@@ -805,17 +807,22 @@ BEGIN
           IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN
             result := add_location(NEW.place_id, NEW.country_code, NEW.partition,
                                    name_vector, NEW.rank_search, NEW.rank_address,
-                                   upper(trim(NEW.address->'postcode')), NEW.geometry);
+                                   upper(trim(NEW.address->'postcode')), NEW.geometry,
+                                   NEW.centroid);
             --DEBUG: RAISE WARNING 'Place added to location table';
           END IF;
 
       END IF;
 
-      IF NOT %REVERSE-ONLY% THEN
+      IF not %REVERSE-ONLY% AND (array_length(name_vector, 1) is not NULL
+         OR inherited_address is not NULL OR NEW.address is not NULL)
+      THEN
         SELECT * INTO name_vector, nameaddress_vector
-          FROM create_poi_search_terms(NEW.parent_place_id,
+          FROM create_poi_search_terms(NEW.place_id,
+                                       NEW.partition, NEW.parent_place_id,
                                        inherited_address || NEW.address,
-                                       NEW.housenumber, name_vector);
+                                       NEW.country_code, NEW.housenumber,
+                                       name_vector, NEW.centroid);
 
         IF array_length(name_vector, 1) is not NULL THEN
           INSERT INTO search_name (place_id, search_rank, address_rank,
@@ -842,9 +849,9 @@ BEGIN
 
     -- Use the linked point as the centre point of the geometry,
     -- but only if it is within the area of the boundary.
-    centroid := coalesce(location.centroid, ST_Centroid(location.geometry));
-    IF centroid is not NULL AND ST_Within(centroid, NEW.geometry) THEN
-        NEW.centroid := centroid;
+    geom := coalesce(location.centroid, ST_Centroid(location.geometry));
+    IF geom is not NULL AND ST_Within(geom, NEW.geometry) THEN
+        NEW.centroid := geom;
     END IF;
 
     --DEBUG: RAISE WARNING 'parent address: % rank address: %', parent_address_level, location.rank_address;
@@ -911,13 +918,31 @@ BEGIN
     --DEBUG: RAISE WARNING 'Country names updated';
   END IF;
 
-  SELECT * FROM insert_addresslines(NEW.place_id, NEW.partition,
-                                    CASE WHEN NEW.rank_address = 0
-                                      THEN NEW.rank_search ELSE NEW.rank_address END,
-                                    NEW.address,
-                                    CASE WHEN NEW.rank_search >= 26
-                                             AND NEW.rank_search < 30
-                                      THEN NEW.geometry ELSE NEW.centroid END)
+  -- For linear features we need the full geometry for determining the address
+  -- because they may go through several administrative entities. Otherwise use
+  -- the centroid for performance reasons.
+  IF ST_GeometryType(NEW.geometry) in ('ST_LineString', 'ST_MultiLineString') THEN
+    geom := NEW.geometry;
+  ELSE
+    geom := NEW.centroid;
+  END IF;
+
+  IF NEW.rank_address = 0 THEN
+    max_rank := geometry_to_rank(NEW.rank_search, NEW.geometry, NEW.country_code);
+    -- Rank 0 features may also span multiple administrative areas (e.g. lakes)
+    -- so use the geometry here too. Just make sure the areas don't become too
+    -- large.
+    IF NEW.class = 'natural' or max_rank > 10 THEN
+      geom := NEW.geometry;
+    END IF;
+  ELSEIF NEW.rank_address > 25 THEN
+    max_rank := 25;
+  ELSE
+    max_rank = NEW.rank_address;
+  END IF;
+
+  SELECT * FROM insert_addresslines(NEW.place_id, NEW.partition, max_rank,
+                                    NEW.address, geom, NEW.country_code)
     INTO NEW.parent_place_id, NEW.postcode, nameaddress_vector;
 
   --DEBUG: RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector;
@@ -935,7 +960,7 @@ BEGIN
   IF NEW.name IS NOT NULL THEN
 
     IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN
-      result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry);
+      result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry, NEW.centroid);
       --DEBUG: RAISE WARNING 'added to location (full)';
     END IF;