git.openstreetmap.org Git - nominatim.git/commitdiff
detect and remove admin boundary duplicates
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 20 Oct 2020 21:26:44 +0000 (23:26 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Wed, 28 Oct 2020 09:49:26 +0000 (10:49 +0100)
The Polish community maps admin boundaries that span multiple
levels by duplicating the boundary relations. Detect this situation
by looking for matching wikidata tags. The duplicates with the
higher admin level are then removed from the address pool by setting
their address rank to 0.

sql/functions/placex_triggers.sql
test/bdd/db/import/rank_computation.feature

index 696a7c343349e19b842a5e0f65ef45b8d34d7d77..837f8fd369f1a722c1159149553f2ce2f982d080 100644 (file)
@@ -494,33 +494,6 @@ END;
 $$
 LANGUAGE plpgsql;
 
-CREATE OR REPLACE FUNCTION get_parent_address_level(geom GEOMETRY, in_level SMALLINT)
-  RETURNS SMALLINT
-  AS $$
-DECLARE
-  address_rank SMALLINT;
-BEGIN
-  IF in_level <= 3 or in_level > 15 THEN
-    address_rank := 3;
-  ELSE
-    SELECT rank_address INTO address_rank
-      FROM placex
-      WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative'
-            and admin_level < in_level
-            and geometry ~ geom and _ST_Covers(geometry, geom)
-      ORDER BY admin_level desc LIMIT 1;
-  END IF;
-
-  IF address_rank is NULL or address_rank <= 3 THEN
-    RETURN 3;
-  END IF;
-
-  RETURN address_rank;
-END;
-$$
-LANGUAGE plpgsql;
-
-
 CREATE OR REPLACE FUNCTION placex_update()
   RETURNS TRIGGER
   AS $$
@@ -610,14 +583,33 @@ BEGIN
      and NEW.osm_type = 'R' and NEW.rank_address > 0
   THEN
     -- First, check that admin boundaries do not overtake each other rank-wise.
-    parent_address_level := get_parent_address_level(NEW.centroid, NEW.admin_level);
-    IF parent_address_level >= NEW.rank_address THEN
-      IF parent_address_level >= 24 THEN
-        NEW.rank_address := 25;
+    parent_address_level := 3;
+    FOR location IN
+      SELECT rank_address, extratags FROM placex
+      WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative'
+            and admin_level < NEW.admin_level and admin_level > 3
+            and rank_address > 0
+            and ST_Covers(geometry, NEW.geometry)
+      ORDER BY admin_level desc LIMIT 1
+    LOOP
+      IF location.extratags ? 'wikidata' and NEW.extratags ? 'wikidata'
+         and location.extratags->'wikidata' = NEW.extratags->'wikidata'
+      THEN
+        -- Looks like the same boundary is replicated on multiple admin_levels.
+        -- Usual tagging in Poland. Remove our boundary from addresses.
+        NEW.rank_address := 0;
       ELSE
-        NEW.rank_address := parent_address_level + 2;
+        parent_address_level := location.rank_address;
+        IF location.rank_address >= NEW.rank_address THEN
+          IF location.rank_address >= 24 THEN
+            NEW.rank_address := 25;
+          ELSE
+            NEW.rank_address := location.rank_address + 2;
+          END IF;
+        END IF;
       END IF;
-    END IF;
+    END LOOP;
+
     IF NEW.rank_address > 9 THEN
         -- Second check that the boundary is not completely contained in a
         -- place area with a higher address rank
@@ -630,7 +622,7 @@ BEGIN
                 and ST_Relate(geometry, NEW.geometry, 'T*T***FF*') -- contains but not equal
           ORDER BY rank_address desc LIMIT 1
         LOOP
-            NEW.rank_address := location.rank_address + 2;
+          NEW.rank_address := location.rank_address + 2;
         END LOOP;
     END IF;
   ELSEIF NEW.class = 'place' and NEW.osm_type = 'N'
index cea4d973acbf16625a99b4f3a7e205778b1ad96e..beecb36658536324c786912d292082cdd68f6101 100644 (file)
@@ -74,15 +74,15 @@ Feature: Rank assignment
           | R21 | boundary | administrative | 9     | municipality | (0 0, 0 1, 1 1, 1 0, 0 0) |
           | R22 | boundary | administrative | 9     | suburb       | (0 0, 0 1, 1 1, 1 0, 0 0) |
         When importing
-        Then place_addressline contains
-            | object | address | cached_rank_address |
-            | R21    | R20     | 16                  |
-            | R22    | R20     | 16                  |
         Then placex contains
           | object | rank_search | rank_address |
           | R20    | 16          | 16 |
           | R21    | 18          | 18 |
           | R22    | 18          | 20 |
+        Then place_addressline contains
+            | object | address | cached_rank_address |
+            | R21    | R20     | 16                  |
+            | R22    | R20     | 16                  |
 
     Scenario: Admin levels cannot overtake each other due to place address ranks
         Given the named places
@@ -146,3 +146,42 @@ Feature: Rank assignment
             | object | rank_search | rank_address |
             | R10    | 16          | 16           |
             | R20    | 12          | 12           |
+
+
+    Scenario: adjacent admin_levels are considered the same object when they have the same wikidata
+        Given the named places
+          | osm | class    | type           | admin | extra+wikidata | geometry |
+          | N20 | place    | square         | 15    | Q123           | 0.1 0.1  |
+          | R23 | boundary | administrative | 10    | Q444           | (0 0, 0 1, 1 1, 1 0, 0 0) |
+          | R21 | boundary | administrative | 9     | Q444           | (0 0, 0 1, 1 1, 1 0, 0 0) |
+          | R22 | boundary | administrative | 8     | Q444           | (0 0, 0 1, 1 1, 1 0, 0 0) |
+        When importing
+        Then placex contains
+          | object | rank_search | rank_address |
+          | R23    | 20          | 0  |
+          | R21    | 18          | 0  |
+          | R22    | 16          | 16 |
+        Then place_addressline contains
+            | object | address | cached_rank_address |
+            | N20    | R22     | 16                  |
+        Then place_addressline doesn't contain
+            | object | address |
+            | N20    | R21     |
+            | N20    | R23     |
+
+    Scenario: adjacent admin_levels are considered different objects when they have different wikidata
+        Given the named places
+          | osm | class    | type           | admin | extra+wikidata | geometry |
+          | N20 | place    | square         | 15    | Q123           | 0.1 0.1  |
+          | R21 | boundary | administrative | 9     | Q4441          | (0 0, 0 1, 1 1, 1 0, 0 0) |
+          | R22 | boundary | administrative | 8     | Q444           | (0 0, 0 1, 1 1, 1 0, 0 0) |
+        When importing
+        Then placex contains
+          | object | rank_search | rank_address |
+          | R21    | 18          | 18 |
+          | R22    | 16          | 16 |
+        Then place_addressline contains
+            | object | address | cached_rank_address |
+            | N20    | R22     | 16                  |
+            | N20    | R21     | 18                  |
+