git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #2027 from lonvia/remove-duplicate-admin-boundaries
author Sarah Hoffmann <lonvia@denofr.de>
Wed, 28 Oct 2020 10:11:42 +0000 (11:11 +0100)
committer GitHub <noreply@github.com>
Wed, 28 Oct 2020 10:11:42 +0000 (11:11 +0100)
Handle duplicated admin boundaries

lib/Geocode.php
settings/address-levels.json
sql/functions/placex_triggers.sql
test/bdd/db/import/rank_computation.feature

index b07d1adebe587d12f01a738cdf8d39ad1ffd2ddd..69b6f41ca46e1085d2917592dc2757780bbbc64a 100644 (file)
@@ -934,6 +934,8 @@ class Geocode
                 } else {
                     $aResult['foundorder'] += 0.01;
                 }
+                // - rank
+                $aResult['foundorder'] -= 0.00001 * (30 - $aResult['rank_search']);
 
                 // Adjust importance for the number of exact string matches in the result
                 $iCountWords = 0;
index 1810af59755be89a1ed734594ad35cc51e895129..f245c227d9bbd5d307b2d6890581e0826d45ebf6 100644 (file)
           "municipality" : 18
       },
       "boundary" : {
+          "administrative5" : [10, 0],
           "administrative7" : [13, 0],
           "administrative8" : 14
       }
index 696a7c343349e19b842a5e0f65ef45b8d34d7d77..1e2aac4c96fcc88a6e188eacf43a0888135458e6 100644 (file)
@@ -494,33 +494,6 @@ END;
 $$
 LANGUAGE plpgsql;
 
-CREATE OR REPLACE FUNCTION get_parent_address_level(geom GEOMETRY, in_level SMALLINT)
-  RETURNS SMALLINT
-  AS $$
-DECLARE
-  address_rank SMALLINT;
-BEGIN
-  IF in_level <= 3 or in_level > 15 THEN
-    address_rank := 3;
-  ELSE
-    SELECT rank_address INTO address_rank
-      FROM placex
-      WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative'
-            and admin_level < in_level
-            and geometry ~ geom and _ST_Covers(geometry, geom)
-      ORDER BY admin_level desc LIMIT 1;
-  END IF;
-
-  IF address_rank is NULL or address_rank <= 3 THEN
-    RETURN 3;
-  END IF;
-
-  RETURN address_rank;
-END;
-$$
-LANGUAGE plpgsql;
-
-
 CREATE OR REPLACE FUNCTION placex_update()
   RETURNS TRIGGER
   AS $$
@@ -610,14 +583,36 @@ BEGIN
      and NEW.osm_type = 'R' and NEW.rank_address > 0
   THEN
     -- First, check that admin boundaries do not overtake each other rank-wise.
-    parent_address_level := get_parent_address_level(NEW.centroid, NEW.admin_level);
-    IF parent_address_level >= NEW.rank_address THEN
-      IF parent_address_level >= 24 THEN
-        NEW.rank_address := 25;
+    parent_address_level := 3;
+    FOR location IN
+      SELECT rank_address,
+             (CASE WHEN extratags ? 'wikidata' and NEW.extratags ? 'wikidata'
+                        and extratags->'wikidata' = NEW.extratags->'wikidata'
+                   THEN ST_Equals(geometry, NEW.geometry)
+                   ELSE false END) as is_same
+      FROM placex
+      WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative'
+            and admin_level < NEW.admin_level and admin_level > 3
+            and rank_address > 0
+            and geometry && NEW.centroid and _ST_Covers(geometry, NEW.centroid)
+      ORDER BY admin_level desc LIMIT 1
+    LOOP
+      IF location.is_same THEN
+        -- Looks like the same boundary is replicated on multiple admin_levels.
+        -- Usual tagging in Poland. Remove our boundary from addresses.
+        NEW.rank_address := 0;
       ELSE
-        NEW.rank_address := parent_address_level + 2;
+        parent_address_level := location.rank_address;
+        IF location.rank_address >= NEW.rank_address THEN
+          IF location.rank_address >= 24 THEN
+            NEW.rank_address := 25;
+          ELSE
+            NEW.rank_address := location.rank_address + 2;
+          END IF;
+        END IF;
       END IF;
-    END IF;
+    END LOOP;
+
     IF NEW.rank_address > 9 THEN
         -- Second check that the boundary is not completely contained in a
         -- place area with a higher address rank
@@ -630,7 +625,7 @@ BEGIN
                 and ST_Relate(geometry, NEW.geometry, 'T*T***FF*') -- contains but not equal
           ORDER BY rank_address desc LIMIT 1
         LOOP
-            NEW.rank_address := location.rank_address + 2;
+          NEW.rank_address := location.rank_address + 2;
         END LOOP;
     END IF;
   ELSEIF NEW.class = 'place' and NEW.osm_type = 'N'
index cea4d973acbf16625a99b4f3a7e205778b1ad96e..0fe440ce835210144b9046afd7c7e641f8f70e9b 100644 (file)
@@ -74,15 +74,15 @@ Feature: Rank assignment
           | R21 | boundary | administrative | 9     | municipality | (0 0, 0 1, 1 1, 1 0, 0 0) |
           | R22 | boundary | administrative | 9     | suburb       | (0 0, 0 1, 1 1, 1 0, 0 0) |
         When importing
-        Then place_addressline contains
-            | object | address | cached_rank_address |
-            | R21    | R20     | 16                  |
-            | R22    | R20     | 16                  |
         Then placex contains
           | object | rank_search | rank_address |
           | R20    | 16          | 16 |
           | R21    | 18          | 18 |
           | R22    | 18          | 20 |
+        Then place_addressline contains
+            | object | address | cached_rank_address |
+            | R21    | R20     | 16                  |
+            | R22    | R20     | 16                  |
 
     Scenario: Admin levels cannot overtake each other due to place address ranks
         Given the named places
@@ -101,6 +101,20 @@ Feature: Rank assignment
             | R21    | R20     | 16                  |
             | R22    | R20     | 16                  |
 
+    Scenario: Admin levels cannot overtake each other due to place address ranks even when slightly misaligned
+        Given the named places
+          | osm | class    | type           | admin | extra+place  | geometry |
+          | R20 | boundary | administrative | 6     | town         | (0 0, 0 2, 2 2, 2 0, 0 0) |
+          | R21 | boundary | administrative | 8     |              | (0 0, -0.0001 1, 1 1, 1 0, 0 0) |
+        When importing
+        Then placex contains
+          | object | rank_search | rank_address |
+          | R20    | 12          | 16 |
+          | R21    | 16          | 18 |
+        Then place_addressline contains
+            | object | address | cached_rank_address |
+            | R21    | R20     | 16                  |
+
     Scenario: Admin levels must not be larger than 25
         Given the named places
           | osm | class    | type           | admin | extra+place   | geometry |
@@ -146,3 +160,42 @@ Feature: Rank assignment
             | object | rank_search | rank_address |
             | R10    | 16          | 16           |
             | R20    | 12          | 12           |
+
+
+    Scenario: adjacent admin_levels are considered the same object when they have the same wikidata
+        Given the named places
+          | osm | class    | type           | admin | extra+wikidata | geometry |
+          | N20 | place    | square         | 15    | Q123           | 0.1 0.1  |
+          | R23 | boundary | administrative | 10    | Q444           | (0 0, 0 1, 1 1, 1 0, 0 0) |
+          | R21 | boundary | administrative | 9     | Q444           | (0 0, 0 1, 1 1, 1 0, 0 0) |
+          | R22 | boundary | administrative | 8     | Q444           | (0 0, 0 1, 1 1, 1 0, 0 0) |
+        When importing
+        Then placex contains
+          | object | rank_search | rank_address |
+          | R23    | 20          | 0  |
+          | R21    | 18          | 0  |
+          | R22    | 16          | 16 |
+        Then place_addressline contains
+            | object | address | cached_rank_address |
+            | N20    | R22     | 16                  |
+        Then place_addressline doesn't contain
+            | object | address |
+            | N20    | R21     |
+            | N20    | R23     |
+
+    Scenario: adjacent admin_levels are considered different objects when they have different wikidata
+        Given the named places
+          | osm | class    | type           | admin | extra+wikidata | geometry |
+          | N20 | place    | square         | 15    | Q123           | 0.1 0.1  |
+          | R21 | boundary | administrative | 9     | Q4441          | (0 0, 0 1, 1 1, 1 0, 0 0) |
+          | R22 | boundary | administrative | 8     | Q444           | (0 0, 0 1, 1 1, 1 0, 0 0) |
+        When importing
+        Then placex contains
+          | object | rank_search | rank_address |
+          | R21    | 18          | 18 |
+          | R22    | 16          | 16 |
+        Then place_addressline contains
+            | object | address | cached_rank_address |
+            | N20    | R22     | 16                  |
+            | N20    | R21     | 18                  |
+