git.openstreetmap.org Git - nominatim.git/commitdiff
ignore invalid coordinates in external postcodes
author Sarah Hoffmann <lonvia@denofr.de>
Thu, 13 May 2021 10:19:20 +0000 (12:19 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 13 May 2021 12:15:42 +0000 (14:15 +0200)
nominatim/tools/postcodes.py
test/python/test_tools_postcodes.py

index 9bcdf2e558a4f652c66f62fdfccc489a3ee5d261..195d407ee3b8c7e43c591c2d0b74cbc910d5e28c 100644 (file)
@@ -5,6 +5,7 @@ of artificial postcode centroids.
 import csv
 import gzip
 import logging
+from math import isfinite
 
 from psycopg2.extras import execute_values
 
@@ -12,6 +13,17 @@ from nominatim.db.connection import connect
 
 LOG = logging.getLogger()
 
+def _to_float(num, max_value):
+    """ Convert the number in string into a float. The number is expected
+        to be finite and strictly between -max_value and max_value.
+        Otherwise raises a ValueError.
+    """
+    num = float(num)
+    if not isfinite(num) or num <= -max_value or num >= max_value:
+        raise ValueError()
+
+    return num
+
 class _CountryPostcodesCollector:
     """ Collector for postcodes of a single country.
     """
@@ -108,7 +120,8 @@ class _CountryPostcodesCollector:
                 postcode = analyzer.normalize_postcode(row['postcode'])
                 if postcode not in self.collected:
                     try:
-                        self.collected[postcode] = (float(row['lon']), float(row['lat']))
+                        self.collected[postcode] = (_to_float(row['lon'], 180),
+                                                    _to_float(row['lat'], 90))
                     except ValueError:
                         LOG.warning("Bad coordinates %s, %s in %s country postcode file.",
                                     row['lat'], row['lon'], self.country)
index e0a62ec7572ff4b53d6254ff13be266dba0754f1..adbc0e74b3608096a340fab369b69d749281743b 100644 (file)
@@ -157,3 +157,29 @@ def test_import_postcodes_extern(dsn, placex_table, postcode_table, tmp_path,
     assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12),
                                       ('xx', 'CD 4511', -10, -5)}
 
+
+def test_import_postcodes_extern_bad_column(dsn, placex_table, postcode_table,
+                                            tmp_path, tokenizer):
+    placex_table.add(country='xx', geom='POINT(10 12)',
+                     address=dict(postcode='AB 4511'))
+
+    extfile = tmp_path / 'xx_postcodes.csv'
+    extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
+
+    postcodes.update_postcodes(dsn, tmp_path, tokenizer)
+
+    assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)}
+
+
+def test_import_postcodes_extern_bad_number(dsn, placex_table, postcode_table,
+                                            tmp_path, tokenizer):
+    placex_table.add(country='xx', geom='POINT(10 12)',
+                     address=dict(postcode='AB 4511'))
+
+    extfile = tmp_path / 'xx_postcodes.csv'
+    extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0")
+
+    postcodes.update_postcodes(dsn, tmp_path, tokenizer)
+
+    assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12),
+                                      ('xx', 'CD 4511', -10, -5)}