]> git.openstreetmap.org Git - nominatim.git/commitdiff
postcodes: add support for optional spaces
author Sarah Hoffmann <lonvia@denofr.de>
Mon, 23 May 2022 12:04:22 +0000 (14:04 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 23 Jun 2022 21:42:31 +0000 (23:42 +0200)
nominatim/tokenizer/sanitizers/clean_postcodes.py
settings/country_settings.yaml
test/python/tokenizer/sanitizers/test_clean_postcodes.py

index ae1cd62d8d09f7c9afe5b1fac3949fccaca8b941..a968c9db0787f8a4bb009d72b20db72800145094 100644 (file)
@@ -31,18 +31,24 @@ class _PostcodeMatcher:
 
         pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
 
-        self.pattern = re.compile(f'(?:{country_code.upper()}[ -]?)?({pc_pattern})')
+        self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?(.*)\\s*')
+        self.pattern = re.compile(pc_pattern)
+
+        self.output = config.get('output', r'\g<0>')
 
 
     def normalize(self, postcode):
         """ Return the normalized version of the postcode. If the given postcode
             does not correspond to the usage-pattern, return null.
         """
-        normalized = postcode.strip().upper()
+        # Upper-case, strip spaces and leading country code.
+        normalized = self.norm_pattern.fullmatch(postcode.upper())
 
-        match = self.pattern.fullmatch(normalized)
+        if normalized:
+            match = self.pattern.fullmatch(normalized.group(1))
+            return match.expand(self.output) if match else None
 
-        return match.group(1) if match else None
+        return None
 
 
 class _PostcodeSanitizer:
index adb7593ed538f1def1d39a2fe0c1f70928c62632..f09de046fcdb6785677ae5c88b4e85f47e2a366a 100644 (file)
@@ -456,6 +456,9 @@ cz:
     partition: 124
     languages: cs
     names: !include country-names/cz.yaml
+    postcode:
+      pattern: "(ddd) ?(dd)"
+      output: \1 \2
 
 
 # Germany (Deutschland)
@@ -1618,6 +1621,9 @@ se:
     partition: 112
     languages: sv
     names: !include country-names/se.yaml
+    postcode:
+      pattern: "(ddd) ?(dd)"
+      output: \1 \2
 
 
 # Singapore (Singapore)
@@ -1657,6 +1663,9 @@ sk:
     partition: 172
     languages: sk
     names: !include country-names/sk.yaml
+    postcode:
+      pattern: "(ddd) ?(dd)"
+      output: \1 \2
 
 
 # Sierra Leone (Sierra Leone)
index e5c07596a7942c7a34678d1c93deab257c02e983..228c2f3a1a9adf3331c4536ce03468a875f96426 100644 (file)
@@ -77,3 +77,14 @@ def test_postcode_kazakhstan_pass(sanitize, postcode):
 def test_postcode_kazakhstan_fail(sanitize, postcode):
     assert sanitize(country='kz', postcode=postcode) == []
 
+
+@pytest.mark.parametrize("postcode", ('675 34', '67534', 'SE-675 34', 'SE67534'))
+def test_postcode_sweden_pass(sanitize, postcode):
+    assert sanitize(country='se', postcode=postcode) == [('postcode', '675 34')]
+
+
+@pytest.mark.parametrize("postcode", ('67 345', '671123'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_sweden_fail(sanitize, postcode):
+    assert sanitize(country='se', postcode=postcode) == []
+