1 # SPDX-License-Identifier: GPL-2.0-only
 
   3 # This file is part of Nominatim. (https://nominatim.org)
 
   5 # Copyright (C) 2022 by the Nominatim developer community.
 
   6 # For a full list of authors see the git log.
 
"""
Tests for the sanitizer that normalizes postcodes.
"""
 
import pytest

from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
from nominatim.data.place_info import PlaceInfo
from nominatim.data import country_info
 
  17 def sanitize(def_config, request):
 
  18     country_info.setup_country_config(def_config)
 
  19     sanitizer_args = {'step': 'clean-postcodes'}
 
  20     for mark in request.node.iter_markers(name="sanitizer_params"):
 
  21         sanitizer_args.update({k.replace('_', '-') : v for k,v in mark.kwargs.items()})
 
  23     def _run(country=None, **kwargs):
 
  24         pi = {'address': kwargs}
 
  25         if country is not None:
 
  26             pi['country_code'] = country
 
  28         _, address = PlaceSanitizer([sanitizer_args], def_config).process_names(PlaceInfo(pi))
 
  30         return sorted([(p.kind, p.name) for p in address])
 
  35 @pytest.mark.parametrize("country", (None, 'ae'))
 
  36 def test_postcode_no_country(sanitize, country):
 
  37     assert sanitize(country=country, postcode='23231') == [('unofficial_postcode', '23231')]
 
  40 @pytest.mark.parametrize("country", (None, 'ae'))
 
  41 @pytest.mark.sanitizer_params(convert_to_address=False)
 
  42 def test_postcode_no_country_drop(sanitize, country):
 
  43     assert sanitize(country=country, postcode='23231') == []
 
  46 @pytest.mark.parametrize("postcode", ('12345', '  12345  ', 'de 12345',
 
  47                                       'DE12345', 'DE 12345', 'DE-12345'))
 
  48 def test_postcode_pass_good_format(sanitize, postcode):
 
  49     assert sanitize(country='de', postcode=postcode) == [('postcode', '12345')]
 
  52 @pytest.mark.parametrize("postcode", ('123456', '', '   ', '.....',
 
  53                                       'DE  12345', 'DEF12345', 'CH 12345'))
 
  54 @pytest.mark.sanitizer_params(convert_to_address=False)
 
  55 def test_postcode_drop_bad_format(sanitize, postcode):
 
  56     assert sanitize(country='de', postcode=postcode) == []
 
  59 @pytest.mark.parametrize("postcode", ('1234', '9435', '99000'))
 
  60 def test_postcode_cyprus_pass(sanitize, postcode):
 
  61     assert sanitize(country='cy', postcode=postcode) == [('postcode', postcode)]
 
  64 @pytest.mark.parametrize("postcode", ('91234', '99a45', '567'))
 
  65 @pytest.mark.sanitizer_params(convert_to_address=False)
 
  66 def test_postcode_cyprus_fail(sanitize, postcode):
 
  67     assert sanitize(country='cy', postcode=postcode) == []
 
  70 @pytest.mark.parametrize("postcode", ('123456', 'A33F2G7'))
 
  71 def test_postcode_kazakhstan_pass(sanitize, postcode):
 
  72     assert sanitize(country='kz', postcode=postcode) == [('postcode', postcode)]
 
  75 @pytest.mark.parametrize("postcode", ('V34T6Y923456', '99345'))
 
  76 @pytest.mark.sanitizer_params(convert_to_address=False)
 
  77 def test_postcode_kazakhstan_fail(sanitize, postcode):
 
  78     assert sanitize(country='kz', postcode=postcode) == []
 
  81 @pytest.mark.parametrize("postcode", ('675 34', '67534', 'SE-675 34', 'SE67534'))
 
  82 def test_postcode_sweden_pass(sanitize, postcode):
 
  83     assert sanitize(country='se', postcode=postcode) == [('postcode', '675 34')]
 
  86 @pytest.mark.parametrize("postcode", ('67 345', '671123'))
 
  87 @pytest.mark.sanitizer_params(convert_to_address=False)
 
  88 def test_postcode_sweden_fail(sanitize, postcode):
 
  89     assert sanitize(country='se', postcode=postcode) == []
 
  92 @pytest.mark.parametrize("postcode", ('AD123', '123', 'AD 123', 'AD-123'))
 
  93 def test_postcode_andorra_pass(sanitize, postcode):
 
  94     assert sanitize(country='ad', postcode=postcode) == [('postcode', 'AD123')]
 
  97 @pytest.mark.parametrize("postcode", ('AD1234', 'AD AD123', 'XX123'))
 
  98 @pytest.mark.sanitizer_params(convert_to_address=False)
 
  99 def test_postcode_andorra_fail(sanitize, postcode):
 
 100     assert sanitize(country='ad', postcode=postcode) == []
 
 103 @pytest.mark.parametrize("postcode", ('AI-2640', '2640', 'AI 2640'))
 
 104 def test_postcode_anguilla_pass(sanitize, postcode):
 
 105     assert sanitize(country='ai', postcode=postcode) == [('postcode', 'AI-2640')]
 
 108 @pytest.mark.parametrize("postcode", ('AI-2000', 'AI US-2640', 'AI AI-2640'))
 
 109 @pytest.mark.sanitizer_params(convert_to_address=False)
 
 110 def test_postcode_anguilla_fail(sanitize, postcode):
 
 111     assert sanitize(country='ai', postcode=postcode) == []
 
 114 @pytest.mark.parametrize("postcode", ('BN1111', 'BN 1111', 'BN BN1111', 'BN BN 1111'))
 
 115 def test_postcode_brunei_pass(sanitize, postcode):
 
 116     assert sanitize(country='bn', postcode=postcode) == [('postcode', 'BN1111')]
 
 119 @pytest.mark.parametrize("postcode", ('BN-1111', 'BNN1111'))
 
 120 @pytest.mark.sanitizer_params(convert_to_address=False)
 
 121 def test_postcode_brunei_fail(sanitize, postcode):
 
 122     assert sanitize(country='bn', postcode=postcode) == []
 
 125 @pytest.mark.parametrize("postcode", ('IM1 1AA', 'IM11AA', 'IM IM11AA'))
 
 126 def test_postcode_isle_of_man_pass(sanitize, postcode):
 
 127     assert sanitize(country='im', postcode=postcode) == [('postcode', 'IM1 1AA')]
 
 130 @pytest.mark.parametrize("postcode", ('IZ1 1AA', 'IM1 AA'))
 
 131 @pytest.mark.sanitizer_params(convert_to_address=False)
 
 132 def test_postcode_isle_of_man_fail(sanitize, postcode):
 
 133     assert sanitize(country='im', postcode=postcode) == []
 
 136 @pytest.mark.parametrize("postcode", ('JE5 0LA', 'JE50LA', 'JE JE50LA', 'je JE5 0LA'))
 
 137 def test_postcode_jersey_pass(sanitize, postcode):
 
 138     assert sanitize(country='je', postcode=postcode) == [('postcode', 'JE5 0LA')]
 
 141 @pytest.mark.parametrize("postcode", ('gb JE5 0LA', 'IM50LA', 'IM5 012'))
 
 142 @pytest.mark.sanitizer_params(convert_to_address=False)
 
 143 def test_postcode_jersey_fail(sanitize, postcode):
 
 144     assert sanitize(country='je', postcode=postcode) == []
 
 147 @pytest.mark.parametrize("postcode", ('KY1-1234', '1-1234', 'KY 1-1234'))
 
 148 def test_postcode_cayman_islands_pass(sanitize, postcode):
 
 149     assert sanitize(country='ky', postcode=postcode) == [('postcode', 'KY1-1234')]
 
 152 @pytest.mark.parametrize("postcode", ('KY-1234', 'KZ1-1234', 'KY1 1234', 'KY1-123', 'KY KY1-1234'))
 
 153 @pytest.mark.sanitizer_params(convert_to_address=False)
 
 154 def test_postcode_cayman_islands_fail(sanitize, postcode):
 
 155     assert sanitize(country='ky', postcode=postcode) == []
 
 158 @pytest.mark.parametrize("postcode", ('LC11 222', '11 222', '11222', 'LC 11 222'))
 
 159 def test_postcode_saint_lucia_pass(sanitize, postcode):
 
 160     assert sanitize(country='lc', postcode=postcode) == [('postcode', 'LC11 222')]
 
 163 @pytest.mark.parametrize("postcode", ('11 2222', 'LC LC11 222'))
 
 164 @pytest.mark.sanitizer_params(convert_to_address=False)
 
 165 def test_postcode_saint_lucia_fail(sanitize, postcode):
 
 166     assert sanitize(country='lc', postcode=postcode) == []
 
 169 @pytest.mark.parametrize("postcode", ('LV-1111', '1111', 'LV 1111', 'LV1111',))
 
 170 def test_postcode_latvia_pass(sanitize, postcode):
 
 171     assert sanitize(country='lv', postcode=postcode) == [('postcode', 'LV-1111')]
 
 174 @pytest.mark.parametrize("postcode", ('111', '11111', 'LV LV-1111'))
 
 175 @pytest.mark.sanitizer_params(convert_to_address=False)
 
 176 def test_postcode_latvia_fail(sanitize, postcode):
 
 177     assert sanitize(country='lv', postcode=postcode) == []
 
 180 @pytest.mark.parametrize("postcode", ('MD-1111', '1111', 'MD 1111', 'MD1111'))
 
 181 def test_postcode_moldova_pass(sanitize, postcode):
 
 182     assert sanitize(country='md', postcode=postcode) == [('postcode', 'MD-1111')]
 
 185 @pytest.mark.parametrize("postcode", ("MD MD-1111", "MD MD1111", "MD MD 1111"))
 
 186 @pytest.mark.sanitizer_params(convert_to_address=False)
 
 187 def test_postcode_moldova_fail(sanitize, postcode):
 
 188     assert sanitize(country='md', postcode=postcode) == []
 
 191 @pytest.mark.parametrize("postcode", ('VLT 1117', 'GDJ 1234', 'BZN 2222'))
 
 192 def test_postcode_malta_pass(sanitize, postcode):
 
 193     assert sanitize(country='mt', postcode=postcode) == [('postcode', postcode)]
 
 196 @pytest.mark.parametrize("postcode", ('MTF 1111', 'MT MTF 1111', 'MTF1111', 'MT MTF1111'))
 
 197 def test_postcode_malta_mtarfa_pass(sanitize, postcode):
 
 198     assert sanitize(country='mt', postcode=postcode) == [('postcode', 'MTF 1111')]
 
 201 @pytest.mark.parametrize("postcode", ('1111', 'MTMT 1111'))
 
 202 @pytest.mark.sanitizer_params(convert_to_address=False)
 
 203 def test_postcode_malta_fail(sanitize, postcode):
 
 204     assert sanitize(country='mt', postcode=postcode) == []
 
 207 @pytest.mark.parametrize("postcode", ('VC1111', '1111', 'VC-1111', 'VC 1111'))
 
 208 def test_postcode_saint_vincent_pass(sanitize, postcode):
 
 209     assert sanitize(country='vc', postcode=postcode) == [('postcode', 'VC1111')]
 
 212 @pytest.mark.parametrize("postcode", ('VC11', 'VC VC1111'))
 
 213 @pytest.mark.sanitizer_params(convert_to_address=False)
 
 214 def test_postcode_saint_vincent_fail(sanitize, postcode):
 
 215     assert sanitize(country='vc', postcode=postcode) == []
 
 218 @pytest.mark.parametrize("postcode", ('VG1111', '1111', 'VG 1111', 'VG-1111'))
 
 219 def test_postcode_virgin_islands_pass(sanitize, postcode):
 
 220     assert sanitize(country='vg', postcode=postcode) == [('postcode', 'VG1111')]
 
 223 @pytest.mark.parametrize("postcode", ('111', '11111', 'VG VG1111'))
 
 224 @pytest.mark.sanitizer_params(convert_to_address=False)
 
 225 def test_postcode_virgin_islands_fail(sanitize, postcode):
 
 226     assert sanitize(country='vg', postcode=postcode) == []
 
 229 @pytest.mark.parametrize("postcode", ('AB1', '123-456-7890', '1 as 44'))
 
 230 @pytest.mark.sanitizer_params(default_pattern='[A-Z0-9- ]{3,12}')
 
 231 def test_postcode_default_pattern_pass(sanitize, postcode):
 
 232     assert sanitize(country='an', postcode=postcode) == [('postcode', postcode.upper())]
 
 235 @pytest.mark.parametrize("postcode", ('C', '12', 'ABC123DEF 456', '1234,5678', '11223;11224'))
 
 236 @pytest.mark.sanitizer_params(convert_to_address=False, default_pattern='[A-Z0-9- ]{3,12}')
 
 237 def test_postcode_default_pattern_fail(sanitize, postcode):
 
 238     assert sanitize(country='an', postcode=postcode) == []