1 # SPDX-License-Identifier: GPL-2.0-only
 
   3 # This file is part of Nominatim. (https://nominatim.org)
 
   5 # Copyright (C) 2022 by the Nominatim developer community.
 
   6 # For a full list of authors see the git log.
 
"""
Tests for the sanitizer that splits multivalue lists.
"""
 
  12 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
 
  13 from nominatim.indexer.place_info import PlaceInfo
 
  15 from nominatim.errors import UsageError
 
  17 def run_sanitizer_on(**kwargs):
 
  18     place = PlaceInfo({'name': kwargs})
 
  19     name, _ = PlaceSanitizer([{'step': 'split-name-list'}]).process_names(place)
 
  21     return sorted([(p.name, p.kind, p.suffix) for p in name])
 
  24 def sanitize_with_delimiter(delimiter, name):
 
  25     place = PlaceInfo({'name': {'name': name}})
 
  26     san = PlaceSanitizer([{'step': 'split-name-list', 'delimiters': delimiter}])
 
  27     name, _ = san.process_names(place)
 
  29     return sorted([p.name for p in name])
 
  33     assert run_sanitizer_on(name='ABC') == [('ABC', 'name', None)]
 
  34     assert run_sanitizer_on(name='') == [('', 'name', None)]
 
  38     assert run_sanitizer_on(name='A;B;C') == [('A', 'name', None),
 
  41     assert run_sanitizer_on(short_name=' House, boat ') == [('House', 'short_name', None),
 
  42                                                             ('boat', 'short_name', None)]
 
  45 def test_empty_fields():
 
  46     assert run_sanitizer_on(name='A;;B') == [('A', 'name', None),
 
  48     assert run_sanitizer_on(name='A; ,B') == [('A', 'name', None),
 
  50     assert run_sanitizer_on(name=' ;B') == [('B', 'name', None)]
 
  51     assert run_sanitizer_on(name='B,') == [('B', 'name', None)]
 
  54 def test_custom_delimiters():
 
  55     assert sanitize_with_delimiter(':', '12:45,3') == ['12', '45,3']
 
  56     assert sanitize_with_delimiter('\\', 'a;\\b!#@ \\') == ['a;', 'b!#@']
 
  57     assert sanitize_with_delimiter('[]', 'foo[to]be') == ['be', 'foo', 'to']
 
  58     assert sanitize_with_delimiter(' ', 'morning  sun') == ['morning', 'sun']
 
  61 def test_empty_delimiter_set():
 
  62     with pytest.raises(UsageError):
 
  63         sanitize_with_delimiter('', 'abc')
 
  66 def test_no_name_list():
 
  67     place = PlaceInfo({'address': {'housenumber': '3'}})
 
  68     name, address = PlaceSanitizer([{'step': 'split-name-list'}]).process_names(place)
 
  71     assert len(address) == 1