1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2026 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Tests for the sanitizer that creates derived names using regular expressions.
"""
import pytest

from nominatim_db.errors import UsageError
from nominatim_db.data.place_info import PlaceInfo
from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
@pytest.fixture
def mk_sanitizer(def_config):
    """Provide a factory for a PlaceSanitizer with a single 'derive-names' step.

    The returned callable takes the options of the step as keyword
    arguments. Underscores in the keyword names are replaced with dashes,
    so `name_pattern='x'` is handed on as the option `name-pattern`.
    The sanitizer is created with the `def_config` given to this fixture.
    """
    def _mk_sanitizer(**kwargs):
        # Python identifiers cannot contain '-', so the option names are
        # spelled with '_' by the caller and converted here.
        args = {k.replace('_', '-'): v for k, v in kwargs.items()}
        return PlaceSanitizer([args | {'step': 'derive-names'}], def_config)

    return _mk_sanitizer
def test_no_parameters(mk_sanitizer):
    """Creating the sanitizer without any options must raise a UsageError
    whose message reports that name-pattern is missing.
    """
    with pytest.raises(UsageError, match='name-pattern is missing'):
        mk_sanitizer()
@pytest.mark.parametrize('prim,out', [('name', 0), ('address', 1)])
@pytest.mark.parametrize('keep', [True, False])
def test_name_deletion(mk_sanitizer, prim, out, keep):
    """Check a sanitizer that is given a name pattern but no variants.

    The same three names are supplied as names and as address parts.
    `prim` is passed as the `type` option and `out` is the index of the
    matching list in the result of `process_names()`. `keep` is passed
    as the `keep_original` option.
    """
    san = mk_sanitizer(name_pattern='A.*B', type=prim, keep_original=keep)

    names = dict(name='AltBoom', alt_name='foo', loc_name='AltB')
    place = PlaceInfo({'name': names, 'address': names,
                       'country_code': 'de', 'rank_address': 30})

    res = san.process_names(place)

    # The list that was not selected through `type` must come back complete.
    assert len(res[(out + 1) % 2]) == 3

    # NOTE(review): the `if keep:`/`else:` lines and the second entry of the
    # expected set were not present in the reviewed text. They are
    # reconstructed: the two assertions below cannot both hold for the same
    # result, and 'foo' is the only other name that 'A.*B' cannot match.
    # Please confirm against the sanitizer implementation.
    if keep:
        assert len(res[out]) == 3
    else:
        assert {(p.kind, p.name) for p in res[out]} == {('name', 'AltBoom'),
                                                        ('alt_name', 'foo')}
51 def simple_replace(mk_sanitizer):
52 def _impl(name, pattern, *variants, keep=True):
53 san = mk_sanitizer(name_pattern=pattern, type='name', keep_original=keep,
55 place = PlaceInfo({'name': {'name': name}, 'country_code': 'de', 'rank_address': 30})
56 out, _ = san.process_names(place)
58 return {p.name for p in out}
def test_variant_single_name(simple_replace):
    """With one variant and the original kept, both names are expected."""
    derived = simple_replace('A', 'A', 'The A')

    assert derived == {'A', 'The A'}
def test_replace_single_name(simple_replace):
    """With keep=False only the variant is expected in the result."""
    derived = simple_replace('A', 'A', 'The A', keep=False)

    assert derived == {'The A'}
def test_variant_with_multiple_names(simple_replace):
    """Several variants, one of them listed twice, are expected to yield
    the original name plus each distinct variant.
    """
    variants = ('A1', 'A2', 'A1')
    derived = simple_replace('A', 'A', *variants)

    assert derived == {'A', 'A1', 'A2'}
def test_variant_with_group_replacement(simple_replace):
    """A variant using the backreference \\1 is expected to receive the
    text captured by the group in the name pattern.
    """
    derived = simple_replace('abc X', '(.*) X', r'\1 Y')

    assert derived == {'abc X', 'abc Y'}