]> git.openstreetmap.org Git - nominatim.git/blob - test/python/tokenizer/sanitizers/test_derive_names.py
prepare release 5.3.2.post5
[nominatim.git] / test / python / tokenizer / sanitizers / test_derive_names.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2026 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Tests for the sanitizer that creates derived names using regular expressions.
9 """
10 import pytest
11
12 from nominatim_db.errors import UsageError
13 from nominatim_db.data.place_info import PlaceInfo
14 from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
15
16
@pytest.fixture
def mk_sanitizer(def_config):
    """
    Provide a factory that builds a PlaceSanitizer with one 'derive-names' step.

    Keyword arguments given to the factory become the options of that step,
    with every underscore in the keyword name replaced by a dash.
    """
    def _create(**options):
        step = {}
        for key, value in options.items():
            step[key.replace('_', '-')] = value
        # Assigned last so that it wins over any 'step' option passed in.
        step['step'] = 'derive-names'

        return PlaceSanitizer([step], def_config)

    return _create
24
25
def test_no_parameters(mk_sanitizer):
    """
    Creating the sanitizer without any options must raise a UsageError
    whose message reports the missing name-pattern.
    """
    expected_failure = pytest.raises(UsageError, match='name-pattern is missing')

    with expected_failure:
        mk_sanitizer()
29
30
@pytest.mark.parametrize('prim,out', [('name', 0), ('address', 1)])
@pytest.mark.parametrize('keep', [True, False])
def test_name_deletion(mk_sanitizer, prim, out, keep):
    """
    Run the sanitizer without variants on the names or the address of a place.

    The list that is not selected by `prim` must keep all three entries.
    The selected list keeps all three entries when `keep` is set; otherwise
    only 'AltBoom' and 'foo' are expected to remain.
    """
    tags = {'name': 'AltBoom', 'alt_name': 'foo', 'loc_name': 'AltB'}
    place = PlaceInfo({'name': tags, 'address': tags,
                       'country_code': 'de', 'rank_address': 30})
    sanitizer = mk_sanitizer(name_pattern='A.*B', type=prim, keep_original=keep)

    result = sanitizer.process_names(place)

    # `out` is either 0 or 1, so `1 - out` indexes the other list.
    untouched = result[1 - out]
    assert len(untouched) == 3

    processed = result[out]
    if not keep:
        remaining = set()
        for item in processed:
            remaining.add((item.kind, item.name))
        assert remaining == {('name', 'AltBoom'), ('alt_name', 'foo')}
    else:
        assert len(processed) == 3
48
49
@pytest.fixture
def simple_replace(mk_sanitizer):
    """
    Provide a helper that runs the sanitizer on a place with a single name.

    The helper takes the name, the name pattern and any number of variants
    and returns the set of name strings found in the processed name list.
    Set `keep=False` to run the sanitizer with keep-original switched off.
    """
    def _run(name, pattern, *variants, keep=True):
        place = PlaceInfo({'name': {'name': name},
                           'country_code': 'de',
                           'rank_address': 30})
        sanitizer = mk_sanitizer(name_pattern=pattern, type='name',
                                 keep_original=keep, variants=variants)

        # Only the name list is of interest, the address list is ignored.
        names, _ = sanitizer.process_names(place)

        return {entry.name for entry in names}

    return _run
60
61
def test_variant_single_name(simple_replace):
    """A single variant is expected in addition to the original name."""
    names = simple_replace('A', 'A', 'The A')

    assert names == {'A', 'The A'}
64
65
def test_replace_single_name(simple_replace):
    """With keep switched off, only the variant is expected to remain."""
    names = simple_replace('A', 'A', 'The A', keep=False)

    assert names == {'The A'}
68
69
def test_variant_with_multiple_names(simple_replace):
    """
    Several variants are expected next to the original name.

    The result is compared as a set, so the variant 'A1', which is
    given twice, counts only once.
    """
    names = simple_replace('A', 'A', 'A1', 'A2', 'A1')

    assert names == {'A', 'A1', 'A2'}
72
73
def test_variant_with_group_replacement(simple_replace):
    """
    A variant containing a back-reference to the first group of the pattern
    is expected to be filled with the text matched by that group.
    """
    names = simple_replace('abc X', '(.*) X', r'\1 Y')

    assert names == {'abc X', 'abc Y'}