1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2026 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Tests for the sanitizer that deletes names.
"""
12 from nominatim_db.data.place_info import PlaceInfo
13 from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
class TestWithDefault:
    """Checks for the `delete-names` step when only the step name is configured."""

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        """Remember the `def_config` fixture so each test can build a sanitizer."""
        self.config = def_config

    def run_sanitizer_on(self, type, **kwargs):
        """Run the sanitizer over a place carrying `kwargs` in its `type` section.

        Every underscore in a keyword name is replaced by a colon, so
        `ref_abc='baz'` is handed over as the tag `ref:abc`.

        Returns a dict with the keys 'name' and 'address', each holding the
        sorted `(name, kind, suffix)` tuples of the processed entries; a
        falsy suffix is reported as ''.
        """
        tags = {key.replace('_', ':'): value for key, value in kwargs.items()}
        place = PlaceInfo({type: tags, 'country_code': 'de', 'rank_address': 30})

        sanitizer = PlaceSanitizer([{'step': 'delete-names'}], self.config)
        names, address = sanitizer.process_names(place)

        def as_tuples(entries):
            return sorted((p.name, p.kind, p.suffix or '') for p in entries)

        return {'name': as_tuples(names), 'address': as_tuples(address)}

    def test_on_name(self):
        """Tags placed in the name section are all expected to be removed."""
        res = self.run_sanitizer_on('name', name='foo', ref='bar', ref_abc='baz')

        assert res.get('name') == []

    def test_on_address(self):
        """Tags placed in the address section are expected to be kept."""
        res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')

        # NOTE(review): the last tuple was cut off in the reviewed copy and is
        # reconstructed from the third input tag -- confirm.
        assert res.get('address') == [('bar', 'ref', ''), ('baz', 'ref', 'abc'),
                                      ('foo', 'name', '')]
# NOTE(review): the `class` statement for this group of methods was not present
# in the reviewed copy; the class name is a reconstruction -- confirm.
class TestTypeField:
    """Checks driven by the `type` argument handed to the sanitizer."""

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        """Store the `def_config` fixture on the test instance."""
        self.config = def_config

    def run_sanitizer_on(self, type, **kwargs):
        """Sanitize a place whose name section is built from `kwargs`.

        Underscores in the keyword names are replaced by colons. The tags
        always go into the 'name' section of the place; `type` is forwarded
        to the sanitizer configuration.

        Returns the sorted `(name, kind, suffix)` tuples of the processed
        name list, with a falsy suffix reported as ''.
        """
        place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                           'country_code': 'de', 'rank_address': 30})

        # NOTE(review): the second entry of this dict was cut off in the
        # reviewed copy; the 'type' key is reconstructed from the otherwise
        # unused `type` parameter -- confirm the option name.
        sanitizer_args = {'step': 'delete-names',
                          'type': type}

        name, _ = PlaceSanitizer([sanitizer_args],
                                 self.config).process_names(place)

        return sorted([(p.name, p.kind, p.suffix or '') for p in name])

    def test_name_type(self):
        """Passing 'name' is expected to leave no names behind."""
        res = self.run_sanitizer_on('name', name='foo', ref='bar', ref_abc='baz')

        # NOTE(review): no assertion was visible for this test; `== []`
        # is a reconstruction -- confirm.
        assert res == []

    def test_address_type(self):
        """Passing 'address' is expected to leave the names untouched."""
        res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')

        # NOTE(review): the last tuple was cut off in the reviewed copy and is
        # reconstructed from the third input tag -- confirm.
        assert res == [('bar', 'ref', ''), ('baz', 'ref', 'abc'),
                       ('foo', 'name', '')]
# NOTE(review): the `class` statement for this group of methods was not present
# in the reviewed copy; the class name is a reconstruction -- confirm.
class TestFilterKind:
    """Checks that pass a list of kind patterns (`filt`) to the sanitizer."""

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        """Store the `def_config` fixture on the test instance."""
        self.config = def_config

    def run_sanitizer_on(self, filt, **kwargs):
        """Sanitize a place whose name section is built from `kwargs`.

        Underscores in the keyword names are replaced by colons, so
        `name_fr='foo'` is handed over as the tag `name:fr`.

        Returns the sorted `(name, kind, suffix)` tuples of the processed
        name list, with a falsy suffix reported as ''.
        """
        place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                           'country_code': 'de', 'rank_address': 30})

        # NOTE(review): the second entry of this dict was cut off in the
        # reviewed copy; 'filter-kind' is reconstructed from the otherwise
        # unused `filt` parameter -- confirm the option name.
        sanitizer_args = {'step': 'delete-names',
                          'filter-kind': filt}

        name, _ = PlaceSanitizer([sanitizer_args],
                                 self.config).process_names(place)

        return sorted([(p.name, p.kind, p.suffix or '') for p in name])

    def test_single_exact_name(self):
        """With the filter 'name', only the two `ref` entries are expected back."""
        res = self.run_sanitizer_on(['name'], ref='foo', name='foo',
                                    name_abc='bar', ref_abc='bar')

        assert res == [('bar', 'ref', 'abc'), ('foo', 'ref', '')]

    def test_single_pattern(self):
        """With '.*name', the kinds `namexx` and `ref` are expected to survive."""
        res = self.run_sanitizer_on(['.*name'],
                                    name_fr='foo', ref_fr='foo', namexx_fr='bar',
                                    shortname_fr='bar', name='bar')

        assert res == [('bar', 'namexx', 'fr'), ('foo', 'ref', 'fr')]

    def test_multiple_patterns(self):
        """With '.*name' and 'ref', `namexx` and `oldref` are expected to survive."""
        res = self.run_sanitizer_on(['.*name', 'ref'],
                                    name_fr='foo', ref_fr='foo', oldref_fr='foo',
                                    namexx_fr='bar', shortname_fr='baz', name='baz')

        assert res == [('bar', 'namexx', 'fr'), ('foo', 'oldref', 'fr')]
class TestRankAddress:
    """Checks for the `filter-rank` option; every test place has address rank 30."""

    # NOTE(review): the *_rank / *_pass tests carried no assertion in the
    # reviewed copy. `== []` is the counterpart of the *_fail expectations,
    # which list every input name -- confirm.

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        """Store the `def_config` fixture on the test instance."""
        self.config = def_config

    def run_sanitizer_on(self, rank_addr, **kwargs):
        """Sanitize a rank-30 place with `rank_addr` as the `filter-rank` value.

        The name section is built from `kwargs`, with underscores in the
        keyword names replaced by colons.

        Returns the sorted `(name, kind, suffix)` tuples of the processed
        name list, with a falsy suffix reported as ''.
        """
        place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                           'country_code': 'de', 'rank_address': 30})

        sanitizer_args = {'step': 'delete-names',
                          'filter-rank': rank_addr}

        name, _ = PlaceSanitizer([sanitizer_args],
                                 self.config).process_names(place)

        return sorted([(p.name, p.kind, p.suffix or '') for p in name])

    def test_single_rank(self):
        """A single rank equal to the place's rank."""
        res = self.run_sanitizer_on('30', name='foo', ref='bar')

        assert res == []

    def test_single_rank_fail(self):
        """A single rank different from the place's rank keeps all names."""
        res = self.run_sanitizer_on('28', name='foo', ref='bar')

        assert res == [('bar', 'ref', ''), ('foo', 'name', '')]

    def test_ranged_rank_pass(self):
        """A range whose upper bound is the place's rank."""
        res = self.run_sanitizer_on('26-30', name='foo', ref='bar')

        assert res == []

    def test_ranged_rank_fail(self):
        """A range ending just below the place's rank keeps all names."""
        res = self.run_sanitizer_on('26-29', name='foo', ref='bar')

        assert res == [('bar', 'ref', ''), ('foo', 'name', '')]

    def test_mixed_rank_pass(self):
        """A list of ranks and ranges that contains the place's rank."""
        res = self.run_sanitizer_on(['4', '20-28', '30', '10-12'], name='foo', ref='bar')

        assert res == []

    def test_mixed_rank_fail(self):
        """A list of ranks and ranges that misses rank 30 keeps all names."""
        res = self.run_sanitizer_on(['4-8', '10', '26-29', '18'], name='foo', ref='bar')

        assert res == [('bar', 'ref', ''), ('foo', 'name', '')]
# NOTE(review): the `class` statement for this group of methods was not present
# in the reviewed copy; the class name is a reconstruction -- confirm.
class TestSuffix:
    """Checks for the `filter-suffix` option."""

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        """Store the `def_config` fixture on the test instance."""
        self.config = def_config

    def run_sanitizer_on(self, suffix, **kwargs):
        """Sanitize a place with `suffix` as the `filter-suffix` value.

        The name section is built from `kwargs`; underscores in the keyword
        names are replaced by colons, so `name_abc='foo'` becomes `name:abc`.

        Returns the sorted `(name, kind, suffix)` tuples of the processed
        name list, with a falsy suffix reported as ''.
        """
        tags = {key.replace('_', ':'): value for key, value in kwargs.items()}
        place = PlaceInfo({'name': tags, 'country_code': 'de', 'rank_address': 30})

        sanitizer = PlaceSanitizer([{'step': 'delete-names',
                                     'filter-suffix': suffix}],
                                   self.config)
        remaining, _ = sanitizer.process_names(place)

        return sorted((p.name, p.kind, p.suffix or '') for p in remaining)

    def test_single_suffix(self):
        """With the suffix 'abc', the two `:abc` entries are expected to go."""
        res = self.run_sanitizer_on('abc', name='foo', name_abc='foo',
                                    name_pqr='bar', ref='bar', ref_abc='baz')

        assert res == [('bar', 'name', 'pqr'), ('bar', 'ref', ''), ('foo', 'name', '')]

    def test_multiple_suffix(self):
        """With 'abc.*' and 'pqr', only `name` and `name:pqrxx` are expected back."""
        res = self.run_sanitizer_on(['abc.*', 'pqr'], name='foo', name_abcxx='foo',
                                    ref_pqr='bar', name_pqrxx='baz')

        assert res == [('baz', 'name', 'pqrxx'), ('foo', 'name', '')]
class TestCountryCodes:
    """Checks for the `filter-country` option; every test place is in 'de'."""

    # NOTE(review): the *_pass tests carried no assertion in the reviewed
    # copy. `== []` is the counterpart of the *_fail expectations, which
    # list every input name -- confirm.

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        """Store the `def_config` fixture on the test instance."""
        self.config = def_config

    def run_sanitizer_on(self, country_code, **kwargs):
        """Sanitize a 'de' place with `country_code` as the `filter-country` value.

        The name section is built from `kwargs`, with underscores in the
        keyword names replaced by colons.

        Returns the sorted `(name, kind)` tuples of the processed name list.
        Unlike the sibling helpers in this file, the suffix is not reported.
        """
        place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                           'country_code': 'de', 'rank_address': 30})

        sanitizer_args = {'step': 'delete-names',
                          'filter-country': country_code}

        name, _ = PlaceSanitizer([sanitizer_args],
                                 self.config).process_names(place)

        return sorted([(p.name, p.kind) for p in name])

    def test_single_country_code_pass(self):
        """A single country code equal to the place's country."""
        res = self.run_sanitizer_on('de', name='foo', ref='bar')

        assert res == []

    def test_single_country_code_fail(self):
        """A single country code different from the place's keeps all names."""
        res = self.run_sanitizer_on('in', name='foo', ref='bar')

        assert res == [('bar', 'ref'), ('foo', 'name')]

    def test_empty_country_code_list(self):
        """An empty list of country codes keeps all names."""
        res = self.run_sanitizer_on([], name='foo', ref='bar')

        assert res == [('bar', 'ref'), ('foo', 'name')]

    def test_multiple_country_code_pass(self):
        """A list of country codes that contains 'de'."""
        res = self.run_sanitizer_on(['in', 'de', 'fr'], name='foo', ref='bar')

        assert res == []

    def test_multiple_country_code_fail(self):
        """A list of country codes without 'de' keeps all names."""
        res = self.run_sanitizer_on(['in', 'au', 'fr'], name='foo', ref='bar')

        assert res == [('bar', 'ref'), ('foo', 'name')]
class TestAllParameters:
    """Checks that combine the kind, country, rank, suffix and name filters."""

    # NOTE(review): the *_pass tests carried no assertion in the reviewed
    # copy. `== []` is the counterpart of the *_fail expectations, which
    # list every input name -- confirm.

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        """Store the `def_config` fixture on the test instance."""
        self.config = def_config

    def run_sanitizer_on(self, country_code, rank_addr, suffix, **kwargs):
        """Sanitize a 'de', rank-30 place with all filter options set.

        `country_code`, `rank_addr` and `suffix` are passed on as
        `filter-country`, `filter-rank` and `filter-suffix`. The kind filter
        is fixed to 'name' and 'ref' and the name filter to a pattern built
        from `[\\s\\S]*`. The name section is built from `kwargs`, with
        underscores in the keyword names replaced by colons.

        Returns the sorted `(name, kind, suffix)` tuples of the processed
        name list, with a falsy suffix reported as ''.
        """
        place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                           'country_code': 'de', 'rank_address': 30})

        # NOTE(review): the opening and closing lines of this dict were
        # missing from the reviewed copy, and its numbering skips one line
        # after 'step' -- confirm that no entry is missing here.
        sanitizer_args = {
            'step': 'delete-names',
            'filter-kind': ['name', 'ref'],
            'filter-country': country_code,
            'filter-rank': rank_addr,
            'filter-suffix': suffix,
            'filter-name': r'[\s\S]*',
        }

        name, _ = PlaceSanitizer([sanitizer_args],
                                 self.config).process_names(place)

        return sorted([(p.name, p.kind, p.suffix or '') for p in name])

    def test_string_arguments_pass(self):
        """All options given as plain strings, each covering the test place."""
        res = self.run_sanitizer_on('de', '25-30', r'[\s\S]*',
                                    name='foo', ref='foo', name_abc='bar', ref_abc='baz')

        assert res == []

    def test_string_arguments_fail(self):
        """Plain-string options with a country code other than 'de'."""
        res = self.run_sanitizer_on('in', '25-30', r'[\s\S]*',
                                    name='foo', ref='foo', name_abc='bar', ref_abc='baz')

        assert res == [('bar', 'name', 'abc'), ('baz', 'ref', 'abc'),
                       ('foo', 'name', ''), ('foo', 'ref', '')]

    def test_list_arguments_pass(self):
        """All options given as lists, each covering the test place."""
        res = self.run_sanitizer_on(['de', 'in'], ['20-28', '30'], [r'abc.*', r'[\s\S]*'],
                                    name='foo', ref='foo', name_abcxx='bar', ref_pqr='baz')

        assert res == []

    def test_list_arguments_fail(self):
        """List options whose ranks do not include 30."""
        res = self.run_sanitizer_on(['de', 'in'], ['14', '20-29'], [r'abc.*', r'pqr'],
                                    name='foo', ref_abc='foo', name_abcxx='bar', ref_pqr='baz')

        assert res == [('bar', 'name', 'abcxx'), ('baz', 'ref', 'pqr'),
                       ('foo', 'name', ''), ('foo', 'ref', 'abc')]

    def test_mix_arguments_pass(self):
        """A mix of string and list options, each covering the test place."""
        res = self.run_sanitizer_on('de', ['10', '20-28', '30'], r'[\s\S]*',
                                    name_abc='foo', ref_abc='foo',
                                    name_abcxx='bar', ref_pqr='baz')

        assert res == []

    def test_mix_arguments_fail(self):
        """Mixed options with the suffix pattern 'abc.*', which no input tag has."""
        res = self.run_sanitizer_on(['de', 'in'], ['10', '20-28', '30'], r'abc.*',
                                    name='foo', ref='foo', name_pqr='bar', ref_pqr='baz')

        assert res == [('bar', 'name', 'pqr'), ('baz', 'ref', 'pqr'),
                       ('foo', 'name', ''), ('foo', 'ref', '')]