1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Sanitizer that cleans and normalizes housenumbers.

Arguments:
    delimiters: Define the set of characters to be used for
                splitting a list of housenumbers into parts. (default: ',;')
"""
15 from nominatim.tokenizer.sanitizers.helpers import create_split_regex
class _HousenumberSanitizer:
    """ Callable that splits housenumber lists found in address items
        into single housenumbers and stores each of them as an address
        item of kind 'housenumber'.
    """

    def __init__(self, config):
        """ Set up the sanitizer from the configuration `config`.

            'filter-kind' lists the kinds of address items that are
            handled as housenumbers (default: only 'housenumber').
            The expression used for splitting is built from the same
            configuration by create_split_regex().
        """
        self.kinds = config.get('filter-kind', ('housenumber', ))
        self.split_regexp = create_split_regex(config)


    def __call__(self, obj):
        """ Rewrite `obj.address` in place.

            Items whose kind is listed in `self.kinds` are replaced by one
            clone per housenumber produced by sanitize(), each with the
            kind 'housenumber'. All other items are kept as they are and
            the original order is preserved.
        """
        # Nothing to do when there is no address information at all.
        if not obj.address:
            return

        new_address = []
        for item in obj.address:
            if item.kind in self.kinds:
                new_address.extend(item.clone(kind='housenumber', name=n)
                                   for n in self.sanitize(item.name))
            else:
                # Don't touch other address items.
                new_address.append(item)

        obj.address = new_address


    def sanitize(self, value):
        """ Extract housenumbers in a regularized format from an OSM value.

            The function works as a generator that yields all valid housenumbers
            that can be created from the value.
        """
        for hnr in self.split_regexp.split(value):
            # Splitting leaves empty strings behind for doubled, leading or
            # trailing delimiters. These are not housenumbers.
            if hnr:
                yield from self._regularize(hnr)


    def _regularize(self, hnr):
        """ Yield the regularized variants of a single housenumber.

            Currently a pass-through: the housenumber is yielded unchanged.
            It must remain a generator because sanitize() delegates to it
            with `yield from`.
        """
        yield hnr
def create(config):
    """ Create a housenumber processing function.

        Returns a callable _HousenumberSanitizer set up from `config`.
    """
    # NOTE(review): the function header is not visible in the reviewed
    # text; the name `create` follows the sanitizer module protocol -
    # confirm against the original file.
    return _HousenumberSanitizer(config)