]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tokenizer/sanitizers/clean_housenumbers.py
b65880c38e5179e167b8a2711e31b079e900d8ab
[nominatim.git] / nominatim / tokenizer / sanitizers / clean_housenumbers.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Sanitizer that cleans and normalizes housenumbers.
9
10 Arguments:
11     delimiters: Define the set of characters to be used for
12                 splitting a list of housenumbers into parts. (default: ',;')
13
14 """
15 from nominatim.tokenizer.sanitizers.helpers import create_split_regex
16
17 class _HousenumberSanitizer:
18
19     def __init__(self, config):
20         self.kinds = config.get('filter-kind', ('housenumber', ))
21         self.split_regexp = create_split_regex(config)
22
23
24     def __call__(self, obj):
25         if not obj.address:
26             return
27
28         new_address = []
29         for item in obj.address:
30             if item.kind in self.kinds:
31                 new_address.extend(item.clone(kind='housenumber', name=n) for n in self.sanitize(item.name))
32             else:
33                 # Don't touch other address items.
34                 new_address.append(item)
35
36         obj.address = new_address
37
38
39     def sanitize(self, value):
40         """ Extract housenumbers in a regularized format from an OSM value.
41
42             The function works as a generator that yields all valid housenumbers
43             that can be created from the value.
44         """
45         for hnr in self.split_regexp.split(value):
46             if hnr:
47                 yield from self._regularize(hnr)
48
49
50     def _regularize(self, hnr):
51         yield hnr
52
53
def create(config):
    """ Build the housenumber sanitizer for the given configuration.

        The returned object is a callable that takes the object to be
        sanitized and rewrites its address items.
    """
    sanitizer = _HousenumberSanitizer(config)
    return sanitizer