]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tokenizer/sanitizers/clean_postcodes.py
43d297695f1d2e7fc0be8087284aa9987ad5ee65
[nominatim.git] / nominatim / tokenizer / sanitizers / clean_postcodes.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Sanitizer that filters postcodes by their officially allowed pattern.
9
10 Arguments:
11     convert-to-address: If set to 'yes' (the default), then postcodes that do
12                         not conform with their country-specific pattern are
13                         converted to an address component. That means that
14                         the postcode does not take part when computing the
15                         postcode centroids of a country but is still searchable.
16                         When set to 'no', non-conforming postcodes are not
17                         searchable either.
18 """
19 from nominatim.data.postcode_format import PostcodeFormatter
20
class _PostcodeSanitizer:
    """ Sanitizer that checks the postcode entries of a place's address
        against the pattern known for the country of the place.

        Configuration options read from `config`:
          convert-to-address: boolean, defaults to True. Selects what happens
                              to postcodes that do not match: they are either
                              relabelled as 'unofficial_postcode' (True) or
                              removed from the address (False).
          default-pattern:    optional string. When given, it is handed to
                              the postcode formatter as its default pattern.
    """

    def __init__(self, config):
        self.convert_to_address = config.get_bool('convert-to-address', True)
        self.matcher = PostcodeFormatter()

        default_pattern = config.get('default-pattern')
        # Anything that is not a string is silently ignored.
        if default_pattern is not None and isinstance(default_pattern, str):
            self.matcher.set_default_pattern(default_pattern)


    def __call__(self, obj):
        """ Normalize or reject all address entries of kind 'postcode'.

            Conforming postcodes get their name replaced by the normalized
            form and receive a 'variant' attribute. Non-conforming ones are
            either turned into kind 'unofficial_postcode' or removed from
            `obj.address`, depending on `convert_to_address`.
            The address list is modified in place.
        """
        if not obj.address:
            return

        # Removing entries while iterating over the list would shift the
        # remaining entries and cause the entry following a removed one
        # to be skipped. Collect the positions first and delete afterwards.
        rejected = []

        for pos, item in enumerate(obj.address):
            if item.kind != 'postcode':
                continue

            formatted = self.scan(item.name, obj.place.country_code)

            if formatted is None:
                if self.convert_to_address:
                    item.kind = 'unofficial_postcode'
                else:
                    rejected.append(pos)
            else:
                item.name = formatted[0]
                item.set_attr('variant', formatted[1])

        # Delete back to front so that the pending positions stay valid.
        for pos in reversed(rejected):
            obj.address.pop(pos)


    def scan(self, postcode, country):
        """ Check the postcode for correct formatting and return the
            normalized version. Returns None if the postcode does not
            correspond to the official format of the given country.

            On success a tuple of two strings is returned: the normalized
            postcode and a variant string made up of all matched groups
            of the pattern joined by a single space.
        """
        match = self.matcher.match(country, postcode)
        if match is None:
            return None

        # Groups that did not take part in the match are None. Leave them out.
        variant = ' '.join(part for part in match.groups() if part is not None)

        return self.matcher.normalize(country, match), variant
62
63
64
65
def create(config):
    """ Create a sanitizer that filters postcodes by their officially
        allowed pattern, set up from the given configuration.
    """
    sanitizer = _PostcodeSanitizer(config)
    return sanitizer