1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2025 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Functions for formatting postcodes according to their country-specific
format.
"""
from typing import Any, Mapping, Optional, Set, Match
import re

from ..errors import UsageError
from . import country_info
class CountryPostcodeMatcher:
    """ Matches and formats a postcode according to a format definition
        of the given country.

        The format definition is a mapping with a mandatory 'pattern'
        entry and an optional 'output' entry. Every 'd' in the pattern
        is replaced by a digit class and every 'l' by an upper-case
        ASCII letter class; the result is used as a regular expression.
        'output' is a template as understood by `re.Match.expand()`
        and defaults to the complete matched postcode.
    """
    def __init__(self, country_code: str, config: Mapping[str, Any]) -> None:
        """ Compile the patterns for the given country.

            Raises a UsageError when `config` has no 'pattern' entry.
        """
        if 'pattern' not in config:
            raise UsageError("Field 'pattern' required for 'postcode' "
                             f"for country '{country_code}'")

        pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')

        # Accepts surrounding whitespace and an optional leading country
        # code; group 1 holds the bare postcode.
        self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?({pc_pattern})\\s*')
        self.pattern = re.compile(pc_pattern)

        # We want to exclude 0000, 00-000, 000 00 etc
        self.zero_pattern = re.compile(r'^[0\- ]+$')

        self.output = config.get('output', r'\g<0>')

    def match(self, postcode: str) -> Optional[Match[str]]:
        """ Match the given postcode against the postcode pattern for this
            matcher. Returns a `re.Match` object if the match was successful
            and None otherwise. Postcodes that consist only of zeros,
            dashes and spaces are never considered a match.
        """
        # Upper-case, strip spaces and leading country code.
        normalized = self.norm_pattern.fullmatch(postcode.upper())

        if normalized is None:
            return None

        # Match again on the bare postcode so that the group numbers of
        # the returned match are the ones of the configured pattern.
        match = self.pattern.fullmatch(normalized.group(1))
        if match and self.zero_pattern.match(match.string):
            return None

        return match

    def normalize(self, match: Match[str]) -> str:
        """ Return the default format of the postcode for the given match.
            `match` must be a `re.Match` object previously returned by
            `CountryPostcodeMatcher.match()`.
        """
        return match.expand(self.output)
class PostcodeFormatter:
    """ Container for different postcode formats of the world and
        access functions.
    """
    def __init__(self) -> None:
        """ Set up the matchers from the 'postcode' entries of the
            country information.

            Raises a UsageError when an entry is neither a dictionary
            nor marks the country as having no postcodes.
        """
        # Objects without a country code can't have a postcode per definition.
        self.country_without_postcode: Set[Optional[str]] = {None}
        self.country_matcher: dict[str, CountryPostcodeMatcher] = {}
        self.default_matcher = CountryPostcodeMatcher('', {'pattern': '.*'})
        self.postcode_extent: dict[Optional[str], int] = {}

        for ccode, prop in country_info.iterate('postcode'):
            # NOTE(review): assumes a country without postcodes is
            # configured with a literal false value - confirm against
            # the country settings.
            if prop is False:
                self.country_without_postcode.add(ccode)
            elif isinstance(prop, dict):
                self.country_matcher[ccode] = CountryPostcodeMatcher(ccode, prop)
                # The extent is optional, get_postcode_extent() supplies
                # the default for countries without one.
                if 'extent' in prop:
                    self.postcode_extent[ccode] = int(prop['extent'])
            else:
                raise UsageError(f"Invalid entry 'postcode' for country '{ccode}'")

    def set_default_pattern(self, pattern: str) -> None:
        """ Set the postcode match pattern to use, when a country does not
            have a specific pattern.
        """
        self.default_matcher = CountryPostcodeMatcher('', {'pattern': pattern})

    def get_matcher(self, country_code: Optional[str]) -> 'Optional[CountryPostcodeMatcher]':
        """ Return the CountryPostcodeMatcher for the given country.
            Returns None if the country doesn't have a postcode and the
            default matcher if there is no specific matcher configured for
            the country.
        """
        if country_code in self.country_without_postcode:
            return None

        # None is always part of country_without_postcode. The assert
        # only narrows the type for the lookup below.
        assert country_code is not None

        return self.country_matcher.get(country_code, self.default_matcher)

    def match(self, country_code: Optional[str], postcode: str) -> Optional[Match[str]]:
        """ Match the given postcode against the postcode pattern for this
            matcher. Returns a `re.Match` object if the country has a pattern
            and the match was successful or None if the match failed.
        """
        if country_code in self.country_without_postcode:
            return None

        # None is always part of country_without_postcode. The assert
        # only narrows the type for the lookup below.
        assert country_code is not None

        return self.country_matcher.get(country_code, self.default_matcher).match(postcode)

    def normalize(self, country_code: str, match: Match[str]) -> str:
        """ Return the default format of the postcode for the given match.
            `match` must be a `re.Match` object previously returned by
            `match()` for the same country.
        """
        return self.country_matcher.get(country_code, self.default_matcher).normalize(match)

    def get_postcode_extent(self, country_code: Optional[str]) -> int:
        """ Return the extent (in m) to use for the given country. If no
            specific extent is set, then the default of 5km will be returned.
        """
        return self.postcode_extent.get(country_code, 5000)