1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Sanitizer that preprocesses tags from the TIGER import.
10 It makes the following changes:
12 * remove state reference from tiger:county
14 from typing import Callable
17 from nominatim.tokenizer.sanitizers.base import ProcessInfo
18 from nominatim.tokenizer.sanitizers.config import SanitizerConfig
# Pattern for a county name that carries a trailing state reference:
# any text, then a comma, a single space and exactly two uppercase
# ASCII letters. Group 1 holds the text in front of the final comma.
COUNTY_MATCH = re.compile(r'(.*), [A-Z][A-Z]')
22 def _clean_tiger_county(obj: ProcessInfo) -> None:
23 """ Remove the state reference from tiger:county tags.
25 This transforms a name like 'Hamilton, AL' into 'Hamilton'.
26 If no state reference is detected at the end, the name is left as is.
31 for item in obj.address:
32 if item.kind == 'tiger' and item.suffix == 'county':
33 m = COUNTY_MATCH.fullmatch(item.name)
36 # Switch kind and suffix, the split left them reversed.
def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]:
    """ Build the sanitizer callback that preprocesses TIGER import tags.

        The configuration argument is accepted but not consulted; the
        function handed back is always `_clean_tiger_county`.
    """
    sanitizer = _clean_tiger_county
    return sanitizer