1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Helper functions for sanitizers.
12 from nominatim.errors import UsageError
def create_split_regex(config, default=',;'):
    """ Converts the 'delimiters' parameter in the configuration into a
        compiled regular expression that can be used to split the names on the
        delimiters. The regular expression makes sure that the resulting names
        are stripped and that repeated delimiters are ignored but it will
        still create empty fields on occasion (for example when two
        delimiters are separated by whitespace only). The calling code
        needs to filter those.

        The 'default' parameter defines the delimiter set to be used when
        'delimiters' is not explicitly configured. Every character of the
        configured string is treated as one delimiter.

        Raises a UsageError when the resulting delimiter set is empty.
    """
    delimiter_set = set(config.get('delimiters', default))
    if not delimiter_set:
        raise UsageError("Empty 'delimiter' parameter not allowed for sanitizer.")

    # re.escape() only quotes characters that are special in a regular
    # expression. Blindly prefixing a backslash would turn letters and
    # digits into escape sequences (e.g. 'd' -> '\d' matches any digit).
    # Sorting keeps the generated pattern independent of set ordering.
    char_class = ''.join(re.escape(d) for d in sorted(delimiter_set))

    return re.compile(r'\s*[{}]+\s*'.format(char_class))