2 Name processor for tagging the language of the name
6 from nominatim.tools import country_info
class _AnalyzerByLanguage:
    """ Processor for tagging the language of names in a place.

        An instance is callable. Calling it with a place object sets the
        'analyzer' attribute on the names of that object, using either the
        suffix of the name or the default languages of the country.
    """

    def __init__(self, config):
        """ Set up the processor from the sanitizer configuration.

            `config` is a dict-like object. The keys read are 'filter-kind',
            'use-defaults', 'mode' and 'whitelist'.
        """
        if 'filter-kind' in config:
            self.regexes = [re.compile(regex) for regex in config['filter-kind']]
        else:
            # Without a filter every kind of name is processed.
            self.regexes = None

        # Any value other than 'mono' or 'all' switches default languages off.
        self.use_defaults = config.get('use-defaults', 'no')
        if self.use_defaults not in ('mono', 'all'):
            self.use_defaults = False

        # Everything except an explicit 'append' means replace.
        self.replace = config.get('mode', 'replace') != 'append'
        self.whitelist = config.get('whitelist')

        # Compute the languages to use when no suffix is given.
        # Countries with more than one language only get an entry in
        # 'all' mode, so a lookup may legitimately come back empty.
        self.deflangs = {}
        for ccode, prop in country_info.iterate():
            clangs = prop['languages']
            if len(clangs) == 1 or self.use_defaults == 'all':
                if self.whitelist:
                    self.deflangs[ccode] = [lang for lang in clangs
                                            if lang in self.whitelist]
                else:
                    self.deflangs[ccode] = clangs


    def _kind_matches(self, kind):
        """ Return True when a name of the given kind should be processed.
        """
        if self.regexes is None:
            return True

        return any(regex.search(kind) for regex in self.regexes)


    def _suffix_matches(self, suffix):
        """ Return True when the suffix is an acceptable language code.

            Without a whitelist any lower-case suffix of 2 or 3 characters
            is accepted, otherwise the suffix must be in the whitelist.
        """
        if self.whitelist is None:
            return len(suffix) in (2, 3) and suffix.islower()

        return suffix in self.whitelist


    def _languages_for(self, name, obj):
        """ Return the list of languages to tag `name` with.

            Returns None (or an empty list) when the name must be left alone.
        """
        if name.suffix:
            return [name.suffix] if self._suffix_matches(name.suffix) else None

        if not self.use_defaults:
            return None

        # The country may have no entry at all, so check for that before
        # looking at the number of languages.
        langs = self.deflangs.get(obj.place.country_code)
        if langs and self.use_defaults == 'mono' and len(langs) > 1:
            return None

        return langs


    def __call__(self, obj):
        """ Set the 'analyzer' attribute on the names of `obj`.

            Names that already have an analyzer or whose kind is filtered
            out are skipped. In replace mode the first language is set on
            the name itself, in append mode on a clone of it. Every further
            language gets its own clone. The clones are appended to
            `obj.names`.
        """
        if not obj.names:
            return

        # Collected separately so that obj.names is not changed while
        # it is being iterated.
        more_names = []

        for name in obj.names:
            if name.has_attr('analyzer') or not self._kind_matches(name.kind):
                continue

            langs = self._languages_for(name, obj)
            if not langs:
                continue

            if self.replace:
                name.set_attr('analyzer', langs[0])
            else:
                more_names.append(name.clone(attr={'analyzer': langs[0]}))

            more_names.extend(name.clone(attr={'analyzer': lang})
                              for lang in langs[1:])

        obj.names.extend(more_names)
79 """ Create a function that sets the analyzer property depending on the
80 language of the tag. The language is taken from the suffix.
82 To restrict the set of languages that should be tagged, use
83 'whitelist'. A list of acceptable suffixes. When unset, all 2- and
84 3-letter codes are accepted.
86 'use-defaults' configures what happens when the name has no suffix
87 with a language tag. When set to 'all', a variant is created for
88 each of the spoken languages in the country the feature is in. When
89 set to 'mono', a variant is created only when a single language is spoken
90 in the country. The default is to do nothing with the default languages
93 'mode' may be 'replace' (the default) or 'append' and configures if
94 the original name (without any analyzer tagged) is retained.
96 With 'filter-kind' the set of names the sanitizer should be applied
97 to can be restricted to the given patterns of 'kind'. It expects a
98 list of regular expressions to be matched against 'kind'.
100 return _AnalyzerByLanguage(config)