Helper class to create ICU rules from a configuration file.
"""
import io
-import yaml
import logging
from collections import defaultdict
import itertools
+import yaml
from icu import Transliterator
from nominatim.errors import UsageError
def __init__(self, configfile):
self.configfile = configfile
+ self.compound_suffixes = set()
+ self.abbreviations = defaultdict()
if configfile.suffix == '.yaml':
self._load_from_yaml()
def get_search_rules(self):
- """ Returns the ICU rules to be used during search.
+ """ Return the ICU rules to be used during search.
The rules combine normalization, compound decomposition (including
abbreviated compounds) and transliteration.
"""
suffixes.add(suffix)
suffixes.update(self.abbreviations.get(suffix, []))
- for suffix in sorted(suffixes, key=lambda x:len(x), reverse=True):
+ for suffix in sorted(suffixes, key=len, reverse=True):
rules.write("'{0} ' > ' {0} ';".format(suffix))
# Finally add transliteration.
return self.transliteration_rules
def get_replacement_pairs(self):
- """ Returns the list of possible compound decompositions with
+ """ Return the list of possible compound decompositions with
application of abbreviations included.
The result is a list of pairs: the first item is the sequence to
replace, the second is a list of replacements.
synonyms[abbr + ' '].add(' ' + abbr + ' ')
# sort the resulting list by descending length (longer matches are preferred).
- sorted_keys = sorted(synonyms.keys(), key=lambda x: len(x), reverse=True)
+ sorted_keys = sorted(synonyms.keys(), key=len, reverse=True)
return [(k, list(synonyms[k])) for k in sorted_keys]