class ICUNameProcessor:
+ """ Collects the different transformation rules for normalisation of names
+ and provides the functions to apply the transformations.
+ """
def __init__(self, rules):
self.normalizer = Transliterator.createFromRules("icu_normalization",
""" Normalize the given name, i.e. remove all elements not relevant
for search.
"""
- return self.normalizer.transliterate(name)
+ return self.normalizer.transliterate(name).strip()
def get_variants_ascii(self, norm_name):
""" Compute the spelling variants for the given normalized name
else:
pos += 1
+ results = []
+
if startpos == 0:
- return [self.to_ascii.transliterate(norm_name)]
+ trans_name = self.to_ascii.transliterate(norm_name).strip()
+ if trans_name:
+ results.append(trans_name)
+ else:
+ for variant in variants:
+ trans_name = self.to_ascii.transliterate(variant + baseform[startpos:pos]).strip()
+ if trans_name:
+ results.append(trans_name)
- return [self.to_ascii.transliterate(v + baseform[startpos:pos]).strip() for v in variants]
+ return results
def get_search_normalized(self, name):
""" Return the normalized version of the name (including transliteration)
to be applied at search time.
"""
- return self.search.transliterate(name)
+ return self.search.transliterate(' ' + name + ' ').strip()