self.search = Transliterator.createFromRules("icu_search",
norm_rules + trans_rules)
- self.analysis = {name: arules.create(self.to_ascii, arules.config)
+ self.analysis = {name: arules.create(self.normalizer, self.to_ascii, arules.config)
for name, arules in analysis_rules.items()}
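The loader builds all of its ICU transliterators in one place; the new wrinkle is that the normalizer is now handed to every token analysis module as well, so each module can normalize names itself. A minimal sketch of that wiring with PyICU, using illustrative rule strings (the real `norm_rules` and `trans_rules` come from the ICU configuration):

```python
from icu import Transliterator

# Illustrative rules only; the real ones are loaded from the ICU config file.
norm_rules = ":: lower (); [[:Punctuation:][:Space:]]+ > ' ';"
trans_rules = ":: Latin (); :: Latin-ASCII ();"

normalizer = Transliterator.createFromRules("icu_normalization", norm_rules)
to_ascii = Transliterator.createFromRules("icu_to_ascii", trans_rules)
search = Transliterator.createFromRules("icu_search", norm_rules + trans_rules)
```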
for name in names:
analyzer_id = name.get_attr('analyzer')
- norm_name = self._normalized(name.name)
+ analyzer = self.token_analysis.analysis[analyzer_id]
+ norm_name = analyzer.normalize(name.name)
if analyzer_id is None:
token_id = norm_name
else:
    token_id = f'{norm_name}@{analyzer_id}'
full, part = self._cache.names.get(token_id, (None, None))
if full is None:
- variants = self.token_analysis.analysis[analyzer_id].get_variants_ascii(norm_name)
+ variants = analyzer.get_variants_ascii(norm_name)
if not variants:
continue
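The behavioural change here: the normalized form that serves as the cache key now comes from the analyzer itself rather than from the tokenizer-wide `_normalized()`, so analyzers with their own normalization rules produce consistent keys and variants. A condensed, self-contained illustration of the lookup flow; the stub analyzer and plain dict cache are stand-ins, not the project's actual classes:

```python
# Stand-in for a token analysis module; the real one wraps ICU transliterators.
class StubAnalyzer:
    def normalize(self, name):
        return name.lower().strip()

    def get_variants_ascii(self, norm_name):
        return [norm_name]           # the real module expands spelling variants

_cache = {}

def compute_token(name, analyzer_id=None):
    analyzer = StubAnalyzer()
    norm_name = analyzer.normalize(name)
    # The default analyzer keys by the plain name, named analyzers get a suffix.
    token_id = norm_name if analyzer_id is None else f'{norm_name}@{analyzer_id}'
    full, part = _cache.get(token_id, (None, None))
    if full is None:
        variants = analyzer.get_variants_ascii(norm_name)
        if not variants:
            return None
        _cache[token_id] = (token_id, variants)  # stand-in for DB token ids
    return _cache[token_id]

print(compute_token(' Main STREET '))  # -> ('main street', ['main street'])
```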
### Analysis section
-def create(transliterator, config):
+def create(normalizer, transliterator, config):
""" Create a new token analysis instance for this module.
"""
- return GenericTokenAnalysis(transliterator, config)
+ return GenericTokenAnalysis(normalizer, transliterator, config)
class GenericTokenAnalysis:
""" Collects the different transformation rules for variant generation
    and provides the functions to apply the transformations.
"""
- def __init__(self, to_ascii, config):
+ def __init__(self, norm, to_ascii, config):
+ self.norm = norm
self.to_ascii = to_ascii
self.variant_only = config['variant_only']
self.mutations = [MutationVariantGenerator(*cfg) for cfg in config['mutations']]
+ def normalize(self, name):
+ """ Return the normalized form of the name. This is the standard form
+ from which possible variants for the name can be derived.
+ """
+ return self.norm.transliterate(name).strip()
+
+
def get_variants_ascii(self, norm_name):
""" Compute the spelling variants for the given normalized name
and transliterate the result.
"""
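With the normalizer injected, `normalize()` is nothing more than one transliteration pass plus a whitespace trim. What that yields for a typical rule set; the rules below are an assumption for illustration, not the shipped configuration:

```python
from icu import Transliterator

# Assumed rules: decompose, drop combining marks, lowercase, recompose.
rules = ":: NFD (); [:Nonspacing Mark:] >; :: lower (); :: NFC ();"
norm = Transliterator.createFromRules("norm_example", rules)

assert norm.transliterate("Café Strauß ").strip() == "cafe strauß"
```

### Test section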
rules['mode'] = 'variant-only'
config = module.configure(rules, DEFAULT_NORMALIZATION)
trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
+ norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
- return module.create(trans, config)
+ return module.create(norm, trans, config)
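The fixtures rely on two module-level rule constants. A plausible shape for them, assuming the usual ICU transform syntax (the exact strings in the real test module may differ):

```python
# Assumed test constants; real values may differ.
DEFAULT_NORMALIZATION = """ :: NFD ();
                            [[:Nonspacing Mark:] [:Cf:]] >;
                            :: lower ();
                            [[:Punctuation:][:Space:]]+ > ' ';
                            :: NFC ();
                        """

DEFAULT_TRANSLITERATION = """ :: Latin ();
                          """
```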
def get_normalized_variants(proc, name):
    return proc.get_variants_ascii(proc.normalize(name))
rules = { 'analyzer': 'generic' }
config = module.configure(rules, DEFAULT_NORMALIZATION)
trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
+ norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
- proc = module.create(trans, config)
+ proc = module.create(norm, trans, config)
assert get_normalized_variants(proc, '大德!') == ['dà dé']
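Unpacking that expectation: normalization strips the trailing punctuation but leaves the Han characters alone, and the transliteration step then romanizes them to accented pinyin. With rule strings like those sketched above:

```python
from icu import Transliterator

norm = Transliterator.createFromRules(
    "n", ":: lower (); [[:Punctuation:][:Space:]]+ > ' ';")
trans = Transliterator.createFromRules("t", ":: Latin ();")

assert norm.transliterate('大德!').strip() == '大德'  # punctuation removed
assert trans.transliterate('大德') == 'dà dé'         # Han-Latin pinyin
```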
}
config = module.configure(rules, DEFAULT_NORMALIZATION)
trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
+ norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
- self.analysis = module.create(trans, config)
+ self.analysis = module.create(norm, trans, config)
def variants(self, name):
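With the module owning its normalizer, the fixture's helper no longer needs a separate test transliterator for normalization. A sketch of a possible body, assuming the fixture keeps the created module in `self.analysis`:

```python
def variants(self, name):
    # Hypothetical completion: normalize through the analysis module,
    # then expand and transliterate the spelling variants.
    return set(self.analysis.get_variants_ascii(self.analysis.normalize(name)))
```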