git.openstreetmap.org Git - nominatim.git/blob - nominatim/tokenizer/icu_variants.py
leave ICU variant properties empty for now
[nominatim.git] / nominatim / tokenizer / icu_variants.py
1 """
2 Data structures for saving variant expansions for ICU tokenizer.
3 """
4 from collections import namedtuple
5 import json
6
# Field names of ICUVariantProperties. The identifier keeps its original
# (misspelled) name so that any existing reference to it keeps working.
_ICU_VARIANT_PORPERTY_FIELDS = ['lang']


class ICUVariantProperties(namedtuple('_ICUVariantProperties', _ICU_VARIANT_PORPERTY_FIELDS,
                                      defaults=(None, )*len(_ICU_VARIANT_PORPERTY_FIELDS))):
    """ Data container for saving properties that describe when a variant
        should be applied.

        Every field defaults to None when it is not given explicitly.

        Property instances are hashable.
    """
    # A namedtuple subclass without empty __slots__ silently gains a
    # per-instance __dict__. Keep the instances as light-weight and as
    # immutable as the underlying tuple.
    __slots__ = ()

    @classmethod
    def from_rules(cls, _):
        """ Create a new property type from a generic dictionary.

            The argument is currently ignored: no property is evaluated
            yet, so the returned instance always has 'lang' set to None.
        """
        return cls(lang=None)
25
26
# A single variant rule: the source string, its replacement and the
# properties attached to the rule.
ICUVariant = namedtuple(
    'ICUVariant',
    ('source', 'replacement', 'properties'),
)
28
29
def pickle_variant_set(variants):
    """ Serializes an iterable of variant rules to a string.

        The result is a JSON object with two members: 'properties' maps
        a numeric id to the dictionary form of each distinct property set
        and 'variants' lists every rule as (source, replacement, id).

        The input is traversed exactly once, so one-shot iterables such
        as generators are serialized completely.
    """
    # Collect each distinct property set only once and hand out ids
    # starting at 1 in order of first appearance, so that the property
    # sets don't need to be duplicated for every variant.
    properties = {}
    entries = []
    for variant in variants:
        pid = properties.setdefault(variant.properties, len(properties) + 1)
        entries.append((variant.source, variant.replacement, pid))

    # Convert everything to json.
    return json.dumps({'properties': {pid: props._asdict()
                                      for props, pid in properties.items()},
                       'variants': entries})
47
48
def unpickle_variant_set(variant_string):
    """ Deserializes a variant string that was previously created with
        pickle_variant_set() into a set of ICUVariants.
    """
    data = json.loads(variant_string)

    # JSON object keys are always strings. Convert them back to the
    # integer ids that the entries of the variant list refer to.
    properties = {int(k): ICUVariantProperties(**v) for k, v in data['properties'].items()}

    return {ICUVariant(src, repl, properties[pid]) for src, repl, pid in data['variants']}