git.openstreetmap.org Git - nominatim.git/commitdiff
refactor variant production to use generators
author: Sarah Hoffmann <lonvia@denofr.de>
Tue, 11 Jan 2022 16:51:05 +0000 (17:51 +0100)
committer: Sarah Hoffmann <lonvia@denofr.de>
Tue, 18 Jan 2022 10:09:21 +0000 (11:09 +0100)
nominatim/tokenizer/token_analysis/generic.py

index f790dad27a24d9f11300ab1f6d78c147b829797b..05ba885b29c8714d9a7f931f6af91a7e16786527 100644 (file)
@@ -176,14 +176,26 @@ class GenericTokenAnalysis:
         """ Compute the spelling variants for the given normalized name
             and transliterate the result.
         """
+        results = set()
+        for variant in self._generate_word_variants(norm_name):
+            if not self.variant_only or variant.strip() != norm_name:
+                trans_name = self.to_ascii.transliterate(variant).strip()
+                if trans_name:
+                    results.add(trans_name)
+
+        return list(results)
+
+
+    def _generate_word_variants(self, norm_name):
         baseform = '^ ' + norm_name + ' ^'
+        baselen = len(baseform)
         partials = ['']
 
         startpos = 0
         if self.replacements is not None:
             pos = 0
             force_space = False
-            while pos < len(baseform):
+            while pos < baselen:
                 full, repl = self.replacements.longest_prefix_item(baseform[pos:],
                                                                    (None, None))
                 if full is not None:
@@ -207,24 +219,9 @@ class GenericTokenAnalysis:
 
         # No variants detected? Fast return.
         if startpos == 0:
-            if self.variant_only:
-                return []
-
-            trans_name = self.to_ascii.transliterate(norm_name).strip()
-            return [trans_name] if trans_name else []
+            return (norm_name, )
 
-        return self._compute_result_set(partials, baseform[startpos:],
-                                        norm_name if self.variant_only else '')
+        if startpos < baselen:
+            return (part[1:] + baseform[startpos:-1] for part in partials)
 
-
-    def _compute_result_set(self, partials, prefix, exclude):
-        results = set()
-
-        for variant in partials:
-            vname = (variant + prefix)[1:-1].strip()
-            if vname != exclude:
-                trans_name = self.to_ascii.transliterate(vname).strip()
-                if trans_name:
-                    results.add(trans_name)
-
-        return list(results)
+        return (part[1:-1] for part in partials)