git.openstreetmap.org Git - nominatim.git/commitdiff
limit the number of variants that can be produced
author: Sarah Hoffmann <lonvia@denofr.de>
Fri, 2 Jul 2021 14:42:13 +0000 (16:42 +0200)
committer: Sarah Hoffmann <lonvia@denofr.de>
Sun, 4 Jul 2021 08:28:28 +0000 (10:28 +0200)
nominatim/tokenizer/icu_name_processor.py
test/python/test_tokenizer_icu_name_processor.py

index 6ead712e661be376386f5af7f1ce1f3962f06e58..28719df1ee7146b01591b5a1b4cf384275b7ee1f 100644 (file)
@@ -105,6 +105,11 @@ class ICUNameProcessor:
                 partials = [v + done + r
                             for v, r in itertools.product(partials, repl)
                             if not force_space or r.startswith(' ')]
+                if len(partials) > 128:
+                    # If too many variants are produced, they are unlikely
+                    # to be helpful. Only use the original term.
+                    startpos = 0
+                    break
                 startpos = pos + len(full)
                 if full[-1] == ' ':
                     startpos -= 1
index 553d25c5e2a3c72824b1c01d953b8388f3fcb59a..cc1031164c2872b77ed7dcf0fb3600df05895376 100644 (file)
@@ -78,6 +78,8 @@ VARIANT_TESTS = [
 (('river$ -> r',), "Bent River", {'bent river', 'bent r'}),
 (('^north => n',), "North 2nd Street", {'n 2nd street'}),
 (('^north => n',), "Airport North", {'airport north'}),
+(('am -> a',), "am am am am am am am am", {'am am am am am am am am'}),
+(('am => a',), "am am am am am am am am", {'a a a a a a a a'})
 ]
 
 @pytest.mark.parametrize("rules,name,variants", VARIANT_TESTS)