git.openstreetmap.org Git - nominatim.git/commitdiff
increase penalty for one-letter words
author Sarah Hoffmann <lonvia@denofr.de>
Thu, 23 Nov 2023 09:51:58 +0000 (10:51 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 23 Nov 2023 09:51:58 +0000 (10:51 +0100)
nominatim/api/search/icu_tokenizer.py

index b68e8d10eef70816f6cb772da2d7036e8a31693d..196fde2a8444e69d5d74a0a0310dd94812425d96 100644 (file)
@@ -101,10 +101,16 @@ class ICUToken(qmod.Token):
         penalty = 0.0
         if row.type == 'w':
             penalty = 0.3
+        elif row.type == 'W':
+            if len(row.word_token) == 1 and row.word_token == row.word:
+                penalty = 0.2 if row.word.isdigit() else 0.3
         elif row.type == 'H':
             penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
             if all(not c.isdigit() for c in row.word_token):
                 penalty += 0.2 * (len(row.word_token) - 1)
+        elif row.type == 'C':
+            if len(row.word_token) == 1:
+                penalty = 0.3
 
         if row.info is None:
             lookup_word = row.word