git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author: Sarah Hoffmann <lonvia@denofr.de>
Sat, 12 Aug 2023 15:06:38 +0000 (17:06 +0200)
committer: Sarah Hoffmann <lonvia@denofr.de>
Sat, 12 Aug 2023 15:06:38 +0000 (17:06 +0200)
1  2 
nominatim/api/search/icu_tokenizer.py

index ad08294e00f8409c04043d9187a96198aebc8e16,7bf516e3aa25c12775d0c3ffae7271076f6d0032..d3e34537a8d7c03585b16dac4a2136e80e77c8fd
@@@ -83,7 -83,7 +83,7 @@@ class ICUToken(qmod.Token)
          seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
          distance = 0
          for tag, afrom, ato, bfrom, bto in seq.get_opcodes():
-             if tag == 'delete' and (afrom == 0 or ato == len(self.lookup_word)):
+             if tag in ('delete', 'insert') and (afrom == 0 or ato == len(self.lookup_word)):
                  distance += 1
              elif tag == 'replace':
                  distance += max((ato-afrom), (bto-bfrom))
@@@ -192,12 -192,7 +192,12 @@@ class ICUQueryAnalyzer(AbstractQueryAna
              standardized form search will work with. All information removed
              at this stage is inevitably lost.
          """
 -        return cast(str, self.normalizer.transliterate(text))
 +        norm = cast(str, self.normalizer.transliterate(text))
 +        numspaces = norm.count(' ')
 +        if numspaces > 4 and len(norm) <= (numspaces + 1) * 3:
 +            return ''
 +
 +        return norm
  
  
      def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: