From: Sarah Hoffmann Date: Mon, 1 Apr 2024 13:05:36 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~11 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/50352c634da0b44bd595a278406cd9654ce94d86?ds=inline;hp=-c Merge remote-tracking branch 'upstream/master' --- 50352c634da0b44bd595a278406cd9654ce94d86 diff --combined nominatim/api/search/icu_tokenizer.py index 23cfa5a1,eb90c122..f6590f5b --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@@ -122,10 -122,10 +122,10 @@@ class ICUToken(qmod.Token) else: lookup_word = row.word_token - return ICUToken(penalty=penalty, token=row.word_id, count=count, + return ICUToken(penalty=penalty, token=row.word_id, count=max(1, count), lookup_word=lookup_word, is_indexed=True, word_token=row.word_token, info=row.info, - addr_count=addr_count) + addr_count=max(1, addr_count)) @@@ -208,12 -208,7 +208,12 @@@ class ICUQueryAnalyzer(AbstractQueryAna standardized form search will work with. All information removed at this stage is inevitably lost. """ - return cast(str, self.normalizer.transliterate(text)) + norm = cast(str, self.normalizer.transliterate(text)) + numspaces = norm.count(' ') + if numspaces > 4 and len(norm) <= (numspaces + 1) * 3: + return '' + + return norm def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: