log().section('Analyze query (using ICU tokenizer)')
for func in self.preprocessors:
phrases = func(phrases)
+
+ if len(phrases) == 1 \
+ and phrases[0].text.count(' ') > 3 \
+ and max(len(s) for s in phrases[0].text.split()) < 3:
+ normalized = []
+
query = qmod.QueryStruct(phrases)
log().var_dump('Normalized query', query.source)
standardized form search will work with. All information removed
at this stage is inevitably lost.
"""
- return cast(str, self.normalizer.transliterate(text))
+ return cast(str, self.normalizer.transliterate(text)).strip('-: ')
def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
""" Transliterate the phrases and split them into tokens.