X-Git-Url: https://git.openstreetmap.org/nominatim.git/blobdiff_plain/1ffb6bd5d0e1aea120f953a55d72025f47206242..2e3c5d4c5b39e29af57a9398f20fdf5cad0e9045:/nominatim/tokenizer/legacy_tokenizer.py diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index 4c03678d..6040f88f 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -271,8 +271,7 @@ class LegacyNameAnalyzer: self.conn = None - @staticmethod - def get_word_token_info(conn, words): + def get_word_token_info(self, words): """ Return token information for the given list of words. If a word starts with # it is assumed to be a full name otherwise is a partial name. @@ -283,7 +282,7 @@ class LegacyNameAnalyzer: The function is used for testing and debugging only and not necessarily efficient. """ - with conn.cursor() as cur: + with self.conn.cursor() as cur: cur.execute("""SELECT t.term, word_token, word_id FROM word, (SELECT unnest(%s::TEXT[]) as term) t WHERE word_token = (CASE @@ -375,7 +374,7 @@ class LegacyNameAnalyzer: cur, """ INSERT INTO word (word_id, word_token, word, class, type, search_name_count, operator) - (SELECT nextval('seq_word'), make_standard_name(name), name, + (SELECT nextval('seq_word'), ' ' || make_standard_name(name), name, class, type, 0, CASE WHEN op in ('in', 'near') THEN op ELSE null END FROM (VALUES %s) as v(name, class, type, op))""", @@ -400,11 +399,11 @@ class LegacyNameAnalyzer: cur.execute( """INSERT INTO word (word_id, word_token, country_code) (SELECT nextval('seq_word'), lookup_token, %s - FROM (SELECT ' ' || make_standard_name(n) as lookup_token + FROM (SELECT DISTINCT ' ' || make_standard_name(n) as lookup_token FROM unnest(%s)n) y WHERE NOT EXISTS(SELECT * FROM word WHERE word_token = lookup_token and country_code = %s)) - """, (country_code, names, country_code)) + """, (country_code, list(names.values()), country_code)) def process_place(self, place): @@ -422,7 +421,7 @@ class LegacyNameAnalyzer: country_feature = place.get('country_feature') if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature): - self.add_country_names(country_feature.lower(), list(names.values())) + self.add_country_names(country_feature.lower(), names) address = place.get('address') @@ -513,10 +512,9 @@ class _TokenInfo: """ def _get_place(name): with conn.cursor() as cur: - cur.execute("""SELECT (addr_ids_from_name(%s) - || getorcreate_name_id(make_standard_name(%s), ''))::text, + cur.execute("""SELECT make_keywords(hstore('name' , %s))::text, word_ids_from_name(%s)::text""", - (name, name, name)) + (name, name)) return cur.fetchone() self.data['place_search'], self.data['place_match'] = \