1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2025 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Legacy word table for testing with functions to prefil and test contents
11 from nominatim_db.db.connection import execute_scalar
13 from psycopg.types.json import Jsonb
16 class MockIcuWordTable:
17 """ A word table for testing using legacy word table structure.
19 def __init__(self, conn):
21 with conn.cursor() as cur:
22 cur.execute("""CREATE TABLE word (word_id INTEGER,
23 word_token text NOT NULL,
30 def add_full_word(self, word_id, word, word_token=None):
31 with self.conn.cursor() as cur:
32 cur.execute("""INSERT INTO word (word_id, word_token, type, word, info)
33 VALUES(%s, %s, 'W', %s, '{}'::jsonb)""",
34 (word_id, word or word_token, word))
37 def add_special(self, word_token, word, cls, typ, oper):
38 with self.conn.cursor() as cur:
39 cur.execute("""INSERT INTO word (word_token, type, word, info)
41 json_build_object('class', %s::text,
44 """, (word_token, word, cls, typ, oper))
47 def add_country(self, country_code, word_token, lookup):
48 with self.conn.cursor() as cur:
49 cur.execute("""INSERT INTO word (word_token, type, word, info)
50 VALUES(%s, 'C', %s, %s)""",
51 (word_token, lookup, Jsonb({'cc': country_code})))
54 def add_postcode(self, word_token, postcode):
55 with self.conn.cursor() as cur:
56 cur.execute("""INSERT INTO word (word_token, type, word)
58 """, (word_token, postcode))
61 def add_housenumber(self, word_id, word_tokens, word=None):
62 with self.conn.cursor() as cur:
63 if isinstance(word_tokens, str):
64 # old style without analyser
65 cur.execute("""INSERT INTO word (word_id, word_token, type)
67 """, (word_id, word_tokens))
71 for token in word_tokens:
72 cur.execute("""INSERT INTO word (word_id, word_token, type, word, info)
73 VALUES (%s, %s, 'H', %s,
74 jsonb_build_object('lookup', %s::text))
75 """, (word_id, token, word, word_tokens[0]))
80 return execute_scalar(self.conn, "SELECT count(*) FROM word")
82 def count_special(self):
83 return execute_scalar(self.conn, "SELECT count(*) FROM word WHERE type = 'S'")
85 def count_housenumbers(self):
86 return execute_scalar(self.conn, "SELECT count(*) FROM word WHERE type = 'H'")
88 def get_special(self):
89 with self.conn.cursor() as cur:
90 cur.execute("SELECT word_token, info, word FROM word WHERE type = 'S'")
91 result = set(((row[0], row[2], row[1]['class'],
92 row[1]['type'], row[1]['op']) for row in cur))
93 assert len(result) == cur.rowcount, "Word table has duplicates."
96 def get_country(self):
97 with self.conn.cursor() as cur:
98 cur.execute("SELECT info->>'cc', word_token, word FROM word WHERE type = 'C'")
99 result = set((tuple(row) for row in cur))
100 assert len(result) == cur.rowcount, "Word table has duplicates."
103 def get_postcodes(self):
104 with self.conn.cursor() as cur:
105 cur.execute("SELECT word FROM word WHERE type = 'P'")
106 return set((row[0] for row in cur))
108 def get_partial_words(self):
109 with self.conn.cursor() as cur:
110 cur.execute("SELECT word_token, info FROM word WHERE type ='w'")
111 return set(((row[0], row[1]['count']) for row in cur))