"""
Tokenizer implementing normalisation as used before Nominatim 4.
"""
4 from nominatim.db.connection import connect
5 from nominatim.db import properties
# Key under which the normalization rules are persisted in the database
# property table (written by init_new_db, read back by init_from_project).
DBCFG_NORMALIZATION = "tokenizer_normalization"
def create(dsn, data_dir):
    """ Create a new instance of the tokenizer provided by this module.

        `dsn` is the database connection string and `data_dir` the
        project directory where tokenizer data is kept.
    """
    return LegacyTokenizer(dsn, data_dir)
class LegacyTokenizer:
    """ The legacy tokenizer uses a special PostgreSQL module to normalize
        names and queries. The tokenizer thus implements normalization through
        calls to the database.
    """
20 def __init__(self, dsn, data_dir):
22 self.data_dir = data_dir
23 self.normalization = None
26 def init_new_db(self, config):
27 """ Set up a new tokenizer for the database.
29 This copies all necessary data in the project directory to make
30 sure the tokenizer remains stable even over updates.
32 self.normalization = config.TERM_NORMALIZATION
34 # Stable configuration is saved in the database.
35 with connect(self.dsn) as conn:
36 properties.set_property(conn, DBCFG_NORMALIZATION,
40 def init_from_project(self):
41 """ Initialise the tokenizer from the project directory.
43 with connect(self.dsn) as conn:
44 self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)