]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/clicmd/refresh.py
c741dcf63632fc0c01d8592a66f46d3be0c8bdbd
[nominatim.git] / nominatim / clicmd / refresh.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of 'refresh' subcommand.
9 """
10 import logging
11 from pathlib import Path
12
13 from nominatim.db.connection import connect
14
15 # Do not repeat documentation of subcommand classes.
16 # pylint: disable=C0111
17 # Using non-top-level imports to avoid eventually unused imports.
18 # pylint: disable=E0012,C0415
19
20 LOG = logging.getLogger()
21
class UpdateRefresh:
    """\
    Recompute auxiliary data used by the indexing process.

    This sub-command updates various static data and functions in the database.
    It usually needs to be run after changing various aspects of the
    configuration. The configuration documentation will mention the exact
    command to use in such case.

    Warning: the 'update' command must not be run in parallel with other update
             commands like 'replication' or 'add-data'.
    """
    def __init__(self):
        # Tokenizer is created lazily and cached: several refresh steps need
        # it but most invocations use at most one of them.
        self.tokenizer = None

    @staticmethod
    def add_args(parser):
        """ Add the command-line options understood by this subcommand
            to the given argparse parser.
        """
        group = parser.add_argument_group('Data arguments')
        group.add_argument('--postcodes', action='store_true',
                           help='Update postcode centroid table')
        group.add_argument('--word-tokens', action='store_true',
                           help='Clean up search terms')
        group.add_argument('--word-counts', action='store_true',
                           help='Compute frequency of full-word search terms')
        group.add_argument('--address-levels', action='store_true',
                           help='Reimport address level configuration')
        group.add_argument('--functions', action='store_true',
                           help='Update the PL/pgSQL functions in the database')
        group.add_argument('--wiki-data', action='store_true',
                           help='Update Wikipedia/data importance numbers')
        group.add_argument('--importance', action='store_true',
                           help='Recompute place importances (expensive!)')
        group.add_argument('--website', action='store_true',
                           help='Refresh the directory that serves the scripts for the web API')
        group = parser.add_argument_group('Arguments for function refresh')
        group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
                           help='Do not enable code for propagating updates')
        group.add_argument('--enable-debug-statements', action='store_true',
                           help='Enable debug warning statements in functions')


    def run(self, args):
        """ Execute the refresh steps selected on the command line.

            Returns 0 on success, 1 when a fatal error occurred
            (currently only a missing Wikipedia importance dump).
        """
        from ..tools import refresh, postcodes
        from ..indexer.indexer import Indexer

        if args.postcodes:
            if postcodes.can_compute(args.config.get_libpq_dsn()):
                LOG.warning("Update postcodes centroid")
                tokenizer = self._get_tokenizer(args.config)
                postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                           args.project_dir, tokenizer)
                indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                                  args.threads or 1)
                indexer.index_postcodes()
            else:
                LOG.error("The place table doesn't exist. "
                          "Postcode updates on a frozen database are not possible.")

        if args.word_tokens:
            tokenizer = self._get_tokenizer(args.config)
            tokenizer.update_word_tokens()

        if args.word_counts:
            LOG.warning('Recompute word statistics')
            self._get_tokenizer(args.config).update_statistics()

        if args.address_levels:
            LOG.warning('Updating address levels')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.load_address_levels_from_config(conn, args.config)

        if args.functions:
            LOG.warning('Create functions')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.create_functions(conn, args.config,
                                         args.diffs, args.enable_debug_statements)
                # The tokenizer ships its own SQL functions which must be
                # refreshed together with the generic ones.
                self._get_tokenizer(args.config).update_sql_functions(args.config)

        if args.wiki_data:
            # Fall back to the project directory when no explicit
            # Wikipedia data path has been configured.
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH
                             or args.project_dir)
            LOG.warning('Import wikipedia article importance from %s', data_path)
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.fatal('FATAL: Wikipedia importance dump file not found')
                return 1

        # Attention: importance MUST come after wiki data import.
        if args.importance:
            LOG.warning('Update importance values for database')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.recompute_importance(conn)

        if args.website:
            webdir = args.project_dir / 'website'
            LOG.warning('Setting up website directory at %s', webdir)
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.setup_website(webdir, args.config, conn)

        return 0


    def _get_tokenizer(self, config):
        """ Return the tokenizer for the database, creating it on first use
            and caching it for subsequent calls.
        """
        if self.tokenizer is None:
            from ..tokenizer import factory as tokenizer_factory

            self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)

        return self.tokenizer