1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Implementation of 'refresh' subcommand.
11 from pathlib import Path
13 from nominatim.db.connection import connect
15 # Do not repeat documentation of subcommand classes.
16 # pylint: disable=C0111
# Using non-top-level imports to avoid loading modules that may end up unused.
18 # pylint: disable=E0012,C0415
# Module-wide logger (root logger); used for progress and error output below.
LOG = logging.getLogger()
22 def _parse_osm_object(obj):
23 """ Parse the given argument into a tuple of OSM type and ID.
24 Raises an ArgumentError if the format is not recognized.
26 if len(obj) < 2 or obj[0].lower() not in 'nrw' or not obj[1:].isdigit():
27 raise ArgumentError("Expect OSM object id of form [N|W|R]<id>.")
29 return (obj[0].upper(), int(obj[1:]))
34 Recompute auxiliary data used by the indexing process.
This sub-command updates various static data and functions in the database.
It usually needs to be run after changing various aspects of the
configuration. The configuration documentation will mention the exact
command to use in such cases.
41 Warning: the 'update' command must not be run in parallel with other update
42 commands like 'replication' or 'add-data'.
49 group = parser.add_argument_group('Data arguments')
50 group.add_argument('--postcodes', action='store_true',
51 help='Update postcode centroid table')
52 group.add_argument('--word-tokens', action='store_true',
53 help='Clean up search terms')
54 group.add_argument('--word-counts', action='store_true',
55 help='Compute frequency of full-word search terms')
56 group.add_argument('--address-levels', action='store_true',
57 help='Reimport address level configuration')
58 group.add_argument('--functions', action='store_true',
59 help='Update the PL/pgSQL functions in the database')
60 group.add_argument('--wiki-data', action='store_true',
61 help='Update Wikipedia/data importance numbers')
62 group.add_argument('--importance', action='store_true',
63 help='Recompute place importances (expensive!)')
64 group.add_argument('--website', action='store_true',
65 help='Refresh the directory that serves the scripts for the web API')
66 group.add_argument('--data-object', action='append',
67 type=_parse_osm_object, metavar='OBJECT',
68 help='Mark the given OSM object as requiring an update'
69 ' (format: [NWR]<id>)')
70 group.add_argument('--data-area', action='append',
71 type=_parse_osm_object, metavar='OBJECT',
72 help='Mark the area around the given OSM object as requiring an update'
73 ' (format: [NWR]<id>)')
75 group = parser.add_argument_group('Arguments for function refresh')
76 group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
77 help='Do not enable code for propagating updates')
78 group.add_argument('--enable-debug-statements', action='store_true',
79 help='Enable debug warning statements in functions')
83 from ..tools import refresh, postcodes
84 from ..indexer.indexer import Indexer
88 if postcodes.can_compute(args.config.get_libpq_dsn()):
89 LOG.warning("Update postcodes centroid")
90 tokenizer = self._get_tokenizer(args.config)
91 postcodes.update_postcodes(args.config.get_libpq_dsn(),
92 args.project_dir, tokenizer)
93 indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
95 indexer.index_postcodes()
97 LOG.error("The place table doesn't exist. "
98 "Postcode updates on a frozen database is not possible.")
101 LOG.warning('Updating word tokens')
102 tokenizer = self._get_tokenizer(args.config)
103 tokenizer.update_word_tokens()
106 LOG.warning('Recompute word statistics')
107 self._get_tokenizer(args.config).update_statistics()
109 if args.address_levels:
110 LOG.warning('Updating address levels')
111 with connect(args.config.get_libpq_dsn()) as conn:
112 refresh.load_address_levels_from_config(conn, args.config)
115 LOG.warning('Create functions')
116 with connect(args.config.get_libpq_dsn()) as conn:
117 refresh.create_functions(conn, args.config,
118 args.diffs, args.enable_debug_statements)
119 self._get_tokenizer(args.config).update_sql_functions(args.config)
122 data_path = Path(args.config.WIKIPEDIA_DATA_PATH
124 LOG.warning('Import wikipdia article importance from %s', data_path)
125 if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
127 LOG.fatal('FATAL: Wikipedia importance dump file not found')
130 # Attention: importance MUST come after wiki data import.
132 LOG.warning('Update importance values for database')
133 with connect(args.config.get_libpq_dsn()) as conn:
134 refresh.recompute_importance(conn)
137 webdir = args.project_dir / 'website'
138 LOG.warning('Setting up website directory at %s', webdir)
139 # This is a little bit hacky: call the tokenizer setup, so that
140 # the tokenizer directory gets repopulated as well, in case it
142 self._get_tokenizer(args.config)
143 with connect(args.config.get_libpq_dsn()) as conn:
144 refresh.setup_website(webdir, args.config, conn)
146 if args.data_object or args.data_area:
147 with connect(args.config.get_libpq_dsn()) as conn:
148 for obj in args.data_object or []:
149 refresh.invalidate_osm_object(*obj, conn, recursive=False)
150 for obj in args.data_area or []:
151 refresh.invalidate_osm_object(*obj, conn, recursive=True)
157 def _get_tokenizer(self, config):
158 if self.tokenizer is None:
159 from ..tokenizer import factory as tokenizer_factory
161 self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
163 return self.tokenizer