# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'refresh' subcommand.
"""
from typing import Tuple, Optional
import argparse
import logging
from pathlib import Path

from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect
from ..tokenizer.base import AbstractTokenizer
from .args import NominatimArgs

# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid potentially unused imports.
# pylint: disable=E0012,C0415

LOG = logging.getLogger()

def _parse_osm_object(obj: str) -> Tuple[str, int]:
    """ Parse the given argument into a tuple of OSM type and ID.
        Raises an ArgumentTypeError if the format is not recognized.
    """
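    # Illustrative examples: 'n42' -> ('N', 42), 'W100' -> ('W', 100);
    # strings like 'x1' or 'node12' raise an argparse.ArgumentTypeError.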
    if len(obj) < 2 or obj[0].lower() not in 'nrw' or not obj[1:].isdigit():
        raise argparse.ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R]<id>.")

    return (obj[0].upper(), int(obj[1:]))


class UpdateRefresh:
    """\
    Recompute auxiliary data used by the indexing process.

    This sub-command updates various static data and functions in the database.
    It usually needs to be run after changing various aspects of the
    configuration. The configuration documentation will mention the exact
    command to use in such a case.

    Warning: the 'update' command must not be run in parallel with other update
             commands like 'replication' or 'add-data'.
    """
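    # Typical invocations (illustrative only; the flags are defined in add_args() below):
    #   nominatim refresh --postcodes
    #   nominatim refresh --functions --enable-debug-statements
    #   nominatim refresh --data-object R123 --data-area N456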

    def __init__(self) -> None:
        self.tokenizer: Optional[AbstractTokenizer] = None

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        group = parser.add_argument_group('Data arguments')
        group.add_argument('--postcodes', action='store_true',
                           help='Update postcode centroid table')
        group.add_argument('--word-tokens', action='store_true',
                           help='Clean up search terms')
        group.add_argument('--word-counts', action='store_true',
                           help='Compute frequency of full-word search terms')
        group.add_argument('--address-levels', action='store_true',
                           help='Reimport address level configuration')
        group.add_argument('--functions', action='store_true',
                           help='Update the PL/pgSQL functions in the database')
        group.add_argument('--wiki-data', action='store_true',
                           help='Update Wikipedia/data importance numbers')
        group.add_argument('--secondary-importance', action='store_true',
                           help='Update secondary importance raster data')
        group.add_argument('--importance', action='store_true',
                           help='Recompute place importances (expensive!)')
        group.add_argument('--website', action='store_true',
                           help='Refresh the directory that serves the scripts for the web API')
        group.add_argument('--data-object', action='append',
                           type=_parse_osm_object, metavar='OBJECT',
                           help='Mark the given OSM object as requiring an update'
                                ' (format: [NWR]<id>)')
        group.add_argument('--data-area', action='append',
                           type=_parse_osm_object, metavar='OBJECT',
                           help='Mark the area around the given OSM object as requiring an update'
                                ' (format: [NWR]<id>)')

        group = parser.add_argument_group('Arguments for function refresh')
        group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
                           help='Do not enable code for propagating updates')
        group.add_argument('--enable-debug-statements', action='store_true',
                           help='Enable debug warning statements in functions')

    def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, too-many-statements
        from ..tools import refresh, postcodes
        from ..indexer.indexer import Indexer
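
        # Note: later steps (secondary importance, wiki data) may also force a
        # function refresh by setting this flag, see below.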
        need_function_refresh = args.functions

        if args.postcodes:
            if postcodes.can_compute(args.config.get_libpq_dsn()):
                LOG.warning("Update postcodes centroid")
                tokenizer = self._get_tokenizer(args.config)
                postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                           args.project_dir, tokenizer)
                indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                                  args.threads or 1)
                indexer.index_postcodes()
            else:
                LOG.error("The place table doesn't exist. "
                          "Postcode updates on a frozen database are not possible.")
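
        # --word-tokens: ask the tokenizer to clean up stale search terms; the
        # exact behaviour depends on the configured tokenizer.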
        if args.word_tokens:
            LOG.warning('Updating word tokens')
            tokenizer = self._get_tokenizer(args.config)
            tokenizer.update_word_tokens()

        if args.word_counts:
            LOG.warning('Recompute word statistics')
            self._get_tokenizer(args.config).update_statistics(args.config,
                                                               threads=args.threads or 1)

        if args.address_levels:
            LOG.warning('Updating address levels')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.load_address_levels_from_config(conn, args.config)

        # Attention: must come BEFORE functions
        if args.secondary_importance:
            with connect(args.config.get_libpq_dsn()) as conn:
                # If the table did not exist before, then the importance code
                # needs to be enabled.
                if not conn.table_exists('secondary_importance'):
                    args.functions = True

            LOG.warning('Import secondary importance raster data from %s', args.project_dir)
            if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
                                                   args.project_dir) > 0:
                LOG.fatal('FATAL: Cannot update secondary importance raster data')
                return 1

            need_function_refresh = True

        if args.wiki_data:
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH
                             or args.project_dir)
            LOG.warning('Import wikipedia article importance from %s', data_path)
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.fatal('FATAL: Wikipedia importance file not found in %s', data_path)
                return 1
            need_function_refresh = True

        if need_function_refresh:
            LOG.warning('Create functions')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.create_functions(conn, args.config,
                                         args.diffs, args.enable_debug_statements)
                self._get_tokenizer(args.config).update_sql_functions(args.config)

        # Attention: importance MUST come after wiki data import and after functions.
        if args.importance:
            LOG.warning('Update importance values for database')
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.recompute_importance(conn)

        if args.website:
            webdir = args.project_dir / 'website'
            LOG.warning('Setting up website directory at %s', webdir)
            # This is a little bit hacky: call the tokenizer setup, so that
            # the tokenizer directory gets repopulated as well, in case it
            # is not there yet.
            self._get_tokenizer(args.config)
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.setup_website(webdir, args.config, conn)

        if args.data_object or args.data_area:
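            # The objects are only marked as needing an update here; they are
            # actually reprocessed by the next indexing run (e.g. 'nominatim index'
            # or a replication cycle).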
            with connect(args.config.get_libpq_dsn()) as conn:
                for obj in args.data_object or []:
                    refresh.invalidate_osm_object(*obj, conn, recursive=False)
                for obj in args.data_area or []:
                    refresh.invalidate_osm_object(*obj, conn, recursive=True)
                conn.commit()

        return 0

    def _get_tokenizer(self, config: Configuration) -> AbstractTokenizer:
        if self.tokenizer is None:
            from ..tokenizer import factory as tokenizer_factory

            self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)

        return self.tokenizer