]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_db/tools/check_database.py
Merge pull request #3799 from lonvia/reduce-coordinate-precision
[nominatim.git] / src / nominatim_db / tools / check_database.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2025 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Collection of functions that check if the database is complete and functional.
9 """
10 from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
11 from enum import Enum
12 from textwrap import dedent
13
14 from ..config import Configuration
15 from ..db.connection import connect, Connection, \
16                             index_exists, table_exists, execute_scalar
17 from ..db import properties
18 from ..errors import UsageError
19 from ..tokenizer import factory as tokenizer_factory
20 from . import freeze
21 from ..version import NOMINATIM_VERSION, parse_version
22
# Registry of the wrapped check functions. The `_check` decorator appends
# to it at definition time and `check_database()` runs the entries in
# list order.
CHECKLIST = []
24
25
class CheckState(Enum):
    """ Possible states of a check. FATAL stops check execution entirely.

        The state decides what the check wrapper prints and how
        check_database() computes its return value.
    """
    # Reported as 'OK'; does not change the overall result.
    OK = 0
    # Reported as 'Failed' together with the hint; overall result becomes 1.
    FAIL = 1
    # Reported as 'Failed'; check_database() returns 1 immediately.
    FATAL = 2
    # Reported as 'not applicable'; does not change the overall result.
    NOT_APPLICABLE = 3
    # Reported as 'WARNING' together with the hint; does not change
    # the overall result.
    WARN = 4
34
35
# A check returns either a bare state or a (state, params) pair. The params
# mapping supplies the values for the placeholders in the check's hint text.
CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
# Signature every undecorated check function has to implement.
CheckFunc = Callable[[Connection, Configuration], CheckResult]
38
39
def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
    """ Decorator factory for check functions.

        The decorated function is wrapped so that its title and outcome
        are printed, and the wrapper is registered in CHECKLIST. The
        title is the first line of the function's docstring. `hint` is
        an optional format string that is printed for warnings and
        failures, filled with the parameters returned by the check.
    """
    def decorator(func: CheckFunc) -> CheckFunc:
        docstring = func.__doc__ or ''
        title = docstring.split('\n', 1)[0].strip()

        def run_check(conn: Connection, config: Configuration) -> CheckState:
            print(title, end=' ... ')

            # A check may return a bare state or a (state, params) pair.
            outcome = func(conn, config)
            params: Mapping[str, Any] = {}
            if isinstance(outcome, tuple):
                state, params = outcome
            else:
                state = outcome

            # States that never print a hint.
            if state == CheckState.OK:
                print('\033[92mOK\033[0m')
                return state
            if state == CheckState.NOT_APPLICABLE:
                print('not applicable')
                return state

            if state == CheckState.WARN:
                print('\033[93mWARNING\033[0m')
                # Warnings set the hint apart with an empty line.
                if hint:
                    print('')
            else:
                print('\x1B[31mFailed\033[0m')

            if hint:
                print(dedent(hint.format(**params)))

            return state

        CHECKLIST.append(run_check)
        return run_check

    return decorator
73
74
75 class _BadConnection:
76
77     def __init__(self, msg: str) -> None:
78         self.msg = msg
79
80     def close(self) -> None:
81         """ Dummy function to provide the implementation.
82         """
83
84
def check_database(config: Configuration) -> int:
    """ Run a number of checks on the database and return the status.

        The checks registered in CHECKLIST are executed in order. The
        function returns 0 when no check failed and 1 when at least one
        check returned FAIL or FATAL. A FATAL result stops the run
        immediately, the remaining checks are skipped.

        When connecting raises a UsageError, a _BadConnection placeholder
        carrying the error message is handed to the checks instead of
        a connection.
    """
    try:
        conn = connect(config.get_libpq_dsn())
    except UsageError as err:
        conn = _BadConnection(str(err))  # type: ignore[assignment]

    overall_result = 0
    # The finally clause makes sure the connection is also released
    # when one of the checks raises an exception.
    try:
        for check in CHECKLIST:
            ret = check(conn, config)
            if ret == CheckState.FATAL:
                return 1
            if ret == CheckState.FAIL:
                overall_result = 1
    finally:
        conn.close()

    return overall_result
104
105
def _get_indexes(conn: Connection) -> List[str]:
    """ Return the names of the indexes that are expected in the database.

        The list depends on the tables present: indexes for searching
        are only expected when the 'search_name' table exists, indexes
        for updates only when the 'place' table exists.
    """
    required = [
        'idx_place_addressline_address_place_id',
        'idx_placex_rank_search',
        'idx_placex_rank_address',
        'idx_placex_parent_place_id',
        'idx_placex_geometry_reverse_lookupplacenode',
        'idx_placex_geometry_reverse_lookuppolygon',
        'idx_placex_geometry_placenode',
        'idx_osmline_parent_place_id',
        'idx_osmline_parent_osm_id',
        'idx_postcode_id',
        'idx_postcode_postcode',
    ]

    # These won't exist if --reverse-only import was used
    if table_exists(conn, 'search_name'):
        required += [
            'idx_search_name_nameaddress_vector',
            'idx_search_name_name_vector',
            'idx_search_name_centroid',
            'idx_placex_housenumber',
            'idx_osmline_parent_osm_id_with_hnr',
        ]

    # These won't exist if --no-updates import was used
    if table_exists(conn, 'place'):
        required += [
            'idx_location_area_country_place_id',
            'idx_place_osm_unique',
            'idx_placex_rank_address_sector',
            'idx_placex_rank_boundaries_sector',
        ]

    return required
136
137
138 # CHECK FUNCTIONS
139 #
140 # Functions are executed in the order they appear here.
141
@_check(hint="""\
             {error}

             Hints:
             * Is the database server started?
             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
             * Try connecting to the database with the same settings

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_connection(conn: Any, config: Configuration) -> CheckResult:
    """ Checking database connection

        Fatal when `conn` is the _BadConnection placeholder. The stored
        error message and the configuration are passed on to the hint.
    """
    if not isinstance(conn, _BadConnection):
        return CheckState.OK

    return CheckState.FATAL, {'error': conn.msg, 'config': config}
160
161
@_check(hint="""\
             Database version ({db_version}) doesn't match Nominatim version ({nom_version})

             Hints:
             {instruction}

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_database_version(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking database_version matches Nominatim software version

        OK only when the 'database_version' property equals
        NOMINATIM_VERSION. Everything else is fatal, with an instruction
        that depends on what exactly is wrong.
    """
    db_version_str = None
    # Applies as long as the properties table cannot be found.
    instruction = 'Are you connecting to the correct database?'

    if table_exists(conn, 'nominatim_properties'):
        db_version_str = properties.get_property(conn, 'database_version')

        if db_version_str is None:
            instruction = 'Database version not found. Did the import finish?'
        else:
            db_version = parse_version(db_version_str)

            if db_version == NOMINATIM_VERSION:
                return CheckState.OK

            if db_version < NOMINATIM_VERSION:
                instruction = "Run migrations: 'nominatim admin --migrate'"
            else:
                instruction = 'You need to upgrade the Nominatim software.'
            instruction += ' Check the Migration chapter of the Administration Guide.'

    return CheckState.FATAL, {'db_version': db_version_str,
                              'nom_version': NOMINATIM_VERSION,
                              'instruction': instruction,
                              'config': config}
199
200
@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the correct database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking for placex table

        Fatal when the database has no 'placex' table.
    """
    if not table_exists(conn, 'placex'):
        return CheckState.FATAL, {'config': config}

    return CheckState.OK
218
219
@_check(hint="""placex table has no data. Did the import finish successfully?""")
def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for placex content

        Fatal when the placex table contains no rows. The count runs
        over a sample of at most 100 rows.
    """
    sample_size = execute_scalar(conn,
                                 'SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')

    if sample_size > 0:
        return CheckState.OK

    return CheckState.FATAL
227
228
@_check(hint="""{msg}""")
def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
    """ Checking that tokenizer works

        Fails when the tokenizer cannot be loaded or when the
        tokenizer's own database check returns a message. That message
        becomes the hint.
    """
    try:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    except UsageError:
        load_error = """\
            Cannot load tokenizer. Did the import finish successfully?"""
        return CheckState.FAIL, {'msg': load_error}

    # None signals that the tokenizer found nothing to complain about.
    problem = tokenizer.check_database(config)

    if problem is not None:
        return CheckState.FAIL, {'msg': problem}

    return CheckState.OK
245
246
@_check(hint="""\
             Wikipedia/Wikidata importance tables missing.
             Quality of search results may be degraded. Reverse geocoding is unaffected.
             See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
             """)
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for wikipedia/wikidata data

        Not applicable when the 'search_name' or the 'place' table does
        not exist. Otherwise a warning is issued when the importance
        table is empty or missing. The 'wikimedia_importance' table is
        preferred, 'wikipedia_article' is used as fallback.
    """
    if not table_exists(conn, 'search_name') or not table_exists(conn, 'place'):
        return CheckState.NOT_APPLICABLE

    if table_exists(conn, 'wikimedia_importance'):
        cnt = execute_scalar(conn, 'SELECT count(*) FROM wikimedia_importance')
    elif table_exists(conn, 'wikipedia_article'):
        cnt = execute_scalar(conn, 'SELECT count(*) FROM wikipedia_article')
    else:
        # Neither table exists. Warn about it instead of running
        # a query against a table that is not there.
        return CheckState.WARN

    return CheckState.WARN if cnt == 0 else CheckState.OK
264
265
@_check(hint="""\
             The indexing didn't finish. {count} entries are not yet indexed.

             To index the remaining entries, run:   {index_cmd}
             """)
def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking indexing status

        OK when no placex row has an indexed_status above 0. On a frozen
        database leftover rows only cause a warning. Otherwise the check
        fails and suggests the command to finish indexing.
    """
    pending = execute_scalar(conn, 'SELECT count(*) FROM placex WHERE indexed_status > 0')

    if pending == 0:
        return CheckState.OK

    if freeze.is_frozen(conn):
        frozen_note = """\
            Database is marked frozen, it cannot be updated.
            Low counts of unindexed places are fine."""
        return CheckState.WARN, {'count': pending, 'index_cmd': frozen_note}

    # With the rank search index in place, this is likely just an
    # interrupted update. Without it, the import process itself looks
    # to have been interrupted.
    index_cmd = ('nominatim index'
                 if index_exists(conn, 'idx_placex_rank_search')
                 else 'nominatim import --continue indexing')

    return CheckState.FAIL, {'count': pending, 'index_cmd': index_cmd}
293
294
@_check(hint="""\
             The following indexes are missing:
               {indexes}

             Rerun the index creation with:   nominatim import --continue db-postprocess
             """)
def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that database indexes are complete

        Fails when one of the indexes expected by _get_indexes() does
        not exist. The missing names are listed in the hint.
    """
    absent = [name for name in _get_indexes(conn) if not index_exists(conn, name)]

    if not absent:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(absent)}
313
314
@_check(hint="""\
             At least one index is invalid. That can happen, e.g. when index creation was
             disrupted and later restarted. You should delete the affected indices
             and recreate them.

             Invalid indexes:
               {indexes}
             """)
def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that all database indexes are valid

        Fails when the pg_index catalog marks an index as not valid.
        The names of the affected indexes are listed in the hint.
    """
    with conn.cursor() as cur:
        cur.execute(""" SELECT relname FROM pg_class, pg_index
                        WHERE pg_index.indisvalid = false
                        AND pg_index.indexrelid = pg_class.oid""")
        invalid = [row[0] for row in cur]

    if not invalid:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(invalid)}
337
338
@_check(hint="""\
             {error}
             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
             """)
def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking TIGER external data table.

        Not applicable unless USE_US_TIGER_DATA is enabled in the
        configuration. Fails when the 'location_property_tiger' table
        is missing or empty.
    """
    if not config.get_bool('USE_US_TIGER_DATA'):
        return CheckState.NOT_APPLICABLE

    if not table_exists(conn, 'location_property_tiger'):
        problem = 'TIGER data table not found.'
    elif execute_scalar(conn, 'SELECT count(*) FROM location_property_tiger') == 0:
        problem = 'TIGER data table is empty.'
    else:
        return CheckState.OK

    return CheckState.FAIL, {'error': problem}