From: Sarah Hoffmann Date: Thu, 14 Apr 2022 19:44:08 +0000 (+0200) Subject: Merge pull request #2666 from lonvia/admin-command-for-forced-indexing X-Git-Tag: v4.1.0~59 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/403e6f7e5c9aaeee6de0aefe76bdcbdb997cf108?hp=f8f20899a35863876c8dd552f4639d681102ee60 Merge pull request #2666 from lonvia/admin-command-for-forced-indexing Admin command for forced indexing --- diff --git a/docs/admin/Maintenance.md b/docs/admin/Maintenance.md index 782b377c..1ee313a9 100644 --- a/docs/admin/Maintenance.md +++ b/docs/admin/Maintenance.md @@ -34,6 +34,30 @@ to rerun the statistics computation when adding larger amounts of new data, for example, when adding an additional country via `nominatim add-data`. +## Forcing recomputation of places and areas + +Command: `nominatim refresh --data-object [NWR] --data-area [NWR]` + +When running replication updates, Nominatim tries to recompute the search +and address information for all places that are affected by a change. But it +needs to restrict the total number of changes to make sure it can keep up +with the minutely updates. Therefore it will refrain from propagating changes +that affect a lot of objects. + +The administrator may force an update of places in the database. +`nominatim refresh --data-object` invalidates a single OSM object. +`nominatim refresh --data-area` invalidates an OSM object and all dependent +objects. That are usually the places that inside its area or around the +center of the object. Both commands expect the OSM object as an argument +of the form OSM type + OSM id. The type must be `N` (node), `W` (way) or +`R` (relation). + +After invalidating the object, indexing must be run again. If continuous +update are running in the background, the objects will be recomputed together +with the next round of updates. Otherwise you need to run `nominatim index` +to finish the recomputation. + + ## Removing large deleted objects Nominatim refuses to delete very large areas because often these deletions are diff --git a/nominatim/cli.py b/nominatim/cli.py index 0a3bd2b0..315c1cfe 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -70,7 +70,10 @@ class CommandlineParser: appropriate subcommand. """ args = NominatimArgs() - self.parser.parse_args(args=kwargs.get('cli_args'), namespace=args) + try: + self.parser.parse_args(args=kwargs.get('cli_args'), namespace=args) + except SystemExit: + return 1 if args.subcommand is None: self.parser.print_help() diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index 3c245cd4..ecc7498e 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -7,6 +7,7 @@ """ Implementation of 'refresh' subcommand. """ +from argparse import ArgumentTypeError import logging from pathlib import Path @@ -19,6 +20,16 @@ from nominatim.db.connection import connect LOG = logging.getLogger() +def _parse_osm_object(obj): + """ Parse the given argument into a tuple of OSM type and ID. + Raises an ArgumentError if the format is not recognized. + """ + if len(obj) < 2 or obj[0].lower() not in 'nrw' or not obj[1:].isdigit(): + raise ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R].") + + return (obj[0].upper(), int(obj[1:])) + + class UpdateRefresh: """\ Recompute auxiliary data used by the indexing process. @@ -53,6 +64,15 @@ class UpdateRefresh: help='Recompute place importances (expensive!)') group.add_argument('--website', action='store_true', help='Refresh the directory that serves the scripts for the web API') + group.add_argument('--data-object', action='append', + type=_parse_osm_object, metavar='OBJECT', + help='Mark the given OSM object as requiring an update' + ' (format: [NWR])') + group.add_argument('--data-area', action='append', + type=_parse_osm_object, metavar='OBJECT', + help='Mark the area around the given OSM object as requiring an update' + ' (format: [NWR])') + group = parser.add_argument_group('Arguments for function refresh') group.add_argument('--no-diff-updates', action='store_false', dest='diffs', help='Do not enable code for propagating updates') @@ -60,7 +80,7 @@ class UpdateRefresh: help='Enable debug warning statements in functions') - def run(self, args): + def run(self, args): #pylint: disable=too-many-branches from ..tools import refresh, postcodes from ..indexer.indexer import Indexer @@ -124,6 +144,14 @@ class UpdateRefresh: with connect(args.config.get_libpq_dsn()) as conn: refresh.setup_website(webdir, args.config, conn) + if args.data_object or args.data_area: + with connect(args.config.get_libpq_dsn()) as conn: + for obj in args.data_object or []: + refresh.invalidate_osm_object(*obj, conn, recursive=False) + for obj in args.data_area or []: + refresh.invalidate_osm_object(*obj, conn, recursive=True) + conn.commit() + return 0 diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index 95be4c0f..aacc622b 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -19,6 +19,7 @@ from nominatim.version import NOMINATIM_VERSION LOG = logging.getLogger() +OSM_TYPE = {'N': 'node', 'W': 'way', 'R': 'relation'} def _add_address_level_rows_from_entry(rows, entry): """ Converts a single entry from the JSON format for address rank @@ -210,3 +211,28 @@ def setup_website(basedir, config, conn): (basedir / script).write_text(template.format('reverse-only-search.php'), 'utf-8') else: (basedir / script).write_text(template.format(script), 'utf-8') + + +def invalidate_osm_object(osm_type, osm_id, conn, recursive=True): + """ Mark the given OSM object for reindexing. When 'recursive' is set + to True (the default), then all dependent objects are marked for + reindexing as well. + + 'osm_type' must be on of 'N' (node), 'W' (way) or 'R' (relation). + If the given object does not exist, then nothing happens. + """ + assert osm_type in ('N', 'R', 'W') + + LOG.warning("Invalidating OSM %s %s%s.", + OSM_TYPE[osm_type], osm_id, + ' and its dependent places' if recursive else '') + + with conn.cursor() as cur: + if recursive: + sql = """SELECT place_force_update(place_id) + FROM placex WHERE osm_type = %s and osm_id = %s""" + else: + sql = """UPDATE placex SET indexed_status = 2 + WHERE osm_type = %s and osm_id = %s""" + + cur.execute(sql, (osm_type, osm_id)) diff --git a/test/python/cli/test_cmd_api.py b/test/python/cli/test_cmd_api.py index f24125c2..80248ac7 100644 --- a/test/python/cli/test_cmd_api.py +++ b/test/python/cli/test_cmd_api.py @@ -14,15 +14,14 @@ import nominatim.clicmd.api @pytest.mark.parametrize("endpoint", (('search', 'reverse', 'lookup', 'details', 'status'))) def test_no_api_without_phpcgi(src_dir, endpoint): - with pytest.raises(SystemExit): - nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE', - osm2pgsql_path='OSM2PGSQL NOT AVAILABLE', - phplib_dir=str(src_dir / 'lib-php'), - data_dir=str(src_dir / 'data'), - phpcgi_path=None, - sqllib_dir=str(src_dir / 'lib-sql'), - config_dir=str(src_dir / 'settings'), - cli_args=[endpoint]) + assert nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE', + osm2pgsql_path='OSM2PGSQL NOT AVAILABLE', + phplib_dir=str(src_dir / 'lib-php'), + data_dir=str(src_dir / 'data'), + phpcgi_path=None, + sqllib_dir=str(src_dir / 'lib-sql'), + config_dir=str(src_dir / 'settings'), + cli_args=[endpoint]) == 1 @pytest.mark.parametrize("params", [('search', '--query', 'new'), diff --git a/test/python/cli/test_cmd_refresh.py b/test/python/cli/test_cmd_refresh.py index b6281c7a..7f44765b 100644 --- a/test/python/cli/test_cmd_refresh.py +++ b/test/python/cli/test_cmd_refresh.py @@ -82,3 +82,24 @@ class TestRefresh: assert self.call_nominatim('refresh', '--importance', '--wiki-data') == 0 assert calls == ['import', 'update'] + + @pytest.mark.parametrize('params', [('--data-object', 'w234'), + ('--data-object', 'N23', '--data-object', 'N24'), + ('--data-area', 'R7723'), + ('--data-area', 'r7723', '--data-area', 'r2'), + ('--data-area', 'R9284425', '--data-object', 'n1234567894567')]) + def test_refresh_objects(self, params, mock_func_factory): + func_mock = mock_func_factory(nominatim.tools.refresh, 'invalidate_osm_object') + + assert self.call_nominatim('refresh', *params) == 0 + + assert func_mock.called == len(params)/2 + + + @pytest.mark.parametrize('func', ('--data-object', '--data-area')) + @pytest.mark.parametrize('param', ('234', 'a55', 'R 453', 'Rel')) + def test_refresh_objects_bad_param(self, func, param, mock_func_factory): + func_mock = mock_func_factory(nominatim.tools.refresh, 'invalidate_osm_object') + + self.call_nominatim('refresh', func, param) == 1 + assert func_mock.called == 0 diff --git a/test/python/tools/test_refresh.py b/test/python/tools/test_refresh.py index cbe7f7bd..ac52aa36 100644 --- a/test/python/tools/test_refresh.py +++ b/test/python/tools/test_refresh.py @@ -39,3 +39,47 @@ def test_recompute_importance(placex_table, table_factory, temp_db_conn, temp_db AS $$ SELECT 0.1::float, 'foo'::text $$ LANGUAGE SQL""") refresh.recompute_importance(temp_db_conn) + + +@pytest.mark.parametrize('osm_type', ('N', 'W', 'R')) +def test_invalidate_osm_object_simple(placex_table, osm_type, temp_db_conn, temp_db_cursor): + placex_table.add(osm_type=osm_type, osm_id=57283) + + refresh.invalidate_osm_object(osm_type, 57283, temp_db_conn, recursive=False) + temp_db_conn.commit() + + assert 2 == temp_db_cursor.scalar("""SELECT indexed_status FROM placex + WHERE osm_type = %s and osm_id = %s""", + (osm_type, 57283)) + + +def test_invalidate_osm_object_nonexisting_simple(placex_table, temp_db_conn, temp_db_cursor): + placex_table.add(osm_type='W', osm_id=57283) + + refresh.invalidate_osm_object('N', 57283, temp_db_conn, recursive=False) + temp_db_conn.commit() + + assert 0 == temp_db_cursor.scalar("""SELECT count(*) FROM placex + WHERE indexed_status > 0""") + + +@pytest.mark.parametrize('osm_type', ('N', 'W', 'R')) +def test_invalidate_osm_object_recursive(placex_table, osm_type, temp_db_conn, temp_db_cursor): + placex_table.add(osm_type=osm_type, osm_id=57283) + + temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_force_update(placeid BIGINT) + RETURNS BOOLEAN AS $$ + BEGIN + UPDATE placex SET indexed_status = 522 + WHERE place_id = placeid; + RETURN TRUE; + END; + $$ + LANGUAGE plpgsql;""") + + refresh.invalidate_osm_object(osm_type, 57283, temp_db_conn) + temp_db_conn.commit() + + assert 522 == temp_db_cursor.scalar("""SELECT indexed_status FROM placex + WHERE osm_type = %s and osm_id = %s""", + (osm_type, 57283))