From 101a1f895d25451535f5c186d2ce1f5b7e753e3b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 17 Feb 2021 21:43:15 +0100 Subject: [PATCH 1/1] port freeze function to python --- lib-php/setup/SetupClass.php | 45 +------------------- nominatim/cli.py | 23 +--------- nominatim/clicmd/__init__.py | 1 + nominatim/clicmd/freeze.py | 37 ++++++++++++++++ nominatim/tools/freeze.py | 43 +++++++++++++++++++ test/python/conftest.py | 8 ++++ test/python/test_cli.py | 73 +++++++++++++++++--------------- test/python/test_tools_freeze.py | 51 ++++++++++++++++++++++ 8 files changed, 182 insertions(+), 99 deletions(-) create mode 100644 nominatim/clicmd/freeze.py create mode 100644 nominatim/tools/freeze.py create mode 100644 test/python/test_tools_freeze.py diff --git a/lib-php/setup/SetupClass.php b/lib-php/setup/SetupClass.php index fedbb644..4946e070 100755 --- a/lib-php/setup/SetupClass.php +++ b/lib-php/setup/SetupClass.php @@ -657,50 +657,7 @@ class SetupFunctions public function drop() { - info('Drop tables only required for updates'); - - // The implementation is potentially a bit dangerous because it uses - // a positive selection of tables to keep, and deletes everything else. - // Including any tables that the unsuspecting user might have manually - // created. USE AT YOUR OWN PERIL. - // tables we want to keep. everything else goes. 
- $aKeepTables = array( - '*columns', - 'import_polygon_*', - 'import_status', - 'place_addressline', - 'location_postcode', - 'location_property*', - 'placex', - 'search_name', - 'seq_*', - 'word', - 'query_log', - 'new_query_log', - 'spatial_ref_sys', - 'country_name', - 'place_classtype_*', - 'country_osm_grid' - ); - - $aDropTables = array(); - $aHaveTables = $this->db()->getListOfTables(); - - foreach ($aHaveTables as $sTable) { - $bFound = false; - foreach ($aKeepTables as $sKeep) { - if (fnmatch($sKeep, $sTable)) { - $bFound = true; - break; - } - } - if (!$bFound) array_push($aDropTables, $sTable); - } - foreach ($aDropTables as $sDrop) { - $this->dropTable($sDrop); - } - - $this->removeFlatnodeFile(); + (clone($this->oNominatimCmd))->addParams('freeze')->run(); } /** diff --git a/nominatim/cli.py b/nominatim/cli.py index 8cb73a8e..83ecf67b 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -173,27 +173,6 @@ class SetupAll: return run_legacy_script(*params, nominatim_env=args) -class SetupFreeze: - """\ - Make database read-only. - - About half of data in the Nominatim database is kept only to be able to - keep the data up-to-date with new changes made in OpenStreetMap. This - command drops all this data and only keeps the part needed for geocoding - itself. - - This command has the same effect as the `--no-updates` option for imports. - """ - - @staticmethod - def add_args(parser): - pass # No options - - @staticmethod - def run(args): - return run_legacy_script('setup.php', '--drop', nominatim_env=args) - - class SetupSpecialPhrases: """\ Maintain special phrases. 
@@ -352,7 +331,7 @@ def nominatim(**kwargs): parser = CommandlineParser('nominatim', nominatim.__doc__) parser.add_subcommand('import', SetupAll) - parser.add_subcommand('freeze', SetupFreeze) + parser.add_subcommand('freeze', clicmd.SetupFreeze) parser.add_subcommand('replication', clicmd.UpdateReplication) parser.add_subcommand('special-phrases', SetupSpecialPhrases) diff --git a/nominatim/clicmd/__init__.py b/nominatim/clicmd/__init__.py index 9a686df2..ae970c82 100644 --- a/nominatim/clicmd/__init__.py +++ b/nominatim/clicmd/__init__.py @@ -7,3 +7,4 @@ from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus from .index import UpdateIndex from .refresh import UpdateRefresh from .admin import AdminFuncs +from .freeze import SetupFreeze diff --git a/nominatim/clicmd/freeze.py b/nominatim/clicmd/freeze.py new file mode 100644 index 00000000..8bca04b9 --- /dev/null +++ b/nominatim/clicmd/freeze.py @@ -0,0 +1,37 @@ +""" +Implementation of the 'freeze' subcommand. +""" + +from ..db.connection import connect + +# Do not repeat documentation of subcommand classes. +# pylint: disable=C0111 +# Using non-top-level imports to avoid eventually unused imports. +# pylint: disable=E0012,C0415 + +class SetupFreeze: + """\ + Make database read-only. + + About half of data in the Nominatim database is kept only to be able to + keep the data up-to-date with new changes made in OpenStreetMap. This + command drops all this data and only keeps the part needed for geocoding + itself. + + This command has the same effect as the `--no-updates` option for imports. 
"""
Functions for removing unnecessary data from the database.
"""
from pathlib import Path

# Names of the tables that are only needed for updates. Entries are SQL LIKE
# patterns where '%' is a wildcard. Underscores are meant literally; they
# are escaped when the query is built (see _like_pattern()).
UPDATE_TABLES = [
    'address_levels',
    'gb_postcode',
    'import_osmosis_log',
    'import_polygon_%',
    'location_area%',
    'location_road%',
    'place',
    'planet_osm_%',
    'search_name_%',
    'us_postcode',
    'wikipedia_%'
]


def _like_pattern(name):
    """ Turn an entry of UPDATE_TABLES into a LIKE pattern where '_' only
        matches a literal underscore. '%' is kept as a wildcard.

        Without the escape, '_' matches any single character, so that
        'address_levels' would also select a table like 'addressXlevels'.
        Backslash is the default LIKE escape character in PostgreSQL.
    """
    return name.replace('_', r'\_')


def drop_update_tables(conn):
    """ Drop all tables only necessary for updating the database from
        OSM replication data.

        `conn` must be an open database connection whose cursors accept
        '%s'-style query parameters. Tables are looked up in pg_tables
        by the patterns in UPDATE_TABLES and dropped with CASCADE. The
        changes are committed before the function returns.
    """
    patterns = [_like_pattern(t) for t in UPDATE_TABLES]
    # One placeholder per pattern; the values are handed over as query
    # parameters so that quoting is left to the database driver.
    where = ' or '.join(['(tablename LIKE %s)'] * len(patterns))

    with conn.cursor() as cur:
        cur.execute("SELECT tablename FROM pg_tables WHERE " + where, patterns)
        # Collect the names first: the cursor is reused for the DROPs below.
        tables = [row[0] for row in cur]

        for table in tables:
            # Double any embedded quote so that the name stays one identifier.
            cur.execute('DROP TABLE IF EXISTS "{}" CASCADE'
                        .format(table.replace('"', '""')))

    conn.commit()


def drop_flatnode_file(fname):
    """ Remove the flatnode file if it exists.

        `fname` may be a string or Path. Nothing is done when it is empty
        or None. A file that is already gone is not an error.
    """
    if not fname:
        return

    # Unlink directly instead of checking for existence first: the file may
    # disappear between the check and the removal.
    try:
        Path(fname).unlink()
    except FileNotFoundError:
        pass
@pytest.fixture
def mock_func_factory(monkeypatch):
    """ Provide a factory that replaces a function with a MockParamCapture.

        The returned callable takes the module (or class) and the name of
        the attribute to replace. Any additional keyword arguments are
        handed on to MockParamCapture, so that a test can configure the
        mock, e.g. `retval=...` when the code under test evaluates the
        return value of the replaced function. The patch is undone by
        monkeypatch at the end of the test.
    """
    def get_mock(module, func, **mock_kwargs):
        mock = MockParamCapture(**mock_kwargs)
        monkeypatch.setattr(module, func, mock)
        return mock

    return get_mock
mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script') assert 0 == call_nominatim('admin', *params) assert mock_run_legacy.called == 1 @pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))]) -def test_admin_command_tool(temp_db, monkeypatch, func, params): - mock = MockParamCapture() - monkeypatch.setattr(nominatim.tools.admin, func, mock) +def test_admin_command_tool(temp_db, mock_func_factory, func, params): + mock = mock_func_factory(nominatim.tools.admin, func) assert 0 == call_nominatim('admin', *params) assert mock.called == 1 @@ -109,12 +125,10 @@ def test_add_data_command(mock_run_legacy, name, oid): (['--boundaries-only'], 1, 0), (['--no-boundaries'], 0, 1), (['--boundaries-only', '--no-boundaries'], 0, 0)]) -def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks): +def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks): temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)") - bnd_mock = MockParamCapture() - monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', bnd_mock) - rank_mock = MockParamCapture() - monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', rank_mock) + bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries') + rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank') assert 0 == call_nominatim('index', *params) @@ -127,9 +141,8 @@ def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks): ('importance', ('update.php', '--recompute-importance')), ('website', ('setup.php', '--setup-website')), ]) -def test_refresh_legacy_command(monkeypatch, temp_db, command, params): - mock_run_legacy = MockParamCapture() - monkeypatch.setattr(nominatim.clicmd.refresh, 'run_legacy_script', mock_run_legacy) +def test_refresh_legacy_command(mock_func_factory, temp_db, command, params): + mock_run_legacy = 
mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script') assert 0 == call_nominatim('refresh', '--' + command) @@ -143,17 +156,15 @@ def test_refresh_legacy_command(monkeypatch, temp_db, command, params): ('address-levels', 'load_address_levels_from_file'), ('functions', 'create_functions'), ]) -def test_refresh_command(monkeypatch, temp_db, command, func): - func_mock = MockParamCapture() - monkeypatch.setattr(nominatim.tools.refresh, func, func_mock) +def test_refresh_command(mock_func_factory, temp_db, command, func): + func_mock = mock_func_factory(nominatim.tools.refresh, func) assert 0 == call_nominatim('refresh', '--' + command) assert func_mock.called == 1 -def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db): - mock_run_legacy = MockParamCapture() - monkeypatch.setattr(nominatim.clicmd.refresh, 'run_legacy_script', mock_run_legacy) +def test_refresh_importance_computed_after_wiki_import(mock_func_factory, temp_db): + mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script') assert 0 == call_nominatim('refresh', '--importance', '--wiki-data') @@ -165,9 +176,8 @@ def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db): (('--init', '--no-update-functions'), 'init_replication'), (('--check-for-updates',), 'check_for_updates') ]) -def test_replication_command(monkeypatch, temp_db, params, func): - func_mock = MockParamCapture() - monkeypatch.setattr(nominatim.tools.replication, func, func_mock) +def test_replication_command(mock_func_factory, temp_db, params, func): + func_mock = mock_func_factory(nominatim.tools.replication, func) assert 0 == call_nominatim('replication', *params) assert func_mock.called == 1 @@ -188,11 +198,10 @@ def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db): @pytest.mark.parametrize("state", [nominatim.tools.replication.UpdateState.UP_TO_DATE, nominatim.tools.replication.UpdateState.NO_CHANGES]) -def 
test_replication_update_once_no_index(monkeypatch, temp_db, temp_db_conn, +def test_replication_update_once_no_index(mock_func_factory, temp_db, temp_db_conn, status_table, state): status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1) - func_mock = MockParamCapture(retval=state) - monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock) + func_mock = mock_func_factory(nominatim.tools.replication, 'update') assert 0 == call_nominatim('replication', '--once', '--no-index') @@ -236,9 +245,8 @@ def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, stat assert sleep_mock.last_args[0] == 60 -def test_serve_command(monkeypatch): - func = MockParamCapture() - monkeypatch.setattr(nominatim.cli, 'run_php_server', func) +def test_serve_command(mock_func_factory): + func = mock_func_factory(nominatim.cli, 'run_php_server') call_nominatim('serve') @@ -254,9 +262,8 @@ def test_serve_command(monkeypatch): ('details', '--place_id', '10001'), ('status',) ]) -def test_api_commands_simple(monkeypatch, params): - mock_run_api = MockParamCapture() - monkeypatch.setattr(nominatim.clicmd.api, 'run_api_script', mock_run_api) +def test_api_commands_simple(mock_func_factory, params): + mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script') assert 0 == call_nominatim(*params) diff --git a/test/python/test_tools_freeze.py b/test/python/test_tools_freeze.py new file mode 100644 index 00000000..fcdab23a --- /dev/null +++ b/test/python/test_tools_freeze.py @@ -0,0 +1,51 @@ +""" +Tests for freeze functions (removing unused database parts). 
import pytest

from nominatim.tools import freeze

# Tables that are needed for geocoding and must survive a freeze.
NOMINATIM_RUNTIME_TABLES = [
    'country_name', 'country_osm_grid',
    'location_postcode', 'location_property_osmline', 'location_property_tiger',
    'placex', 'place_addressline',
    'search_name',
    'word'
]

# Tables that are only needed for updates and must be gone after a freeze.
NOMINATIM_DROP_TABLES = [
    'address_levels',
    'location_area', 'location_area_country', 'location_area_large_100',
    'location_road_1',
    'place', 'planet_osm_nodes', 'planet_osm_rels', 'planet_osm_ways',
    'search_name_111',
    'wikipedia_article', 'wikipedia_redirect'
]


def test_drop_tables(temp_db_conn, temp_db_cursor):
    """ Dropping the update tables removes exactly the tables for updates
        and leaves the runtime tables in place.
    """
    for table in NOMINATIM_RUNTIME_TABLES + NOMINATIM_DROP_TABLES:
        temp_db_cursor.execute('CREATE TABLE {} (id int)'.format(table))

    freeze.drop_update_tables(temp_db_conn)

    for table in NOMINATIM_RUNTIME_TABLES:
        assert temp_db_cursor.table_exists(table)

    for table in NOMINATIM_DROP_TABLES:
        assert not temp_db_cursor.table_exists(table)


def test_drop_flatnode_file_no_file():
    """ An empty file name is silently ignored.
    """
    freeze.drop_flatnode_file('')


def test_drop_flatnode_file_file_already_gone(tmp_path):
    """ A configured file that does not exist is not an error.
    """
    freeze.drop_flatnode_file(str(tmp_path / 'something.store'))


def test_drop_flatnode_file_delete(tmp_path):
    """ An existing flatnode file is removed.
    """
    flatfile = tmp_path / 'flatnode.store'
    flatfile.write_text('Some content')

    freeze.drop_flatnode_file(str(flatfile))

    assert not flatfile.exists()