git.openstreetmap.org Git - nominatim.git/commitdiff
port freeze function to python
author Sarah Hoffmann <lonvia@denofr.de>
Wed, 17 Feb 2021 20:43:15 +0000 (21:43 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Wed, 17 Feb 2021 20:43:15 +0000 (21:43 +0100)
lib-php/setup/SetupClass.php
nominatim/cli.py
nominatim/clicmd/__init__.py
nominatim/clicmd/freeze.py [new file with mode: 0644]
nominatim/tools/freeze.py [new file with mode: 0644]
test/python/conftest.py
test/python/test_cli.py
test/python/test_tools_freeze.py [new file with mode: 0644]

index fedbb644b4238289a97086a06856c2b7d7ab5d65..4946e07046529b548537bfe64f01bff490b24666 100755 (executable)
@@ -657,50 +657,7 @@ class SetupFunctions
 
     public function drop()
     {
 
     public function drop()
     {
-        info('Drop tables only required for updates');
-
-        // The implementation is potentially a bit dangerous because it uses
-        // a positive selection of tables to keep, and deletes everything else.
-        // Including any tables that the unsuspecting user might have manually
-        // created. USE AT YOUR OWN PERIL.
-        // tables we want to keep. everything else goes.
-        $aKeepTables = array(
-                        '*columns',
-                        'import_polygon_*',
-                        'import_status',
-                        'place_addressline',
-                        'location_postcode',
-                        'location_property*',
-                        'placex',
-                        'search_name',
-                        'seq_*',
-                        'word',
-                        'query_log',
-                        'new_query_log',
-                        'spatial_ref_sys',
-                        'country_name',
-                        'place_classtype_*',
-                        'country_osm_grid'
-                       );
-
-        $aDropTables = array();
-        $aHaveTables = $this->db()->getListOfTables();
-
-        foreach ($aHaveTables as $sTable) {
-            $bFound = false;
-            foreach ($aKeepTables as $sKeep) {
-                if (fnmatch($sKeep, $sTable)) {
-                    $bFound = true;
-                    break;
-                }
-            }
-            if (!$bFound) array_push($aDropTables, $sTable);
-        }
-        foreach ($aDropTables as $sDrop) {
-            $this->dropTable($sDrop);
-        }
-
-        $this->removeFlatnodeFile();
+        (clone($this->oNominatimCmd))->addParams('freeze')->run();
     }
 
     /**
     }
 
     /**
index 8cb73a8ecda425395d06e325f21ae806b549a6ff..83ecf67be69f8e496befb541ede0b1657c1a87e0 100644 (file)
@@ -173,27 +173,6 @@ class SetupAll:
         return run_legacy_script(*params, nominatim_env=args)
 
 
         return run_legacy_script(*params, nominatim_env=args)
 
 
-class SetupFreeze:
-    """\
-    Make database read-only.
-
-    About half of data in the Nominatim database is kept only to be able to
-    keep the data up-to-date with new changes made in OpenStreetMap. This
-    command drops all this data and only keeps the part needed for geocoding
-    itself.
-
-    This command has the same effect as the `--no-updates` option for imports.
-    """
-
-    @staticmethod
-    def add_args(parser):
-        pass # No options
-
-    @staticmethod
-    def run(args):
-        return run_legacy_script('setup.php', '--drop', nominatim_env=args)
-
-
 class SetupSpecialPhrases:
     """\
     Maintain special phrases.
 class SetupSpecialPhrases:
     """\
     Maintain special phrases.
@@ -352,7 +331,7 @@ def nominatim(**kwargs):
     parser = CommandlineParser('nominatim', nominatim.__doc__)
 
     parser.add_subcommand('import', SetupAll)
     parser = CommandlineParser('nominatim', nominatim.__doc__)
 
     parser.add_subcommand('import', SetupAll)
-    parser.add_subcommand('freeze', SetupFreeze)
+    parser.add_subcommand('freeze', clicmd.SetupFreeze)
     parser.add_subcommand('replication', clicmd.UpdateReplication)
 
     parser.add_subcommand('special-phrases', SetupSpecialPhrases)
     parser.add_subcommand('replication', clicmd.UpdateReplication)
 
     parser.add_subcommand('special-phrases', SetupSpecialPhrases)
index 9a686df256b48bd87635a880f241152845defbe3..ae970c822c9832ad61b4dca94a9425b87728fcc6 100644 (file)
@@ -7,3 +7,4 @@ from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
 from .index import UpdateIndex
 from .refresh import UpdateRefresh
 from .admin import AdminFuncs
 from .index import UpdateIndex
 from .refresh import UpdateRefresh
 from .admin import AdminFuncs
+from .freeze import SetupFreeze
diff --git a/nominatim/clicmd/freeze.py b/nominatim/clicmd/freeze.py
new file mode 100644 (file)
index 0000000..8bca04b
--- /dev/null
@@ -0,0 +1,37 @@
+"""
+Implementation of the 'freeze' subcommand.
+"""
+
+from ..db.connection import connect
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid importing modules that may end up unused.
+# pylint: disable=E0012,C0415
+
+class SetupFreeze:
+    """\
+    Make database read-only.
+
+    About half of data in the Nominatim database is kept only to be able to
+    keep the data up-to-date with new changes made in OpenStreetMap. This
+    command drops all this data and only keeps the part needed for geocoding
+    itself.
+
+    This command has the same effect as the `--no-updates` option for imports.
+    """
+
+    @staticmethod
+    def add_args(parser):
+        pass # No options
+
+    @staticmethod
+    def run(args):
+        # Drops the update-only tables and the flatnode file configured in
+        # args.config. Returns 0 on success; errors propagate as exceptions.
+        from ..tools import freeze
+
+        conn = connect(args.config.get_libpq_dsn())
+        try:
+            freeze.drop_update_tables(conn)
+            freeze.drop_flatnode_file(args.config.FLATNODE_FILE)
+        finally:
+            # Release the connection even when one of the steps raises.
+            conn.close()
+
+        return 0
diff --git a/nominatim/tools/freeze.py b/nominatim/tools/freeze.py
new file mode 100644 (file)
index 0000000..cc1bf97
--- /dev/null
@@ -0,0 +1,43 @@
+"""
+Functions for removing unnecessary data from the database.
+"""
+from pathlib import Path
+
+# Name patterns of the tables dropped by drop_update_tables(). Each entry
+# is inserted verbatim into an SQL LIKE clause, so '%' matches any sequence
+# of characters and '_' matches any single character (not only a literal
+# underscore).
+UPDATE_TABLES = [
+    'address_levels',
+    'gb_postcode',
+    'import_osmosis_log',
+    'import_polygon_%',
+    'location_area%',
+    'location_road%',
+    'place',
+    'planet_osm_%',
+    'search_name_%',
+    'us_postcode',
+    'wikipedia_%'
+]
+
+def drop_update_tables(conn):
+    """ Drop all tables only necessary for updating the database from
+        OSM replication data.
+
+        `conn` is an open database connection. Every table listed in
+        pg_tables whose name matches one of the LIKE patterns in
+        UPDATE_TABLES is dropped with CASCADE. The changes are committed
+        on `conn` before the function returns.
+    """
+
+    where = ' or '.join(["(tablename LIKE '{}')".format(t) for t in UPDATE_TABLES])
+
+    with conn.cursor() as cur:
+        cur.execute("SELECT tablename FROM pg_tables WHERE " + where)
+        # Collect the names first: the same cursor is reused for the DROPs.
+        tables = [r[0] for r in cur]
+
+        for table in tables:
+            # Double embedded quotes so that any table name stays a valid
+            # quoted identifier.
+            quoted = '"{}"'.format(table.replace('"', '""'))
+            cur.execute('DROP TABLE IF EXISTS {} CASCADE'.format(quoted))
+
+    conn.commit()
+
+
+def drop_flatnode_file(fname):
+    """ Remove the flatnode file if it exists.
+
+        `fname` is the path of the file. Nothing happens when it is
+        empty or None, or when the file is already gone.
+    """
+    if fname:
+        try:
+            Path(fname).unlink()
+        except FileNotFoundError:
+            # The file is already gone, which is the desired end state.
+            # Unlinking directly avoids the race of a separate exists() check.
+            pass
index ecd40d7cf8b616c0af126d5c411c030527d30c77..72a56dcff581bb123ee29855589352cf3eeee47b 100644 (file)
@@ -36,6 +36,14 @@ class _TestingCursor(psycopg2.extras.DictCursor):
 
         return set((tuple(row) for row in self))
 
 
         return set((tuple(row) for row in self))
 
+    def table_exists(self, table):
+        """ Tell whether pg_tables lists exactly one table named `table`.
+        """
+        query = """SELECT count(*) FROM pg_tables
+                             WHERE tablename = %s"""
+        return self.scalar(query, (table, )) == 1
+
+
 @pytest.fixture
 def temp_db(monkeypatch):
     """ Create an empty database for the test. The database name is also
 @pytest.fixture
 def temp_db(monkeypatch):
     """ Create an empty database for the test. The database name is also
index 0c0a689e28b9f99a5897332babd711d9f7cacfa5..e1df94785f907186c43d509e652dc57a98273ced 100644 (file)
@@ -17,6 +17,7 @@ import nominatim.clicmd.admin
 import nominatim.indexer.indexer
 import nominatim.tools.refresh
 import nominatim.tools.replication
 import nominatim.indexer.indexer
 import nominatim.tools.refresh
 import nominatim.tools.replication
+import nominatim.tools.freeze
 from nominatim.errors import UsageError
 from nominatim.db import status
 
 from nominatim.errors import UsageError
 from nominatim.db import status
 
@@ -50,6 +51,14 @@ def mock_run_legacy(monkeypatch):
     monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock)
     return mock
 
     monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock)
     return mock
 
+@pytest.fixture
+def mock_func_factory(monkeypatch):
+    """ Provide a factory that replaces attribute `func` of `module` with
+        a new MockParamCapture and returns that mock for inspection.
+
+        Extra keyword arguments are handed on to the MockParamCapture
+        constructor (e.g. `retval`), so tests that need a configured mock
+        can still use the factory.
+    """
+    def get_mock(module, func, **mock_kwargs):
+        mock = MockParamCapture(**mock_kwargs)
+        monkeypatch.setattr(module, func, mock)
+        return mock
+
+    return get_mock
 
 def test_cli_help(capsys):
     """ Running nominatim tool without arguments prints help.
 
 def test_cli_help(capsys):
     """ Running nominatim tool without arguments prints help.
@@ -62,7 +71,6 @@ def test_cli_help(capsys):
 
 @pytest.mark.parametrize("command,script", [
                          (('import', '--continue', 'load-data'), 'setup'),
 
 @pytest.mark.parametrize("command,script", [
                          (('import', '--continue', 'load-data'), 'setup'),
-                         (('freeze',), 'setup'),
                          (('special-phrases',), 'specialphrases'),
                          (('add-data', '--tiger-data', 'tiger'), 'setup'),
                          (('add-data', '--file', 'foo.osm'), 'update'),
                          (('special-phrases',), 'specialphrases'),
                          (('add-data', '--tiger-data', 'tiger'), 'setup'),
                          (('add-data', '--file', 'foo.osm'), 'update'),
@@ -75,22 +83,30 @@ def test_legacy_commands_simple(mock_run_legacy, command, script):
     assert mock_run_legacy.last_args[0] == script + '.php'
 
 
     assert mock_run_legacy.last_args[0] == script + '.php'
 
 
+def test_freeze_command(mock_func_factory, temp_db):
+    """ The 'freeze' command exits with 0 and calls each of the two
+        freeze tool functions exactly once.
+    """
+    mocks = [mock_func_factory(nominatim.tools.freeze, name)
+             for name in ('drop_update_tables', 'drop_flatnode_file')]
+
+    exit_code = call_nominatim('freeze')
+
+    assert 0 == exit_code
+    for mock in mocks:
+        assert mock.called == 1
+
+
 @pytest.mark.parametrize("params", [('--warm', ),
                                     ('--warm', '--reverse-only'),
                                     ('--warm', '--search-only'),
                                     ('--check-database', )])
 @pytest.mark.parametrize("params", [('--warm', ),
                                     ('--warm', '--reverse-only'),
                                     ('--warm', '--search-only'),
                                     ('--check-database', )])
-def test_admin_command_legacy(monkeypatch, params):
-    mock_run_legacy = MockParamCapture()
-    monkeypatch.setattr(nominatim.clicmd.admin, 'run_legacy_script', mock_run_legacy)
+def test_admin_command_legacy(mock_func_factory, params):
+    mock_run_legacy = mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script')
 
     assert 0 == call_nominatim('admin', *params)
 
     assert mock_run_legacy.called == 1
 
 @pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))])
 
     assert 0 == call_nominatim('admin', *params)
 
     assert mock_run_legacy.called == 1
 
 @pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))])
-def test_admin_command_tool(temp_db, monkeypatch, func, params):
-    mock = MockParamCapture()
-    monkeypatch.setattr(nominatim.tools.admin, func, mock)
+def test_admin_command_tool(temp_db, mock_func_factory, func, params):
+    mock = mock_func_factory(nominatim.tools.admin, func)
 
     assert 0 == call_nominatim('admin', *params)
     assert mock.called == 1
 
     assert 0 == call_nominatim('admin', *params)
     assert mock.called == 1
@@ -109,12 +125,10 @@ def test_add_data_command(mock_run_legacy, name, oid):
                           (['--boundaries-only'], 1, 0),
                           (['--no-boundaries'], 0, 1),
                           (['--boundaries-only', '--no-boundaries'], 0, 0)])
                           (['--boundaries-only'], 1, 0),
                           (['--no-boundaries'], 0, 1),
                           (['--boundaries-only', '--no-boundaries'], 0, 0)])
-def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks):
+def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks):
     temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
     temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
-    bnd_mock = MockParamCapture()
-    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', bnd_mock)
-    rank_mock = MockParamCapture()
-    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', rank_mock)
+    bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries')
+    rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank')
 
     assert 0 == call_nominatim('index', *params)
 
 
     assert 0 == call_nominatim('index', *params)
 
@@ -127,9 +141,8 @@ def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks):
                          ('importance', ('update.php', '--recompute-importance')),
                          ('website', ('setup.php', '--setup-website')),
                          ])
                          ('importance', ('update.php', '--recompute-importance')),
                          ('website', ('setup.php', '--setup-website')),
                          ])
-def test_refresh_legacy_command(monkeypatch, temp_db, command, params):
-    mock_run_legacy = MockParamCapture()
-    monkeypatch.setattr(nominatim.clicmd.refresh, 'run_legacy_script', mock_run_legacy)
+def test_refresh_legacy_command(mock_func_factory, temp_db, command, params):
+    mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')
 
     assert 0 == call_nominatim('refresh', '--' + command)
 
 
     assert 0 == call_nominatim('refresh', '--' + command)
 
@@ -143,17 +156,15 @@ def test_refresh_legacy_command(monkeypatch, temp_db, command, params):
                          ('address-levels', 'load_address_levels_from_file'),
                          ('functions', 'create_functions'),
                          ])
                          ('address-levels', 'load_address_levels_from_file'),
                          ('functions', 'create_functions'),
                          ])
-def test_refresh_command(monkeypatch, temp_db, command, func):
-    func_mock = MockParamCapture()
-    monkeypatch.setattr(nominatim.tools.refresh, func, func_mock)
+def test_refresh_command(mock_func_factory, temp_db, command, func):
+    func_mock = mock_func_factory(nominatim.tools.refresh, func)
 
     assert 0 == call_nominatim('refresh', '--' + command)
     assert func_mock.called == 1
 
 
 
     assert 0 == call_nominatim('refresh', '--' + command)
     assert func_mock.called == 1
 
 
-def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
-    mock_run_legacy = MockParamCapture()
-    monkeypatch.setattr(nominatim.clicmd.refresh, 'run_legacy_script', mock_run_legacy)
+def test_refresh_importance_computed_after_wiki_import(mock_func_factory, temp_db):
+    mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')
 
     assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')
 
 
     assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')
 
@@ -165,9 +176,8 @@ def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
                          (('--init', '--no-update-functions'), 'init_replication'),
                          (('--check-for-updates',), 'check_for_updates')
                          ])
                          (('--init', '--no-update-functions'), 'init_replication'),
                          (('--check-for-updates',), 'check_for_updates')
                          ])
-def test_replication_command(monkeypatch, temp_db, params, func):
-    func_mock = MockParamCapture()
-    monkeypatch.setattr(nominatim.tools.replication, func, func_mock)
+def test_replication_command(mock_func_factory, temp_db, params, func):
+    func_mock = mock_func_factory(nominatim.tools.replication, func)
 
     assert 0 == call_nominatim('replication', *params)
     assert func_mock.called == 1
 
     assert 0 == call_nominatim('replication', *params)
     assert func_mock.called == 1
@@ -188,11 +198,10 @@ def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
 
 @pytest.mark.parametrize("state", [nominatim.tools.replication.UpdateState.UP_TO_DATE,
                                    nominatim.tools.replication.UpdateState.NO_CHANGES])
 
 @pytest.mark.parametrize("state", [nominatim.tools.replication.UpdateState.UP_TO_DATE,
                                    nominatim.tools.replication.UpdateState.NO_CHANGES])
-def test_replication_update_once_no_index(monkeypatch, temp_db, temp_db_conn,
+def test_replication_update_once_no_index(mock_func_factory, temp_db, temp_db_conn,
                                           status_table, state):
     status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
                                           status_table, state):
     status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
-    func_mock = MockParamCapture(retval=state)
-    monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock)
+    func_mock = mock_func_factory(nominatim.tools.replication, 'update')
 
     assert 0 == call_nominatim('replication', '--once', '--no-index')
 
 
     assert 0 == call_nominatim('replication', '--once', '--no-index')
 
@@ -236,9 +245,8 @@ def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, stat
     assert sleep_mock.last_args[0] == 60
 
 
     assert sleep_mock.last_args[0] == 60
 
 
-def test_serve_command(monkeypatch):
-    func = MockParamCapture()
-    monkeypatch.setattr(nominatim.cli, 'run_php_server', func)
+def test_serve_command(mock_func_factory):
+    func = mock_func_factory(nominatim.cli, 'run_php_server')
 
     call_nominatim('serve')
 
 
     call_nominatim('serve')
 
@@ -254,9 +262,8 @@ def test_serve_command(monkeypatch):
                          ('details', '--place_id', '10001'),
                          ('status',)
                          ])
                          ('details', '--place_id', '10001'),
                          ('status',)
                          ])
-def test_api_commands_simple(monkeypatch, params):
-    mock_run_api = MockParamCapture()
-    monkeypatch.setattr(nominatim.clicmd.api, 'run_api_script', mock_run_api)
+def test_api_commands_simple(mock_func_factory, params):
+    mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script')
 
     assert 0 == call_nominatim(*params)
 
 
     assert 0 == call_nominatim(*params)
 
diff --git a/test/python/test_tools_freeze.py b/test/python/test_tools_freeze.py
new file mode 100644 (file)
index 0000000..fcdab23
--- /dev/null
@@ -0,0 +1,51 @@
+"""
+Tests for freeze functions (removing unused database parts).
+"""
+import pytest
+
+from nominatim.tools import freeze
+
+# Tables that must still exist after drop_update_tables() has run.
+NOMINATIM_RUNTIME_TABLES = [
+    'country_name', 'country_osm_grid',
+    'location_postcode', 'location_property_osmline', 'location_property_tiger',
+    'placex', 'place_addressline',
+    'search_name',
+    'word'
+]
+
+# Tables that drop_update_tables() is expected to remove. There is at least
+# one name for every pattern in freeze.UPDATE_TABLES.
+NOMINATIM_DROP_TABLES = [
+    'address_levels',
+    'gb_postcode', 'us_postcode',
+    'import_osmosis_log', 'import_polygon_delete', 'import_polygon_error',
+    'location_area', 'location_area_country', 'location_area_large_100',
+    'location_road_1',
+    'place', 'planet_osm_nodes', 'planet_osm_rels', 'planet_osm_ways',
+    'search_name_111',
+    'wikipedia_article', 'wikipedia_redirect'
+]
+
+def test_drop_tables(temp_db_conn, temp_db_cursor):
+    """ After drop_update_tables() the runtime tables still exist and
+        the tables from the drop list are gone.
+    """
+    all_tables = NOMINATIM_RUNTIME_TABLES + NOMINATIM_DROP_TABLES
+    for name in all_tables:
+        temp_db_cursor.execute('CREATE TABLE {} (id int)'.format(name))
+
+    freeze.drop_update_tables(temp_db_conn)
+
+    for kept in NOMINATIM_RUNTIME_TABLES:
+        assert temp_db_cursor.table_exists(kept)
+
+    for dropped in NOMINATIM_DROP_TABLES:
+        assert not temp_db_cursor.table_exists(dropped)
+
+def test_drop_flatnode_file_no_file():
+    """ An empty file name is accepted without raising.
+    """
+    unset_name = ''
+    freeze.drop_flatnode_file(unset_name)
+
+
+def test_drop_flatnode_file_file_already_gone(tmp_path):
+    """ A file name pointing to a file that was never created is
+        accepted without raising.
+    """
+    missing = tmp_path / 'something.store'
+    freeze.drop_flatnode_file(str(missing))
+
+
+def test_drop_flatnode_file_delte(tmp_path):
+    """ An existing flatnode file is removed.
+    """
+    target = tmp_path / 'flatnode.store'
+    target.write_text('Some content')
+
+    freeze.drop_flatnode_file(str(target))
+
+    assert not target.exists()