]> git.openstreetmap.org Git - nominatim.git/commitdiff
port freeze function to python
author Sarah Hoffmann <lonvia@denofr.de>
Wed, 17 Feb 2021 20:43:15 +0000 (21:43 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Wed, 17 Feb 2021 20:43:15 +0000 (21:43 +0100)
lib-php/setup/SetupClass.php
nominatim/cli.py
nominatim/clicmd/__init__.py
nominatim/clicmd/freeze.py [new file with mode: 0644]
nominatim/tools/freeze.py [new file with mode: 0644]
test/python/conftest.py
test/python/test_cli.py
test/python/test_tools_freeze.py [new file with mode: 0644]

index fedbb644b4238289a97086a06856c2b7d7ab5d65..4946e07046529b548537bfe64f01bff490b24666 100755 (executable)
@@ -657,50 +657,7 @@ class SetupFunctions
 
     public function drop()
     {
-        info('Drop tables only required for updates');
-
-        // The implementation is potentially a bit dangerous because it uses
-        // a positive selection of tables to keep, and deletes everything else.
-        // Including any tables that the unsuspecting user might have manually
-        // created. USE AT YOUR OWN PERIL.
-        // tables we want to keep. everything else goes.
-        $aKeepTables = array(
-                        '*columns',
-                        'import_polygon_*',
-                        'import_status',
-                        'place_addressline',
-                        'location_postcode',
-                        'location_property*',
-                        'placex',
-                        'search_name',
-                        'seq_*',
-                        'word',
-                        'query_log',
-                        'new_query_log',
-                        'spatial_ref_sys',
-                        'country_name',
-                        'place_classtype_*',
-                        'country_osm_grid'
-                       );
-
-        $aDropTables = array();
-        $aHaveTables = $this->db()->getListOfTables();
-
-        foreach ($aHaveTables as $sTable) {
-            $bFound = false;
-            foreach ($aKeepTables as $sKeep) {
-                if (fnmatch($sKeep, $sTable)) {
-                    $bFound = true;
-                    break;
-                }
-            }
-            if (!$bFound) array_push($aDropTables, $sTable);
-        }
-        foreach ($aDropTables as $sDrop) {
-            $this->dropTable($sDrop);
-        }
-
-        $this->removeFlatnodeFile();
+        (clone($this->oNominatimCmd))->addParams('freeze')->run();
     }
 
     /**
index 8cb73a8ecda425395d06e325f21ae806b549a6ff..83ecf67be69f8e496befb541ede0b1657c1a87e0 100644 (file)
@@ -173,27 +173,6 @@ class SetupAll:
         return run_legacy_script(*params, nominatim_env=args)
 
 
-class SetupFreeze:
-    """\
-    Make database read-only.
-
-    About half of data in the Nominatim database is kept only to be able to
-    keep the data up-to-date with new changes made in OpenStreetMap. This
-    command drops all this data and only keeps the part needed for geocoding
-    itself.
-
-    This command has the same effect as the `--no-updates` option for imports.
-    """
-
-    @staticmethod
-    def add_args(parser):
-        pass # No options
-
-    @staticmethod
-    def run(args):
-        return run_legacy_script('setup.php', '--drop', nominatim_env=args)
-
-
 class SetupSpecialPhrases:
     """\
     Maintain special phrases.
@@ -352,7 +331,7 @@ def nominatim(**kwargs):
     parser = CommandlineParser('nominatim', nominatim.__doc__)
 
     parser.add_subcommand('import', SetupAll)
-    parser.add_subcommand('freeze', SetupFreeze)
+    parser.add_subcommand('freeze', clicmd.SetupFreeze)
     parser.add_subcommand('replication', clicmd.UpdateReplication)
 
     parser.add_subcommand('special-phrases', SetupSpecialPhrases)
index 9a686df256b48bd87635a880f241152845defbe3..ae970c822c9832ad61b4dca94a9425b87728fcc6 100644 (file)
@@ -7,3 +7,4 @@ from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
 from .index import UpdateIndex
 from .refresh import UpdateRefresh
 from .admin import AdminFuncs
+from .freeze import SetupFreeze
diff --git a/nominatim/clicmd/freeze.py b/nominatim/clicmd/freeze.py
new file mode 100644 (file)
index 0000000..8bca04b
--- /dev/null
@@ -0,0 +1,37 @@
+"""
+Implementation of the 'freeze' subcommand.
+"""
+
+from ..db.connection import connect
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid loading modules that may not be needed.
+# pylint: disable=E0012,C0415
+
+class SetupFreeze:
+    """\
+    Make database read-only.
+
+    About half of data in the Nominatim database is kept only to be able to
+    keep the data up-to-date with new changes made in OpenStreetMap. This
+    command drops all this data and only keeps the part needed for geocoding
+    itself.
+
+    This command has the same effect as the `--no-updates` option for imports.
+    """
+
+    @staticmethod
+    def add_args(parser):
+        pass # No options
+
+    @staticmethod
+    def run(args):
+        # Imported here so the tool module is only loaded when the
+        # subcommand is actually executed.
+        from ..tools import freeze
+
+        conn = connect(args.config.get_libpq_dsn())
+        try:
+            freeze.drop_update_tables(conn)
+            freeze.drop_flatnode_file(args.config.FLATNODE_FILE)
+        finally:
+            # Release the connection even when one of the steps fails.
+            conn.close()
+
+        return 0
diff --git a/nominatim/tools/freeze.py b/nominatim/tools/freeze.py
new file mode 100644 (file)
index 0000000..cc1bf97
--- /dev/null
@@ -0,0 +1,43 @@
+"""
+Functions for removing unnecessary data from the database.
+"""
+from pathlib import Path
+
+# Tables that drop_update_tables() removes from the database.
+# Each entry is inserted into a SQL LIKE condition on pg_tables.tablename,
+# so '%' matches any sequence of characters. Note that '_' is a LIKE
+# wildcard as well (it matches any single character).
+UPDATE_TABLES = [
+    'address_levels',
+    'gb_postcode',
+    'import_osmosis_log',
+    'import_polygon_%',
+    'location_area%',
+    'location_road%',
+    'place',
+    'planet_osm_%',
+    'search_name_%',
+    'us_postcode',
+    'wikipedia_%'
+]
+
+def drop_update_tables(conn):
+    """ Remove every table whose name matches one of the UPDATE_TABLES
+        patterns, i.e. the tables that are only required for applying
+        OSM replication data.
+
+        The table names are looked up in pg_tables (without restricting
+        the schema), each hit is dropped with CASCADE and the changes are
+        committed on `conn` at the end.
+    """
+    conditions = ["(tablename LIKE '{}')".format(pattern)
+                  for pattern in UPDATE_TABLES]
+    query = "SELECT tablename FROM pg_tables WHERE " + ' or '.join(conditions)
+
+    with conn.cursor() as cur:
+        cur.execute(query)
+        # Collect the names first, the cursor is reused for the DROPs.
+        doomed = [row[0] for row in cur]
+
+        for name in doomed:
+            cur.execute('DROP TABLE IF EXISTS "{}" CASCADE'.format(name))
+
+    conn.commit()
+
+
+def drop_flatnode_file(fname):
+    """ Remove the flatnode file if it exists.
+
+        `fname` is the path of the file. Nothing is done when it is
+        empty or None. A file that is already missing is not an error.
+    """
+    if fname:
+        try:
+            Path(fname).unlink()
+        except FileNotFoundError:
+            # Nothing to remove. Catching the error instead of testing
+            # for existence first avoids failing when the file vanishes
+            # between the check and the unlink.
+            pass
index ecd40d7cf8b616c0af126d5c411c030527d30c77..72a56dcff581bb123ee29855589352cf3eeee47b 100644 (file)
@@ -36,6 +36,14 @@ class _TestingCursor(psycopg2.extras.DictCursor):
 
         return set((tuple(row) for row in self))
 
+    def table_exists(self, table):
+        """ Return True when pg_tables lists exactly one table with
+            the given name.
+        """
+        query = """SELECT count(*) FROM pg_tables
+                             WHERE tablename = %s"""
+        return self.scalar(query, (table, )) == 1
+
+
 @pytest.fixture
 def temp_db(monkeypatch):
     """ Create an empty database for the test. The database name is also
index 0c0a689e28b9f99a5897332babd711d9f7cacfa5..e1df94785f907186c43d509e652dc57a98273ced 100644 (file)
@@ -17,6 +17,7 @@ import nominatim.clicmd.admin
 import nominatim.indexer.indexer
 import nominatim.tools.refresh
 import nominatim.tools.replication
+import nominatim.tools.freeze
 from nominatim.errors import UsageError
 from nominatim.db import status
 
@@ -50,6 +51,14 @@ def mock_run_legacy(monkeypatch):
     monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock)
     return mock
 
+@pytest.fixture
+def mock_func_factory(monkeypatch):
+    """ Provide a function that replaces attribute `func` of `module`
+        with a fresh MockParamCapture and returns that mock.
+    """
+    def _install(module, func):
+        capture = MockParamCapture()
+        monkeypatch.setattr(module, func, capture)
+        return capture
+
+    return _install
 
 def test_cli_help(capsys):
     """ Running nominatim tool without arguments prints help.
@@ -62,7 +71,6 @@ def test_cli_help(capsys):
 
 @pytest.mark.parametrize("command,script", [
                          (('import', '--continue', 'load-data'), 'setup'),
-                         (('freeze',), 'setup'),
                          (('special-phrases',), 'specialphrases'),
                          (('add-data', '--tiger-data', 'tiger'), 'setup'),
                          (('add-data', '--file', 'foo.osm'), 'update'),
@@ -75,22 +83,30 @@ def test_legacy_commands_simple(mock_run_legacy, command, script):
     assert mock_run_legacy.last_args[0] == script + '.php'
 
 
+def test_freeze_command(mock_func_factory, temp_db):
+    """ The freeze command succeeds and each of the two freeze functions
+        records one call.
+    """
+    tables_mock = mock_func_factory(nominatim.tools.freeze, 'drop_update_tables')
+    flatnode_mock = mock_func_factory(nominatim.tools.freeze, 'drop_flatnode_file')
+
+    exit_code = call_nominatim('freeze')
+
+    assert 0 == exit_code
+    assert tables_mock.called == 1
+    assert flatnode_mock.called == 1
+
+
 @pytest.mark.parametrize("params", [('--warm', ),
                                     ('--warm', '--reverse-only'),
                                     ('--warm', '--search-only'),
                                     ('--check-database', )])
-def test_admin_command_legacy(monkeypatch, params):
-    mock_run_legacy = MockParamCapture()
-    monkeypatch.setattr(nominatim.clicmd.admin, 'run_legacy_script', mock_run_legacy)
+def test_admin_command_legacy(mock_func_factory, params):
+    mock_run_legacy = mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script')
 
     assert 0 == call_nominatim('admin', *params)
 
     assert mock_run_legacy.called == 1
 
 @pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))])
-def test_admin_command_tool(temp_db, monkeypatch, func, params):
-    mock = MockParamCapture()
-    monkeypatch.setattr(nominatim.tools.admin, func, mock)
+def test_admin_command_tool(temp_db, mock_func_factory, func, params):
+    mock = mock_func_factory(nominatim.tools.admin, func)
 
     assert 0 == call_nominatim('admin', *params)
     assert mock.called == 1
@@ -109,12 +125,10 @@ def test_add_data_command(mock_run_legacy, name, oid):
                           (['--boundaries-only'], 1, 0),
                           (['--no-boundaries'], 0, 1),
                           (['--boundaries-only', '--no-boundaries'], 0, 0)])
-def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks):
+def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks):
     temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
-    bnd_mock = MockParamCapture()
-    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', bnd_mock)
-    rank_mock = MockParamCapture()
-    monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', rank_mock)
+    bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries')
+    rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank')
 
     assert 0 == call_nominatim('index', *params)
 
@@ -127,9 +141,8 @@ def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks):
                          ('importance', ('update.php', '--recompute-importance')),
                          ('website', ('setup.php', '--setup-website')),
                          ])
-def test_refresh_legacy_command(monkeypatch, temp_db, command, params):
-    mock_run_legacy = MockParamCapture()
-    monkeypatch.setattr(nominatim.clicmd.refresh, 'run_legacy_script', mock_run_legacy)
+def test_refresh_legacy_command(mock_func_factory, temp_db, command, params):
+    mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')
 
     assert 0 == call_nominatim('refresh', '--' + command)
 
@@ -143,17 +156,15 @@ def test_refresh_legacy_command(monkeypatch, temp_db, command, params):
                          ('address-levels', 'load_address_levels_from_file'),
                          ('functions', 'create_functions'),
                          ])
-def test_refresh_command(monkeypatch, temp_db, command, func):
-    func_mock = MockParamCapture()
-    monkeypatch.setattr(nominatim.tools.refresh, func, func_mock)
+def test_refresh_command(mock_func_factory, temp_db, command, func):
+    func_mock = mock_func_factory(nominatim.tools.refresh, func)
 
     assert 0 == call_nominatim('refresh', '--' + command)
     assert func_mock.called == 1
 
 
-def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
-    mock_run_legacy = MockParamCapture()
-    monkeypatch.setattr(nominatim.clicmd.refresh, 'run_legacy_script', mock_run_legacy)
+def test_refresh_importance_computed_after_wiki_import(mock_func_factory, temp_db):
+    mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')
 
     assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')
 
@@ -165,9 +176,8 @@ def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
                          (('--init', '--no-update-functions'), 'init_replication'),
                          (('--check-for-updates',), 'check_for_updates')
                          ])
-def test_replication_command(monkeypatch, temp_db, params, func):
-    func_mock = MockParamCapture()
-    monkeypatch.setattr(nominatim.tools.replication, func, func_mock)
+def test_replication_command(mock_func_factory, temp_db, params, func):
+    func_mock = mock_func_factory(nominatim.tools.replication, func)
 
     assert 0 == call_nominatim('replication', *params)
     assert func_mock.called == 1
@@ -188,11 +198,10 @@ def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
 
 @pytest.mark.parametrize("state", [nominatim.tools.replication.UpdateState.UP_TO_DATE,
                                    nominatim.tools.replication.UpdateState.NO_CHANGES])
-def test_replication_update_once_no_index(monkeypatch, temp_db, temp_db_conn,
+def test_replication_update_once_no_index(mock_func_factory, temp_db, temp_db_conn,
                                           status_table, state):
     status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
-    func_mock = MockParamCapture(retval=state)
-    monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock)
+    func_mock = mock_func_factory(nominatim.tools.replication, 'update')
 
     assert 0 == call_nominatim('replication', '--once', '--no-index')
 
@@ -236,9 +245,8 @@ def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, stat
     assert sleep_mock.last_args[0] == 60
 
 
-def test_serve_command(monkeypatch):
-    func = MockParamCapture()
-    monkeypatch.setattr(nominatim.cli, 'run_php_server', func)
+def test_serve_command(mock_func_factory):
+    func = mock_func_factory(nominatim.cli, 'run_php_server')
 
     call_nominatim('serve')
 
@@ -254,9 +262,8 @@ def test_serve_command(monkeypatch):
                          ('details', '--place_id', '10001'),
                          ('status',)
                          ])
-def test_api_commands_simple(monkeypatch, params):
-    mock_run_api = MockParamCapture()
-    monkeypatch.setattr(nominatim.clicmd.api, 'run_api_script', mock_run_api)
+def test_api_commands_simple(mock_func_factory, params):
+    mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script')
 
     assert 0 == call_nominatim(*params)
 
diff --git a/test/python/test_tools_freeze.py b/test/python/test_tools_freeze.py
new file mode 100644 (file)
index 0000000..fcdab23
--- /dev/null
@@ -0,0 +1,51 @@
+"""
+Tests for freeze functions (removing unused database parts).
+"""
+import pytest
+
+from nominatim.tools import freeze
+
+# Tables that are needed for geocoding and must survive a freeze.
+NOMINATIM_RUNTIME_TABLES = [
+    'country_name', 'country_osm_grid',
+    'location_postcode', 'location_property_osmline', 'location_property_tiger',
+    'placex', 'place_addressline',
+    'search_name',
+    'word'
+]
+
+# Tables that a freeze is expected to remove. At least one name for
+# every pattern in freeze.UPDATE_TABLES.
+NOMINATIM_DROP_TABLES = [
+    'address_levels',
+    'gb_postcode', 'us_postcode',
+    'import_osmosis_log', 'import_polygon_delete', 'import_polygon_error',
+    'location_area', 'location_area_country', 'location_area_large_100',
+    'location_road_1',
+    'place', 'planet_osm_nodes', 'planet_osm_rels', 'planet_osm_ways',
+    'search_name_111',
+    'wikipedia_article', 'wikipedia_redirect'
+]
+
+def test_drop_tables(temp_db_conn, temp_db_cursor):
+    """ After drop_update_tables() all runtime tables still exist and
+        all tables from the drop list are gone.
+    """
+    all_tables = NOMINATIM_RUNTIME_TABLES + NOMINATIM_DROP_TABLES
+    for name in all_tables:
+        temp_db_cursor.execute('CREATE TABLE {} (id int)'.format(name))
+
+    freeze.drop_update_tables(temp_db_conn)
+
+    for kept in NOMINATIM_RUNTIME_TABLES:
+        assert temp_db_cursor.table_exists(kept)
+
+    for dropped in NOMINATIM_DROP_TABLES:
+        assert not temp_db_cursor.table_exists(dropped)
+
+def test_drop_flatnode_file_no_file():
+    """ An empty file name must be accepted without raising.
+    """
+    freeze.drop_flatnode_file('')
+
+
+def test_drop_flatnode_file_file_already_gone(tmp_path):
+    """ A path that was never created must be accepted without raising.
+    """
+    freeze.drop_flatnode_file(str(tmp_path / 'something.store'))
+
+
+def test_drop_flatnode_file_delte(tmp_path):
+    """ An existing flatnode file is removed from disk.
+    """
+    store = tmp_path / 'flatnode.store'
+    store.write_text('Some content')
+
+    freeze.drop_flatnode_file(str(store))
+
+    assert not store.exists()