git.openstreetmap.org Git - nominatim.git/commitdiff
move update code for postcode and word count to Python
author: Sarah Hoffmann <lonvia@denofr.de>
Fri, 22 Jan 2021 22:25:37 +0000 (23:25 +0100)
committer: Sarah Hoffmann <lonvia@denofr.de>
Tue, 26 Jan 2021 21:50:54 +0000 (22:50 +0100)
Also adds tests for the new function that executes an SQL script.

lib/admin/update.php
nominatim/cli.py
nominatim/db/utils.py [new file with mode: 0644]
nominatim/tools/refresh.py [new file with mode: 0644]
test/python/test_cli.py
test/python/test_db_utils.py [new file with mode: 0644]

index fe9658b54f42ee458d25fcfb75be6ae38a1782fd..48609c3e87407f06ec43899661f2add87a0b83dc 100644 (file)
@@ -104,14 +104,12 @@ if ($fPostgresVersion >= 11.0) {
     );
 }
 
-
-$oIndexCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
-             ->addParams('index');
+$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
 if ($aResult['quiet']) {
-    $oIndexCmd->addParams('--quiet');
+    $oNominatimCmd->addParams('--quiet');
 }
 if ($aResult['verbose']) {
-    $oIndexCmd->addParams('--verbose');
+    $oNominatimCmd->addParams('--verbose');
 }
 
 $sPyosmiumBin = getSetting('PYOSMIUM_BINARY');
@@ -220,9 +218,7 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
 }
 
 if ($aResult['calculate-postcodes']) {
-    info('Update postcodes centroids');
-    $sTemplate = file_get_contents(CONST_DataDir.'/sql/update-postcodes.sql');
-    runSQLScript($sTemplate, true, true);
+    (clone($oNominatimCmd))->addParams('refresh', '--postcodes')->run();
 }
 
 $sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
@@ -271,15 +267,11 @@ if ($bHaveDiff) {
 }
 
 if ($aResult['recompute-word-counts']) {
-    info('Recompute frequency of full-word search terms');
-    $sTemplate = file_get_contents(CONST_DataDir.'/sql/words_from_search_name.sql');
-    runSQLScript($sTemplate, true, true);
+    (clone($oNominatimCmd))->addParams('refresh', '--word-counts')->run();
 }
 
 if ($aResult['index']) {
-    $oCmd = (clone $oIndexCmd)
-            ->addParams('--minrank', $aResult['index-rank']);
-    $oCmd->run();
+    (clone $oNominatimCmd)->addParams('index', '--minrank', $aResult['index-rank'])->run();
 }
 
 if ($aResult['update-address-levels']) {
@@ -421,7 +413,8 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
         if (!$aResult['no-index']) {
             $fCMDStartTime = time();
 
-            $oThisIndexCmd = clone($oIndexCmd);
+            $oThisIndexCmd = clone($oNominatimCmd);
+            $oThisIndexCmd->addParams('index');
             echo $oThisIndexCmd->escapedCmd()."\n";
             $iErrorLevel = $oThisIndexCmd->run();
             if ($iErrorLevel) {
index c558eb849f77743f62e5a4758f0d229622cd2578..4388902dcbbd1ecbe9630154ac4d5d2375f00ec6 100644 (file)
@@ -8,9 +8,13 @@ import argparse
 import logging
 from pathlib import Path
 
+import psycopg2
+
 from .config import Configuration
 from .tools.exec_utils import run_legacy_script, run_api_script
 
+LOG = logging.getLogger()
+
 def _num_system_cpus():
     try:
         cpus = len(os.sched_getaffinity(0))
@@ -366,32 +370,35 @@ class UpdateRefresh:
 
     @staticmethod
     def run(args):
-        if args.postcodes:
-            run_legacy_script('update.php', '--calculate-postcodes',
-                              nominatim_env=args, throw_on_fail=True)
-        if args.word_counts:
-            run_legacy_script('update.php', '--recompute-word-counts',
-                              nominatim_env=args, throw_on_fail=True)
-        if args.address_levels:
-            run_legacy_script('update.php', '--update-address-levels',
-                              nominatim_env=args, throw_on_fail=True)
-        if args.functions:
-            params = ['setup.php', '--create-functions', '--create-partition-functions']
-            if args.diffs:
-                params.append('--enable-diff-updates')
-            if args.enable_debug_statements:
-                params.append('--enable-debug-statements')
-            run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)
-        if args.wiki_data:
-            run_legacy_script('setup.php', '--import-wikipedia-articles',
-                              nominatim_env=args, throw_on_fail=True)
-        # Attention: importance MUST come after wiki data import.
-        if args.importance:
-            run_legacy_script('update.php', '--recompute-importance',
-                              nominatim_env=args, throw_on_fail=True)
-        if args.website:
-            run_legacy_script('setup.php', '--setup-website',
-                              nominatim_env=args, throw_on_fail=True)
+        import nominatim.tools.refresh
+
+        with psycopg2.connect(args.config.get_libpq_dsn()) as conn:
+            if args.postcodes:
+                LOG.warning("Update postcodes centroid")
+                nominatim.tools.refresh.update_postcodes(conn, args.data_dir)
+            if args.word_counts:
+                LOG.warning('Recompute frequency of full-word search terms')
+                nominatim.tools.refresh.recompute_word_counts(conn, args.data_dir)
+            if args.address_levels:
+                run_legacy_script('update.php', '--update-address-levels',
+                                  nominatim_env=args, throw_on_fail=True)
+            if args.functions:
+                params = ['setup.php', '--create-functions', '--create-partition-functions']
+                if args.diffs:
+                    params.append('--enable-diff-updates')
+                if args.enable_debug_statements:
+                    params.append('--enable-debug-statements')
+                run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)
+            if args.wiki_data:
+                run_legacy_script('setup.php', '--import-wikipedia-articles',
+                                  nominatim_env=args, throw_on_fail=True)
+            # Attention: importance MUST come after wiki data import.
+            if args.importance:
+                run_legacy_script('update.php', '--recompute-importance',
+                                  nominatim_env=args, throw_on_fail=True)
+            if args.website:
+                run_legacy_script('setup.php', '--setup-website',
+                                  nominatim_env=args, throw_on_fail=True)
         return 0
 
 
diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py
new file mode 100644 (file)
index 0000000..1a39746
--- /dev/null
@@ -0,0 +1,11 @@
+"""
+Helper functions for handling DB accesses.
+"""
+
+def execute_file(conn, fname):
+    """ Read an SQL file and run its contents against the given connection.
+    """
+    with fname.open('r') as fdesc:
+        sql = fdesc.read()
+    with conn.cursor() as cur:
+        cur.execute(sql)
diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py
new file mode 100644 (file)
index 0000000..859b564
--- /dev/null
@@ -0,0 +1,16 @@
+"""
+Functions for bringing auxiliary data in the database up-to-date.
+"""
+from ..db.utils import execute_file
+
+def update_postcodes(conn, datadir):
+    """ Recalculate postcode centroids and add, remove and update entries in the
+        location_postcode table. `conn` is an open connection to the database.
+    """
+    execute_file(conn, datadir / 'sql' / 'update-postcodes.sql')
+
+
+def recompute_word_counts(conn, datadir):
+    """ Compute the frequency of full-word search terms.
+    """
+    execute_file(conn, datadir / 'sql' / 'words_from_search_name.sql')
index 9ac629731e6c0b41dc35d0b31f7c9f9e0b77a86f..33c65adee0bdd35a1f85aa125984fb37e9424289 100644 (file)
@@ -6,6 +6,7 @@ import pytest
 
 import nominatim.cli
 import nominatim.indexer.indexer
+import nominatim.tools.refresh
 
 def call_nominatim(*args):
     return nominatim.cli.nominatim(module_dir='build/module',
@@ -99,21 +100,30 @@ def test_index_command(monkeypatch, temp_db, params, do_bnds, do_ranks):
 
 
 @pytest.mark.parametrize("command,params", [
-                         ('postcodes', ('update.php', '--calculate-postcodes')),
-                         ('word-counts', ('update.php', '--recompute-word-counts')),
                          ('address-levels', ('update.php', '--update-address-levels')),
                          ('functions', ('setup.php',)),
                          ('wiki-data', ('setup.php', '--import-wikipedia-articles')),
                          ('importance', ('update.php', '--recompute-importance')),
                          ('website', ('setup.php', '--setup-website')),
                          ])
-def test_refresh_command(mock_run_legacy, command, params):
+def test_refresh_legacy_command(mock_run_legacy, command, params):
     assert 0 == call_nominatim('refresh', '--' + command)
 
     assert mock_run_legacy.called == 1
     assert len(mock_run_legacy.last_args) >= len(params)
     assert mock_run_legacy.last_args[:len(params)] == params
 
+@pytest.mark.parametrize("command,func", [
+                         ('postcodes', 'update_postcodes'),
+                         ('word-counts', 'recompute_word_counts'),
+                         ])
+def test_refresh_command(monkeypatch, command, func):
+    func_mock = MockParamCapture()
+    monkeypatch.setattr(nominatim.tools.refresh, func, func_mock)
+
+    assert 0 == call_nominatim('refresh', '--' + command)
+
+    assert func_mock.called == 1
 
 def test_refresh_importance_computed_after_wiki_import(mock_run_legacy):
     assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')
diff --git a/test/python/test_db_utils.py b/test/python/test_db_utils.py
new file mode 100644 (file)
index 0000000..3210721
--- /dev/null
@@ -0,0 +1,33 @@
+"""
+Tests for DB utility functions in db.utils
+"""
+import psycopg2
+import pytest
+
+import nominatim.db.utils as db_utils
+
+def test_execute_file_success(temp_db, tmp_path):
+    tmpfile = tmp_path / 'test.sql'
+    tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);')
+
+    with psycopg2.connect('dbname=' + temp_db) as conn:
+        db_utils.execute_file(conn, tmpfile)
+
+        with conn.cursor() as cur:
+            cur.execute('SELECT * FROM test')
+
+            assert cur.rowcount == 1
+            assert cur.fetchone()[0] == 56
+
+def test_execute_file_bad_file(temp_db, tmp_path):
+    with psycopg2.connect('dbname=' + temp_db) as conn:
+        with pytest.raises(FileNotFoundError):
+            db_utils.execute_file(conn, tmp_path / 'test2.sql')
+
+def test_execute_file_bad_sql(temp_db, tmp_path):
+    tmpfile = tmp_path / 'test.sql'
+    tmpfile.write_text('CREATE STABLE test (id INT)')
+
+    with psycopg2.connect('dbname=' + temp_db) as conn:
+        with pytest.raises(psycopg2.ProgrammingError):
+            db_utils.execute_file(conn, tmpfile)