From: Sarah Hoffmann Date: Sun, 24 Jan 2021 13:35:35 +0000 (+0100) Subject: convert functon creation to python X-Git-Tag: v3.7.0~46^2~9 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/5b46fcad8ec166d1b8e35cfbc58405b71e27caa3 convert functon creation to python The new functions always creates normal and partitioned functions. Also adds specialised connection and cursor classes for adding frequently used helper functions. --- diff --git a/lib/admin/update.php b/lib/admin/update.php index 972a6fe5..a0fbbc46 100644 --- a/lib/admin/update.php +++ b/lib/admin/update.php @@ -143,12 +143,7 @@ if ($aResult['init-updates']) { } if (!$aResult['no-update-functions']) { - // instantiate setupClass to use the function therein - $cSetup = new SetupFunctions(array( - 'enable-diff-updates' => true, - 'verbose' => $aResult['verbose'] - )); - $cSetup->createFunctions(); + (clone($oNominatimCmd))->addParams('refresh', '--functions')->run(); } $sDatabaseDate = getDatabaseDate($oDB); diff --git a/lib/setup/SetupClass.php b/lib/setup/SetupClass.php index 635949b9..a865b8f0 100755 --- a/lib/setup/SetupClass.php +++ b/lib/setup/SetupClass.php @@ -290,9 +290,7 @@ class SetupFunctions public function createPartitionFunctions() { info('Create Partition Functions'); - - $sTemplate = file_get_contents(CONST_DataDir.'/sql/partition-functions.src.sql'); - $this->pgsqlRunPartitionScript($sTemplate); + $this->createSqlFunctions(); // also create partition functions } public function importWikipediaArticles() @@ -788,43 +786,18 @@ class SetupFunctions private function createSqlFunctions() { - $sBasePath = CONST_DataDir.'/sql/functions/'; - $sTemplate = file_get_contents($sBasePath.'utils.sql'); - $sTemplate .= file_get_contents($sBasePath.'normalization.sql'); - $sTemplate .= file_get_contents($sBasePath.'ranking.sql'); - $sTemplate .= file_get_contents($sBasePath.'importance.sql'); - $sTemplate .= file_get_contents($sBasePath.'address_lookup.sql'); - $sTemplate .= file_get_contents($sBasePath.'interpolation.sql'); - if ($this->db()->tableExists('place')) { - $sTemplate .= file_get_contents($sBasePath.'place_triggers.sql'); - } - if ($this->db()->tableExists('placex')) { - $sTemplate .= file_get_contents($sBasePath.'placex_triggers.sql'); - } - if ($this->db()->tableExists('location_postcode')) { - $sTemplate .= file_get_contents($sBasePath.'postcode_triggers.sql'); - } - $sTemplate = str_replace('{modulepath}', $this->sModulePath, $sTemplate); - if ($this->bEnableDiffUpdates) { - $sTemplate = str_replace('RETURN NEW; -- %DIFFUPDATES%', '--', $sTemplate); + $oCmd = (clone($this->oNominatimCmd)) + ->addParams('refresh', '--functions'); + + if (!$this->bEnableDiffUpdates) { + $oCmd->addParams('--no-diff-updates'); } + if ($this->bEnableDebugStatements) { - $sTemplate = str_replace('--DEBUG:', '', $sTemplate); - } - if (getSettingBool('LIMIT_REINDEXING')) { - $sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate); + $oCmd->addParams('--enable-debug-statements'); } - if (!getSettingBool('USE_US_TIGER_DATA')) { - $sTemplate = str_replace('-- %NOTIGERDATA% ', '', $sTemplate); - } - if (!getSettingBool('USE_AUX_LOCATION_DATA')) { - $sTemplate = str_replace('-- %NOAUXDATA% ', '', $sTemplate); - } - - $sReverseOnly = $this->dbReverseOnly() ? 'true' : 'false'; - $sTemplate = str_replace('%REVERSE-ONLY%', $sReverseOnly, $sTemplate); - $this->pgsqlRunScript($sTemplate); + $oCmd->run(); } private function pgsqlRunPartitionScript($sTemplate) diff --git a/nominatim/cli.py b/nominatim/cli.py index 00eb905e..4b38040c 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -8,10 +8,9 @@ import argparse import logging from pathlib import Path -import psycopg2 - from .config import Configuration from .tools.exec_utils import run_legacy_script, run_api_script +from .db.connection import connect LOG = logging.getLogger() @@ -370,27 +369,28 @@ class UpdateRefresh: @staticmethod def run(args): - import nominatim.tools.refresh + from .tools import refresh - conn = psycopg2.connect(args.config.get_libpq_dsn()) + conn = connect(args.config.get_libpq_dsn()) if args.postcodes: LOG.warning("Update postcodes centroid") - nominatim.tools.refresh.update_postcodes(conn, args.data_dir) + refresh.update_postcodes(conn, args.data_dir) + if args.word_counts: LOG.warning('Recompute frequency of full-word search terms') - nominatim.tools.refresh.recompute_word_counts(conn, args.data_dir) + refresh.recompute_word_counts(conn, args.data_dir) + if args.address_levels: cfg = Path(args.config.ADDRESS_LEVEL_CONFIG) LOG.warning('Updating address levels from %s', cfg) - nominatim.tools.refresh.load_address_levels_from_file(conn, cfg) + refresh.load_address_levels_from_file(conn, cfg) + if args.functions: - params = ['setup.php', '--create-functions', '--create-partition-functions'] - if args.diffs: - params.append('--enable-diff-updates') - if args.enable_debug_statements: - params.append('--enable-debug-statements') - run_legacy_script(*params, nominatim_env=args, throw_on_fail=True) + LOG.warning('Create functions') + refresh.create_functions(conn, args.config, args.data_dir, + args.diffs, args.enable_debug_statements) + if args.wiki_data: run_legacy_script('setup.php', '--import-wikipedia-articles', nominatim_env=args, throw_on_fail=True) diff --git a/nominatim/config.py b/nominatim/config.py index d4ba0d7a..3f75ce33 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -20,6 +20,7 @@ class Configuration: """ def __init__(self, project_dir, config_dir): + self.project_dir = project_dir self._config = dotenv_values(str((config_dir / 'env.defaults').resolve())) if project_dir is not None: self._config.update(dotenv_values(str((project_dir / '.env').resolve()))) @@ -36,6 +37,13 @@ class Configuration: return os.environ.get(name) or self._config[name] + def get_bool(self, name): + """ Return the given configuration parameters as a boolean. + Values of '1', 'yes' and 'true' are accepted as truthy values, + everything else is interpreted as false. + """ + return self.__getattr__(name).lower() in ('1', 'yes', 'true') + def get_libpq_dsn(self): """ Get configured database DSN converted into the key/value format understood by libpq and psycopg. diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index 885caca5..5fbb07f8 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -2,6 +2,7 @@ Functions for bringing auxiliary data in the database up-to-date. """ import json +import re from psycopg2.extras import execute_values @@ -69,3 +70,100 @@ def load_address_levels_from_file(conn, config_file): """ with config_file.open('r') as fdesc: load_address_levels(conn, 'address_levels', json.load(fdesc)) + +PLPGSQL_BASE_MODULES = ( + 'utils.sql', + 'normalization.sql', + 'ranking.sql', + 'importance.sql', + 'address_lookup.sql', + 'interpolation.sql' +) + +PLPGSQL_TABLE_MODULES = ( + ('place', 'place_triggers.sql'), + ('placex', 'placex_triggers.sql'), + ('location_postcode', 'postcode_triggers.sql') +) + +def _get_standard_function_sql(conn, config, sql_dir, enable_diff_updates, enable_debug): + """ Read all applicable SQLs containing PL/pgSQL functions, replace + placefolders and execute them. + """ + sql_func_dir = sql_dir / 'functions' + sql = '' + + # Get the basic set of functions that is always imported. + for sql_file in PLPGSQL_BASE_MODULES: + with (sql_func_dir / sql_file).open('r') as fdesc: + sql += fdesc.read() + + # Some files require the presence of a certain table + for table, fname in PLPGSQL_TABLE_MODULES: + if conn.table_exists(table): + with (sql_func_dir / fname).open('r') as fdesc: + sql += fdesc.read() + + # Replace placeholders. + sql = sql.replace('{modulepath}', + config.DATABASE_MODULE_PATH or str((config.project_dir / 'module').resolve())) + + if enable_diff_updates: + sql = sql.replace('RETURN NEW; -- %DIFFUPDATES%', '--') + + if enable_debug: + sql = sql.replace('--DEBUG:', '') + + if config.get_bool('LIMIT_REINDEXING'): + sql = sql.replace('--LIMIT INDEXING:', '') + + if not config.get_bool('USE_US_TIGER_DATA'): + sql = sql.replace('-- %NOTIGERDATA% ', '') + + if not config.get_bool('USE_AUX_LOCATION_DATA'): + sql = sql.replace('-- %NOAUXDATA% ', '') + + reverse_only = 'false' if conn.table_exists('search_name') else 'true' + + return sql.replace('%REVERSE-ONLY%', reverse_only) + + +def replace_partition_string(sql, partitions): + """ Replace a partition template with the actual partition code. + """ + for match in re.findall('^-- start(.*?)^-- end', sql, re.M | re.S): + repl = '' + for part in partitions: + repl += match.replace('-partition-', str(part)) + sql = sql.replace(match, repl) + + return sql + +def _get_partition_function_sql(conn, sql_dir): + """ Create functions that work on partition tables. + """ + with conn.cursor() as cur: + cur.execute('SELECT distinct partition FROM country_name') + partitions = set([0]) + for row in cur: + partitions.add(row[0]) + + with (sql_dir / 'partition-functions.src.sql').open('r') as fdesc: + sql = fdesc.read() + + return replace_partition_string(sql, sorted(partitions)) + +def create_functions(conn, config, data_dir, + enable_diff_updates=True, enable_debug=False): + """ (Re)create the PL/pgSQL functions. + """ + sql_dir = data_dir / 'sql' + + sql = _get_standard_function_sql(conn, config, sql_dir, + enable_diff_updates, enable_debug) + sql += _get_partition_function_sql(conn, sql_dir) + + with conn.cursor() as cur: + cur.execute(sql) + + conn.commit() diff --git a/test/python/test_cli.py b/test/python/test_cli.py index bd192260..942a9409 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -98,7 +98,6 @@ def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks): @pytest.mark.parametrize("command,params", [ - ('functions', ('setup.php',)), ('wiki-data', ('setup.php', '--import-wikipedia-articles')), ('importance', ('update.php', '--recompute-importance')), ('website', ('setup.php', '--setup-website')), @@ -114,6 +113,7 @@ def test_refresh_legacy_command(mock_run_legacy, command, params): ('postcodes', 'update_postcodes'), ('word-counts', 'recompute_word_counts'), ('address-levels', 'load_address_levels_from_file'), + ('functions', 'create_functions'), ]) def test_refresh_command(monkeypatch, command, func): func_mock = MockParamCapture() @@ -129,7 +129,6 @@ def test_refresh_importance_computed_after_wiki_import(mock_run_legacy): assert mock_run_legacy.called == 2 assert mock_run_legacy.last_args == ('update.php', '--recompute-importance') - @pytest.mark.parametrize("params", [ ('search', '--query', 'new'), ('reverse', '--lat', '0', '--lon', '0'), diff --git a/test/python/test_config.py b/test/python/test_config.py index bada9d86..064f71ba 100644 --- a/test/python/test_config.py +++ b/test/python/test_config.py @@ -15,6 +15,7 @@ def test_no_project_dir(): assert config.DATABASE_WEBUSER == 'www-data' + def test_prefer_project_setting_over_default(): with tempfile.TemporaryDirectory() as project_dir: with open(project_dir + '/.env', 'w') as envfile: @@ -24,6 +25,7 @@ def test_prefer_project_setting_over_default(): assert config.DATABASE_WEBUSER == 'apache' + def test_prefer_os_environ_over_project_setting(monkeypatch): with tempfile.TemporaryDirectory() as project_dir: with open(project_dir + '/.env', 'w') as envfile: @@ -35,6 +37,7 @@ def test_prefer_os_environ_over_project_setting(monkeypatch): assert config.DATABASE_WEBUSER == 'nobody' + def test_get_os_env_add_defaults(monkeypatch): config = Configuration(None, DEFCFG_DIR) @@ -42,6 +45,7 @@ def test_get_os_env_add_defaults(monkeypatch): assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'www-data' + def test_get_os_env_prefer_os_environ(monkeypatch): config = Configuration(None, DEFCFG_DIR) @@ -49,11 +53,13 @@ def test_get_os_env_prefer_os_environ(monkeypatch): assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'nobody' + def test_get_libpq_dsn_convert_default(): config = Configuration(None, DEFCFG_DIR) assert config.get_libpq_dsn() == 'dbname=nominatim' + def test_get_libpq_dsn_convert_php(monkeypatch): config = Configuration(None, DEFCFG_DIR) @@ -62,6 +68,7 @@ def test_get_libpq_dsn_convert_php(monkeypatch): assert config.get_libpq_dsn() == 'dbname=gis password=foo host=localhost' + def test_get_libpq_dsn_convert_libpq(monkeypatch): config = Configuration(None, DEFCFG_DIR) @@ -69,3 +76,20 @@ def test_get_libpq_dsn_convert_libpq(monkeypatch): 'host=localhost dbname=gis password=foo') assert config.get_libpq_dsn() == 'host=localhost dbname=gis password=foo' + + +@pytest.mark.parametrize("value,result", + [(x, True) for x in ('1', 'true', 'True', 'yes', 'YES')] + + [(x, False) for x in ('0', 'false', 'no', 'NO', 'x')]) +def test_get_bool(monkeypatch, value, result): + config = Configuration(None, DEFCFG_DIR) + + monkeypatch.setenv('NOMINATIM_FOOBAR', value) + + assert config.get_bool('FOOBAR') == result + +def test_get_bool_empty(): + config = Configuration(None, DEFCFG_DIR) + + assert config.DATABASE_MODULE_PATH == '' + assert config.get_bool('DATABASE_MODULE_PATH') == False diff --git a/test/python/test_db_connection.py b/test/python/test_db_connection.py new file mode 100644 index 00000000..5c484558 --- /dev/null +++ b/test/python/test_db_connection.py @@ -0,0 +1,32 @@ +""" +Tests for specialised conenction and cursor classes. +""" +import pytest + +from nominatim.db.connection import connect + +@pytest.fixture +def db(temp_db): + conn = connect('dbname=' + temp_db) + yield conn + conn.close() + + +def test_connection_table_exists(db, temp_db_cursor): + assert db.table_exists('foobar') == False + + temp_db_cursor.execute('CREATE TABLE foobar (id INT)') + + assert db.table_exists('foobar') == True + + +def test_cursor_scalar(db, temp_db_cursor): + temp_db_cursor.execute('CREATE TABLE dummy (id INT)') + + with db.cursor() as cur: + assert cur.scalar('SELECT count(*) FROM dummy') == 0 + +def test_cursor_scalar_many_rows(db): + with db.cursor() as cur: + with pytest.raises(ValueError): + cur.scalar('SELECT * FROM pg_tables') diff --git a/test/python/test_tools_refresh_create_functions.py b/test/python/test_tools_refresh_create_functions.py new file mode 100644 index 00000000..4807e64f --- /dev/null +++ b/test/python/test_tools_refresh_create_functions.py @@ -0,0 +1,99 @@ +""" +Tests for creating PL/pgSQL functions for Nominatim. +""" +from pathlib import Path +import pytest + +from nominatim.db.connection import connect +from nominatim.tools.refresh import _get_standard_function_sql, _get_partition_function_sql + +SQL_DIR = (Path(__file__) / '..' / '..' / '..' / 'sql').resolve() + +@pytest.fixture +def db(temp_db): + conn = connect('dbname=' + temp_db) + yield conn + conn.close() + +@pytest.fixture +def db_with_tables(db): + with db.cursor() as cur: + for table in ('place', 'placex', 'location_postcode'): + cur.execute('CREATE TABLE {} (place_id BIGINT)'.format(table)) + + return db + + +def test_standard_functions_replace_module_default(db, def_config): + def_config.project_dir = Path('.') + sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False) + + assert sql + assert sql.find('{modulepath}') < 0 + assert sql.find("'{}'".format(Path('module/nominatim.so').resolve())) >= 0 + + +def test_standard_functions_replace_module_custom(monkeypatch, db, def_config): + monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', 'custom') + sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False) + + assert sql + assert sql.find('{modulepath}') < 0 + assert sql.find("'custom/nominatim.so'") >= 0 + + +@pytest.mark.parametrize("enabled", (True, False)) +def test_standard_functions_enable_diff(db_with_tables, def_config, enabled): + def_config.project_dir = Path('.') + sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, enabled, False) + + assert sql + assert (sql.find('%DIFFUPDATES%') < 0) == enabled + + +@pytest.mark.parametrize("enabled", (True, False)) +def test_standard_functions_enable_debug(db_with_tables, def_config, enabled): + def_config.project_dir = Path('.') + sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, enabled) + + assert sql + assert (sql.find('--DEBUG') < 0) == enabled + + +@pytest.mark.parametrize("enabled", (True, False)) +def test_standard_functions_enable_limit_reindexing(monkeypatch, db_with_tables, def_config, enabled): + def_config.project_dir = Path('.') + monkeypatch.setenv('NOMINATIM_LIMIT_REINDEXING', 'yes' if enabled else 'no') + sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False) + + assert sql + assert (sql.find('--LIMIT INDEXING') < 0) == enabled + + +@pytest.mark.parametrize("enabled", (True, False)) +def test_standard_functions_enable_tiger(monkeypatch, db_with_tables, def_config, enabled): + def_config.project_dir = Path('.') + monkeypatch.setenv('NOMINATIM_USE_US_TIGER_DATA', 'yes' if enabled else 'no') + sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False) + + assert sql + assert (sql.find('%NOTIGERDATA%') >= 0) == enabled + + +@pytest.mark.parametrize("enabled", (True, False)) +def test_standard_functions_enable_aux(monkeypatch, db_with_tables, def_config, enabled): + def_config.project_dir = Path('.') + monkeypatch.setenv('NOMINATIM_USE_AUX_LOCATION_DATA', 'yes' if enabled else 'no') + sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False) + + assert sql + assert (sql.find('%NOAUXDATA%') >= 0) == enabled + + +def test_partition_function(temp_db_cursor, db, def_config): + temp_db_cursor.execute("CREATE TABLE country_name (partition SMALLINT)") + + sql = _get_partition_function_sql(db, SQL_DIR) + + assert sql + assert sql.find('-partition-') < 0