From: AntoJvlt Date: Mon, 22 Mar 2021 22:56:24 +0000 (+0100) Subject: Code cleaning, tests simplification and use of python3-icu package X-Git-Tag: v3.7.0~10^2~3 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/ff341985699f8148bedf142bea9c5deddaa03b6f Code cleaning, tests simplification and use of python3-icu package --- diff --git a/.github/actions/build-nominatim/action.yml b/.github/actions/build-nominatim/action.yml index 18bdc335..d0a89774 100644 --- a/.github/actions/build-nominatim/action.yml +++ b/.github/actions/build-nominatim/action.yml @@ -6,8 +6,7 @@ runs: steps: - name: Install prerequisites run: | - sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 - sudo pip install PyICU + sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu shell: bash - name: Download dependencies diff --git a/.pylintrc b/.pylintrc index da6dbe03..eab04181 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,6 +1,7 @@ [MASTER] extension-pkg-whitelist=osmium +ignored-modules=icu [MESSAGES CONTROL] diff --git a/cmake/tool-installed.tmpl b/cmake/tool-installed.tmpl index 6128dd2f..0b245dbb 100644 --- a/cmake/tool-installed.tmpl +++ b/cmake/tool-installed.tmpl @@ -2,9 +2,7 @@ import sys import os -sys.path.insert(0, '@NOMINATIM_LIBDIR@/lib-python') -#Add config directory to the python path for module importation -sys.path.insert(1, '@NOMINATIM_CONFIGDIR@/..') +sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python') os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__) diff --git a/lib-php/migration/phraseSettingsToJson.php b/lib-php/migration/phraseSettingsToJson.php index 10500eeb..15c49f0a 100644 --- a/lib-php/migration/phraseSettingsToJson.php +++ b/lib-php/migration/phraseSettingsToJson.php @@ -16,4 +16,4 @@ if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile) $jsonFile = fopen($jsonPhraseSettingsFile, 'w'); fwrite($jsonFile, json_encode($data)); fclose($jsonFile); -} \ No newline at end of file +} diff --git a/nominatim/tools/special_phrases.py b/nominatim/tools/special_phrases.py index f8373790..c0f472d6 100644 --- a/nominatim/tools/special_phrases.py +++ b/nominatim/tools/special_phrases.py @@ -5,10 +5,9 @@ import logging import os import re import subprocess -import sys import json from os.path import isfile -from icu import Transliterator # pylint: disable-msg=no-name-in-module +from icu import Transliterator from psycopg2.sql import Identifier, Literal, SQL from nominatim.tools.exec_utils import get_url @@ -32,7 +31,7 @@ def import_from_wiki(args, db_connection, languages=None): languages = _get_languages(args.config) if not languages else languages #array for pairs of class/type - pairs = dict() + class_type_pairs = set() transliterator = Transliterator.createFromRules("special-phrases normalizer", args.config.TERM_NORMALIZATION) @@ -63,14 +62,14 @@ def import_from_wiki(args, db_connection, languages=None): continue #add class/type to the pairs dict - pairs[f'{phrase_class}|{phrase_type}'] = (phrase_class, phrase_type) + class_type_pairs.add((phrase_class, phrase_type)) _process_amenity( db_connection, phrase_label, normalized_label, phrase_class, phrase_type, phrase_operator ) - _create_place_classtype_table_and_indexes(db_connection, args.config, pairs) + _create_place_classtype_table_and_indexes(db_connection, args.config, class_type_pairs) db_connection.commit() LOG.warning('Import done.') @@ -118,12 +117,8 @@ def _check_sanity(lang, phrase_class, phrase_type, pattern): Check sanity of given inputs in case somebody added garbage in the wiki. If a bad class/type is detected the system will exit with an error. """ - try: - if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1: - sys.exit() - except SystemExit: + if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1: LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type) - raise def _process_amenity(db_connection, phrase_label, normalized_label, @@ -147,7 +142,7 @@ def _process_amenity(db_connection, phrase_label, normalized_label, (phrase_label, normalized_label, phrase_class, phrase_type)) -def _create_place_classtype_table_and_indexes(db_connection, config, pairs): +def _create_place_classtype_table_and_indexes(db_connection, config, class_type_pairs): """ Create table place_classtype for each given pair. Also create indexes on place_id and centroid. @@ -161,7 +156,7 @@ def _create_place_classtype_table_and_indexes(db_connection, config, pairs): with db_connection.cursor() as db_cursor: db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)") - for _, pair in pairs.items(): + for pair in class_type_pairs.items(): phrase_class = pair[0] phrase_type = pair[1] @@ -188,53 +183,54 @@ def _create_place_classtype_table(db_connection, sql_tablespace, phrase_class, p """ Create table place_classtype of the given phrase_class/phrase_type if doesn't exit. """ + table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) with db_connection.cursor() as db_cursor: - db_cursor.execute(SQL(f""" - CREATE TABLE IF NOT EXISTS {{}} {sql_tablespace} + db_cursor.execute(SQL(""" + CREATE TABLE IF NOT EXISTS {{}} {} AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex - WHERE class = {{}} AND type = {{}}""") - .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'), - Literal(phrase_class), Literal(phrase_type))) + WHERE class = {{}} AND type = {{}}""".format(sql_tablespace)) + .format(Identifier(table_name), Literal(phrase_class), + Literal(phrase_type))) def _create_place_classtype_indexes(db_connection, sql_tablespace, phrase_class, phrase_type): """ Create indexes on centroid and place_id for the place_classtype table. """ + index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type) + base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) #Index on centroid - if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid'): + if not db_connection.index_exists(index_prefix + 'centroid'): with db_connection.cursor() as db_cursor: - db_cursor.execute(SQL(f""" - CREATE INDEX {{}} ON {{}} USING GIST (centroid) {sql_tablespace}""") - .format(Identifier( - f"""idx_place_classtype_{phrase_class}_{phrase_type}_centroid"""), - Identifier(f'place_classtype_{phrase_class}_{phrase_type}'))) + db_cursor.execute(SQL(""" + CREATE INDEX {{}} ON {{}} USING GIST (centroid) {}""".format(sql_tablespace)) + .format(Identifier(index_prefix + 'centroid'), + Identifier(base_table)), sql_tablespace) #Index on place_id - if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id'): + if not db_connection.index_exists(index_prefix + 'place_id'): with db_connection.cursor() as db_cursor: - db_cursor.execute(SQL(f""" - CREATE INDEX {{}} ON {{}} USING btree(place_id) {sql_tablespace}""") - .format(Identifier( - f"""idx_place_classtype_{phrase_class}_{phrase_type}_place_id"""), - Identifier(f'place_classtype_{phrase_class}_{phrase_type}'))) + db_cursor.execute(SQL( + """CREATE INDEX {{}} ON {{}} USING btree(place_id) {}""".format(sql_tablespace)) + .format(Identifier(index_prefix + 'place_id'), + Identifier(base_table))) def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type): """ Grant access on read to the table place_classtype for the webuser. """ + table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) with db_connection.cursor() as db_cursor: db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""") - .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'), - Identifier(config.DATABASE_WEBUSER))) + .format(Identifier(table_name), Identifier(config.DATABASE_WEBUSER))) def _convert_php_settings_if_needed(args, file_path): """ Convert php settings file of special phrases to json file if it is still in php format. """ file, extension = os.path.splitext(file_path) - json_file_path = f'{file}.json' + json_file_path = file + '.json' if extension == '.php' and not isfile(json_file_path): try: subprocess.run(['/usr/bin/env', 'php', '-Cq', diff --git a/settings/env.defaults b/settings/env.defaults index 78370cf4..4069270e 100644 --- a/settings/env.defaults +++ b/settings/env.defaults @@ -77,7 +77,7 @@ NOMINATIM_TIGER_DATA_PATH= NOMINATIM_WIKIPEDIA_DATA_PATH= # Configuration file for special phrase import. -# When unset, the internal default settings from 'settings/phrase_settings.py' +# When unset, the internal default settings from 'settings/phrase-settings.json' # are used. NOMINATIM_PHRASE_CONFIG= diff --git a/test/python/test_tools_import_special_phrases.py b/test/python/test_tools_import_special_phrases.py index d2693a9a..2b638866 100644 --- a/test/python/test_tools_import_special_phrases.py +++ b/test/python/test_tools_import_special_phrases.py @@ -4,75 +4,27 @@ import pytest from nominatim.tools.special_phrases import _create_place_classtype_indexes, _create_place_classtype_table, _get_wiki_content, _grant_access_to_webuser, _process_amenity -def test_get_wiki_content(): - assert _get_wiki_content('fr') +def test_process_amenity_with_operator(temp_db_conn, getorcreate_amenityoperator_funcs): + _process_amenity(temp_db_conn, '', '', '', '', 'near') + _process_amenity(temp_db_conn, '', '', '', '', 'in') -def execute_and_verify_add_word(temp_db_conn, phrase_label, normalized_label, - phrase_class, phrase_type): - _process_amenity(temp_db_conn, phrase_label, normalized_label, - phrase_class, phrase_type, '') - - with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute(f""" - SELECT * FROM word - WHERE word_token=' {normalized_label}' - AND word='{normalized_label}' - AND class='{phrase_class}' - AND type='{phrase_type}' - AND type='{phrase_type}'""") - return temp_db_cursor.fetchone() - -def execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, normalized_label, - phrase_class, phrase_type, phrase_operator): - _process_amenity(temp_db_conn, phrase_label, normalized_label, - phrase_class, phrase_type, phrase_operator) - - with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute(f""" - SELECT * FROM word - WHERE word_token=' {normalized_label}' - AND word='{normalized_label}' - AND class='{phrase_class}' - AND type='{phrase_type}' - AND operator='{phrase_operator}'""") - return temp_db_cursor.fetchone() - -def test_process_amenity_with_near_operator(temp_db_conn, word_table, amenity_operator_funcs): - phrase_label = ' label ' - normalized_label = 'label' - phrase_class = 'class' - phrase_type = 'type' - - assert execute_and_verify_add_word(temp_db_conn, phrase_label, normalized_label, - phrase_class, phrase_type) - assert execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, normalized_label, - phrase_class, phrase_type, 'near') - assert execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, normalized_label, - phrase_class, phrase_type, 'in') - -def index_exists(db_connect, index): - """ Check that an index with the given name exists in the database. - """ - with db_connect.cursor() as cur: - cur.execute("""SELECT tablename FROM pg_indexes - WHERE indexname = %s and schemaname = 'public'""", (index, )) - if cur.rowcount == 0: - return False - return True +def test_process_amenity_without_operator(temp_db_conn, getorcreate_amenity_funcs): + _process_amenity(temp_db_conn, '', '', '', '', '') def test_create_place_classtype_indexes(temp_db_conn): phrase_class = 'class' phrase_type = 'type' - table_name = f'place_classtype_{phrase_class}_{phrase_type}' + table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) + index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type) with temp_db_conn.cursor() as temp_db_cursor: temp_db_cursor.execute("CREATE EXTENSION postgis;") - temp_db_cursor.execute(f'CREATE TABLE {table_name}(place_id BIGINT, centroid GEOMETRY)') + temp_db_cursor.execute('CREATE TABLE {}(place_id BIGINT, centroid GEOMETRY)'.format(table_name)) _create_place_classtype_indexes(temp_db_conn, '', phrase_class, phrase_type) - centroid_index_exists = index_exists(temp_db_conn, f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid') - place_id_index_exists = index_exists(temp_db_conn, f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id') + centroid_index_exists = temp_db_conn.index_exists(index_prefix + 'centroid') + place_id_index_exists = temp_db_conn.index_exists(index_prefix + 'place_id') assert centroid_index_exists and place_id_index_exists @@ -93,10 +45,10 @@ def test_create_place_classtype_table(temp_db_conn, placex_table): def test_grant_access_to_web_user(temp_db_conn, def_config): phrase_class = 'class' phrase_type = 'type' - table_name = f'place_classtype_{phrase_class}_{phrase_type}' + table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute(f'CREATE TABLE {table_name}()') + temp_db_cursor.execute('CREATE TABLE {}()'.format(table_name)) _grant_access_to_webuser(temp_db_conn, def_config, phrase_class, phrase_type) @@ -110,65 +62,28 @@ def test_grant_access_to_web_user(temp_db_conn, def_config): assert result @pytest.fixture -def amenity_operator_funcs(temp_db_cursor): +def make_strandard_name_func(temp_db_cursor): temp_db_cursor.execute(f""" - CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT - AS $$ - DECLARE - o TEXT; + CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$ BEGIN RETURN trim(name); --Basically return only the trimed name for the tests END; - $$ - LANGUAGE plpgsql IMMUTABLE; - - CREATE SEQUENCE seq_word start 1; - + $$ LANGUAGE plpgsql IMMUTABLE;""") + +@pytest.fixture +def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func): + temp_db_cursor.execute(f""" CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text) - RETURNS INTEGER - AS $$ - DECLARE - lookup_token TEXT; - return_word_id INTEGER; - BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word - WHERE word_token = lookup_token and word = normalized_word - and class = lookup_class and type = lookup_type - INTO return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, - lookup_class, lookup_type, null, 0); - END IF; - RETURN return_word_id; - END; - $$ - LANGUAGE plpgsql; + RETURNS void as $$ + BEGIN END; + $$ LANGUAGE plpgsql""") - CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, - normalized_word TEXT, - lookup_class text, - lookup_type text, - op text) - RETURNS INTEGER - AS $$ - DECLARE - lookup_token TEXT; - return_word_id INTEGER; - BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word - WHERE word_token = lookup_token and word = normalized_word - and class = lookup_class and type = lookup_type and operator = op - INTO return_word_id; - IF return_word_id IS NULL THEN - return_word_id := nextval('seq_word'); - INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, - lookup_class, lookup_type, null, 0, op); - END IF; - RETURN return_word_id; - END; - $$ - LANGUAGE plpgsql;""") +@pytest.fixture +def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func): + temp_db_cursor.execute(f""" + CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT, + lookup_class text, lookup_type text, op text) + RETURNS void as $$ + BEGIN END; + $$ LANGUAGE plpgsql""") \ No newline at end of file diff --git a/vagrant/Install-on-Ubuntu-18.sh b/vagrant/Install-on-Ubuntu-18.sh index eb1bdc82..b90da871 100755 --- a/vagrant/Install-on-Ubuntu-18.sh +++ b/vagrant/Install-on-Ubuntu-18.sh @@ -29,13 +29,13 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: libbz2-dev libpq-dev libproj-dev \ postgresql-server-dev-10 postgresql-10-postgis-2.4 \ postgresql-contrib-10 postgresql-10-postgis-scripts \ - php php-pgsql php-intl python3-pip \ - python3-psycopg2 python3-psutil python3-jinja2 git libicu-dev + php php-pgsql php-intl libicu-dev python3-pip \ + python3-psycopg2 python3-psutil python3-jinja2 python3-icu git # The python-dotenv package that comes with Ubuntu 18.04 is too old, so # install the latest version from pip: - pip3 install python-dotenv PyICU + pip3 install python-dotenv # # System Configuration diff --git a/vagrant/Install-on-Ubuntu-20.sh b/vagrant/Install-on-Ubuntu-20.sh index 2b7a6a1b..d04f5796 100755 --- a/vagrant/Install-on-Ubuntu-20.sh +++ b/vagrant/Install-on-Ubuntu-20.sh @@ -32,11 +32,8 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: libbz2-dev libpq-dev libproj-dev \ postgresql-server-dev-12 postgresql-12-postgis-3 \ postgresql-contrib-12 postgresql-12-postgis-3-scripts \ - php php-pgsql php-intl python3-dotenv \ - python3-psycopg2 python3-psutil python3-jinja2 git libicu-dev -#Python packages: - - pip3 install PyICU + php php-pgsql php-intl libicu-dev python3-dotenv \ + python3-psycopg2 python3-psutil python3-jinja2 python3-icu git # # System Configuration