]> git.openstreetmap.org Git - nominatim.git/commitdiff
Code cleaning, tests simplification and use of python3-icu package
authorAntoJvlt <antonin.jolivat@gmail.com>
Mon, 22 Mar 2021 22:56:24 +0000 (23:56 +0100)
committerAntoJvlt <antonin.jolivat@gmail.com>
Tue, 23 Mar 2021 22:56:39 +0000 (23:56 +0100)
.github/actions/build-nominatim/action.yml
.pylintrc
cmake/tool-installed.tmpl
lib-php/migration/phraseSettingsToJson.php
nominatim/tools/special_phrases.py
settings/env.defaults
test/python/test_tools_import_special_phrases.py
vagrant/Install-on-Ubuntu-18.sh
vagrant/Install-on-Ubuntu-20.sh

index 18bdc335b42fa0a4f46e1d5a08b7b5ed58c66751..d0a89774637eb9238de77f767daa4451d047e34b 100644 (file)
@@ -6,8 +6,7 @@ runs:
     steps:
         - name: Install prerequisites
           run: |
-            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2
-            sudo pip install PyICU
+            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu
           shell: bash
 
         - name: Download dependencies
index da6dbe0376cd5285a6cc206f1c0f5beaadad0a79..eab041818058526209a36951c902e615095d7d23 100644 (file)
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,6 +1,7 @@
 [MASTER]
 
 extension-pkg-whitelist=osmium
+ignored-modules=icu
 
 [MESSAGES CONTROL]
 
index 6128dd2f73e868f7e8545dd0e227e8095f14d3df..0b245dbbe74b6dd43e34dfaaa6418aaa559cb570 100644 (file)
@@ -2,9 +2,7 @@
 import sys
 import os
 
-sys.path.insert(0, '@NOMINATIM_LIBDIR@/lib-python')
-#Add config directory to the python path for module importation
-sys.path.insert(1, '@NOMINATIM_CONFIGDIR@/..')
+sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python')
 
 os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
 
index 10500eebf40830989ad89116cc724986e4511de6..15c49f0aa0094fa87bf452eed9eb5b9463e78714 100644 (file)
@@ -16,4 +16,4 @@ if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)
     $jsonFile = fopen($jsonPhraseSettingsFile, 'w');
     fwrite($jsonFile, json_encode($data));
     fclose($jsonFile);
-}
\ No newline at end of file
+}
index f8373790b624ad2c0016193aa8f4634307d348e3..c0f472d65feca9419a34389a47738e3e114975d6 100644 (file)
@@ -5,10 +5,9 @@ import logging
 import os
 import re
 import subprocess
-import sys
 import json
 from os.path import isfile
-from icu import Transliterator # pylint: disable-msg=no-name-in-module
+from icu import Transliterator
 from psycopg2.sql import Identifier, Literal, SQL
 from nominatim.tools.exec_utils import get_url
 
@@ -32,7 +31,7 @@ def import_from_wiki(args, db_connection, languages=None):
     languages = _get_languages(args.config) if not languages else languages
 
     #array for pairs of class/type
-    pairs = dict()
+    class_type_pairs = set()
 
     transliterator = Transliterator.createFromRules("special-phrases normalizer",
                                                     args.config.TERM_NORMALIZATION)
@@ -63,14 +62,14 @@ def import_from_wiki(args, db_connection, languages=None):
                 continue
 
             #add class/type to the pairs dict
-            pairs[f'{phrase_class}|{phrase_type}'] = (phrase_class, phrase_type)
+            class_type_pairs.add((phrase_class, phrase_type))
 
             _process_amenity(
                 db_connection, phrase_label, normalized_label,
                 phrase_class, phrase_type, phrase_operator
             )
 
-    _create_place_classtype_table_and_indexes(db_connection, args.config, pairs)
+    _create_place_classtype_table_and_indexes(db_connection, args.config, class_type_pairs)
     db_connection.commit()
     LOG.warning('Import done.')
 
@@ -118,12 +117,8 @@ def _check_sanity(lang, phrase_class, phrase_type, pattern):
         Check sanity of given inputs in case somebody added garbage in the wiki.
         If a bad class/type is detected the system will exit with an error.
     """
-    try:
-        if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
-            sys.exit()
-    except SystemExit:
+    if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
         LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type)
-        raise
 
 
 def _process_amenity(db_connection, phrase_label, normalized_label,
@@ -147,7 +142,7 @@ def _process_amenity(db_connection, phrase_label, normalized_label,
                               (phrase_label, normalized_label, phrase_class, phrase_type))
 
 
-def _create_place_classtype_table_and_indexes(db_connection, config, pairs):
+def _create_place_classtype_table_and_indexes(db_connection, config, class_type_pairs):
     """
         Create table place_classtype for each given pair.
         Also create indexes on place_id and centroid.
@@ -161,7 +156,7 @@ def _create_place_classtype_table_and_indexes(db_connection, config, pairs):
     with db_connection.cursor() as db_cursor:
         db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
 
-    for _, pair in pairs.items():
+    for pair in class_type_pairs.items():
         phrase_class = pair[0]
         phrase_type = pair[1]
 
@@ -188,53 +183,54 @@ def _create_place_classtype_table(db_connection, sql_tablespace, phrase_class, p
     """
         Create table place_classtype of the given phrase_class/phrase_type if doesn't exit.
     """
+    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
     with db_connection.cursor() as db_cursor:
-        db_cursor.execute(SQL(f"""
-                CREATE TABLE IF NOT EXISTS {{}} {sql_tablespace
+        db_cursor.execute(SQL("""
+                CREATE TABLE IF NOT EXISTS {{}} {} 
                 AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex 
-                WHERE class = {{}} AND type = {{}}""")
-                          .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
-                                  Literal(phrase_class), Literal(phrase_type)))
+                WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
+                          .format(Identifier(table_name), Literal(phrase_class),
+                                  Literal(phrase_type)))
 
 
 def _create_place_classtype_indexes(db_connection, sql_tablespace, phrase_class, phrase_type):
     """
         Create indexes on centroid and place_id for the place_classtype table.
     """
+    index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
+    base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
     #Index on centroid
-    if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid'):
+    if not db_connection.index_exists(index_prefix + 'centroid'):
         with db_connection.cursor() as db_cursor:
-            db_cursor.execute(SQL(f"""
-                    CREATE INDEX {{}} ON {{}} USING GIST (centroid) {sql_tablespace}""")
-                              .format(Identifier(
-                                  f"""idx_place_classtype_{phrase_class}_{phrase_type}_centroid"""),
-                                      Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
+            db_cursor.execute(SQL("""
+                CREATE INDEX {{}} ON {{}} USING GIST (centroid) {}""".format(sql_tablespace))
+                              .format(Identifier(index_prefix + 'centroid'),
+                                      Identifier(base_table)), sql_tablespace)
 
     #Index on place_id
-    if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id'):
+    if not db_connection.index_exists(index_prefix + 'place_id'):
         with db_connection.cursor() as db_cursor:
-            db_cursor.execute(SQL(f"""
-            CREATE INDEX {{}} ON {{}} USING btree(place_id) {sql_tablespace}""")
-                              .format(Identifier(
-                                  f"""idx_place_classtype_{phrase_class}_{phrase_type}_place_id"""),
-                                      Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
+            db_cursor.execute(SQL(
+                """CREATE INDEX {{}} ON {{}} USING btree(place_id) {}""".format(sql_tablespace))
+                              .format(Identifier(index_prefix + 'place_id'),
+                                      Identifier(base_table)))
 
 
 def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
     """
         Grant access on read to the table place_classtype for the webuser.
     """
+    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
     with db_connection.cursor() as db_cursor:
         db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
-                          .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
-                                  Identifier(config.DATABASE_WEBUSER)))
+                          .format(Identifier(table_name), Identifier(config.DATABASE_WEBUSER)))
 
 def _convert_php_settings_if_needed(args, file_path):
     """
         Convert php settings file of special phrases to json file if it is still in php format.
     """
     file, extension = os.path.splitext(file_path)
-    json_file_path = f'{file}.json'
+    json_file_path = file + '.json'
     if extension == '.php' and not isfile(json_file_path):
         try:
             subprocess.run(['/usr/bin/env', 'php', '-Cq',
index 78370cf48473a69c612ec1cf0d1520aa39d0cb70..4069270eb73161073fba7ed0af593a8c3730e69c 100644 (file)
@@ -77,7 +77,7 @@ NOMINATIM_TIGER_DATA_PATH=
 NOMINATIM_WIKIPEDIA_DATA_PATH=
 
 # Configuration file for special phrase import.
-# When unset, the internal default settings from 'settings/phrase_settings.py'
+# When unset, the internal default settings from 'settings/phrase-settings.json'
 # are used.
 NOMINATIM_PHRASE_CONFIG=
 
index d2693a9a702251fb8b0e9d4e7c58720424f18e2c..2b638866db4e65da6d0e4d96db7a434090a8c932 100644 (file)
@@ -4,75 +4,27 @@
 import pytest
 from nominatim.tools.special_phrases import _create_place_classtype_indexes, _create_place_classtype_table, _get_wiki_content, _grant_access_to_webuser, _process_amenity
 
-def test_get_wiki_content():
-    assert _get_wiki_content('fr')
+def test_process_amenity_with_operator(temp_db_conn, getorcreate_amenityoperator_funcs):
+    _process_amenity(temp_db_conn, '', '', '', '', 'near')
+    _process_amenity(temp_db_conn, '', '', '', '', 'in')
 
-def execute_and_verify_add_word(temp_db_conn, phrase_label, normalized_label, 
-                                phrase_class, phrase_type):
-    _process_amenity(temp_db_conn, phrase_label, normalized_label,
-                     phrase_class, phrase_type, '')
-
-    with temp_db_conn.cursor() as temp_db_cursor:
-        temp_db_cursor.execute(f"""
-            SELECT * FROM word 
-            WHERE word_token=' {normalized_label}'
-            AND word='{normalized_label}'
-            AND class='{phrase_class}'
-            AND type='{phrase_type}'
-            AND type='{phrase_type}'""")
-        return temp_db_cursor.fetchone()
-
-def execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, normalized_label,
-                                              phrase_class, phrase_type, phrase_operator):
-    _process_amenity(temp_db_conn, phrase_label, normalized_label,
-                     phrase_class, phrase_type, phrase_operator)
-
-    with temp_db_conn.cursor() as temp_db_cursor:
-        temp_db_cursor.execute(f"""
-            SELECT * FROM word 
-            WHERE word_token=' {normalized_label}'
-            AND word='{normalized_label}'
-            AND class='{phrase_class}'
-            AND type='{phrase_type}'
-            AND operator='{phrase_operator}'""")
-        return temp_db_cursor.fetchone()
-
-def test_process_amenity_with_near_operator(temp_db_conn, word_table, amenity_operator_funcs):
-    phrase_label = ' label    '
-    normalized_label = 'label'
-    phrase_class = 'class'
-    phrase_type = 'type'
-
-    assert execute_and_verify_add_word(temp_db_conn, phrase_label, normalized_label, 
-                                       phrase_class, phrase_type)
-    assert execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, normalized_label, 
-                                                     phrase_class, phrase_type, 'near')
-    assert execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, normalized_label, 
-                                                     phrase_class, phrase_type, 'in')
-
-def index_exists(db_connect, index):
-        """ Check that an index with the given name exists in the database.
-        """
-        with db_connect.cursor() as cur:
-            cur.execute("""SELECT tablename FROM pg_indexes
-                           WHERE indexname = %s and schemaname = 'public'""", (index, ))
-            if cur.rowcount == 0:
-                return False
-        return True
+def test_process_amenity_without_operator(temp_db_conn, getorcreate_amenity_funcs):
+    _process_amenity(temp_db_conn, '', '', '', '', '')
 
 def test_create_place_classtype_indexes(temp_db_conn):
     phrase_class = 'class'
     phrase_type = 'type'
-    table_name = f'place_classtype_{phrase_class}_{phrase_type}'
+    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+    index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
 
     with temp_db_conn.cursor() as temp_db_cursor:
         temp_db_cursor.execute("CREATE EXTENSION postgis;")
-        temp_db_cursor.execute(f'CREATE TABLE {table_name}(place_id BIGINT, centroid GEOMETRY)')
+        temp_db_cursor.execute('CREATE TABLE {}(place_id BIGINT, centroid GEOMETRY)'.format(table_name))
 
     _create_place_classtype_indexes(temp_db_conn, '', phrase_class, phrase_type)
 
-    centroid_index_exists = index_exists(temp_db_conn, f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid')
-    place_id_index_exists = index_exists(temp_db_conn, f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id')
+    centroid_index_exists = temp_db_conn.index_exists(index_prefix + 'centroid')
+    place_id_index_exists = temp_db_conn.index_exists(index_prefix + 'place_id')
 
     assert centroid_index_exists and place_id_index_exists
 
@@ -93,10 +45,10 @@ def test_create_place_classtype_table(temp_db_conn, placex_table):
 def test_grant_access_to_web_user(temp_db_conn, def_config):
     phrase_class = 'class'
     phrase_type = 'type'
-    table_name = f'place_classtype_{phrase_class}_{phrase_type}'
+    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
 
     with temp_db_conn.cursor() as temp_db_cursor:
-        temp_db_cursor.execute(f'CREATE TABLE {table_name}()')
+        temp_db_cursor.execute('CREATE TABLE {}()'.format(table_name))
 
     _grant_access_to_webuser(temp_db_conn, def_config, phrase_class, phrase_type)
 
@@ -110,65 +62,28 @@ def test_grant_access_to_web_user(temp_db_conn, def_config):
     assert result
 
 @pytest.fixture
-def amenity_operator_funcs(temp_db_cursor):                        
+def make_strandard_name_func(temp_db_cursor):
     temp_db_cursor.execute(f"""
-        CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
-        AS $$
-        DECLARE
-        o TEXT;
+        CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$
         BEGIN
         RETURN trim(name); --Basically return only the trimed name for the tests
         END;
-        $$
-        LANGUAGE plpgsql IMMUTABLE;
-
-        CREATE SEQUENCE seq_word start 1;
-
+        $$ LANGUAGE plpgsql IMMUTABLE;""")
+        
+@pytest.fixture
+def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func):
+    temp_db_cursor.execute(f"""
         CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
                                                     lookup_class text, lookup_type text)
-        RETURNS INTEGER
-        AS $$
-        DECLARE
-        lookup_token TEXT;
-        return_word_id INTEGER;
-        BEGIN
-        lookup_token := ' '||trim(lookup_word);
-        SELECT min(word_id) FROM word
-        WHERE word_token = lookup_token and word = normalized_word
-                and class = lookup_class and type = lookup_type
-        INTO return_word_id;
-        IF return_word_id IS NULL THEN
-            return_word_id := nextval('seq_word');
-            INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
-                                    lookup_class, lookup_type, null, 0);
-        END IF;
-        RETURN return_word_id;
-        END;
-        $$
-        LANGUAGE plpgsql;
+        RETURNS void as $$
+        BEGIN END;
+        $$ LANGUAGE plpgsql""")
 
-        CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT,
-                                                       normalized_word TEXT,
-                                                       lookup_class text,
-                                                       lookup_type text,
-                                                       op text)
-        RETURNS INTEGER
-        AS $$
-        DECLARE
-        lookup_token TEXT;
-        return_word_id INTEGER;
-        BEGIN
-        lookup_token := ' '||trim(lookup_word);
-        SELECT min(word_id) FROM word
-        WHERE word_token = lookup_token and word = normalized_word
-                and class = lookup_class and type = lookup_type and operator = op
-        INTO return_word_id;
-        IF return_word_id IS NULL THEN
-            return_word_id := nextval('seq_word');
-            INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
-                                    lookup_class, lookup_type, null, 0, op);
-        END IF;
-        RETURN return_word_id;
-        END;
-        $$
-        LANGUAGE plpgsql;""")
+@pytest.fixture
+def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func):
+    temp_db_cursor.execute(f"""
+        CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT,
+                                                    lookup_class text, lookup_type text, op text)
+        RETURNS void as $$
+        BEGIN END;
+        $$ LANGUAGE plpgsql""")
\ No newline at end of file
index eb1bdc82409e63bcdca9154402a116e501a39360..b90da87195de0509845926688baa4b69c37e92ba 100755 (executable)
@@ -29,13 +29,13 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
                         libbz2-dev libpq-dev libproj-dev \
                         postgresql-server-dev-10 postgresql-10-postgis-2.4 \
                         postgresql-contrib-10 postgresql-10-postgis-scripts \
-                        php php-pgsql php-intl python3-pip \
-                        python3-psycopg2 python3-psutil python3-jinja2 git libicu-dev
+                        php php-pgsql php-intl libicu-dev python3-pip \
+                        python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
 
 # The python-dotenv package that comes with Ubuntu 18.04 is too old, so
 # install the latest version from pip:
 
-    pip3 install python-dotenv PyICU
+    pip3 install python-dotenv
 
 #
 # System Configuration
index 2b7a6a1b815ed1f76d47ecca6974d5eba1db2c95..d04f5796180a19efef59ae84a75b2b20cb23b321 100755 (executable)
@@ -32,11 +32,8 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
                         libbz2-dev libpq-dev libproj-dev \
                         postgresql-server-dev-12 postgresql-12-postgis-3 \
                         postgresql-contrib-12 postgresql-12-postgis-3-scripts \
-                        php php-pgsql php-intl python3-dotenv \
-                        python3-psycopg2 python3-psutil python3-jinja2 git libicu-dev
-#Python packages:
-
-    pip3 install PyICU
+                        php php-pgsql php-intl libicu-dev python3-dotenv \
+                        python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
 
 #
 # System Configuration