git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tools/special_phrases.py
Code cleaning, tests simplification and use of python3-icu package
[nominatim.git] / nominatim / tools / special_phrases.py
index 3dead38b9ffb5cd50c91a613f209ecd59afc0427..c0f472d65feca9419a34389a47738e3e114975d6 100644 (file)
@@ -5,9 +5,9 @@ import logging
 import os
 import re
 import subprocess
-import sys
 import json
 from os.path import isfile
+from icu import Transliterator
 from psycopg2.sql import Identifier, Literal, SQL
 from nominatim.tools.exec_utils import get_url
 
@@ -27,10 +27,15 @@ def import_from_wiki(args, db_connection, languages=None):
     )
     sanity_check_pattern = re.compile(r'^\w+$')
 
+    #Get all languages to process.
     languages = _get_languages(args.config) if not languages else languages
 
     #array for pairs of class/type
-    pairs = dict()
+    class_type_pairs = set()
+
+    transliterator = Transliterator.createFromRules("special-phrases normalizer",
+                                                    args.config.TERM_NORMALIZATION)
+
     for lang in languages:
         LOG.warning('Import phrases for lang: %s', lang)
         wiki_page_xml_content = _get_wiki_content(lang)
@@ -39,6 +44,7 @@ def import_from_wiki(args, db_connection, languages=None):
 
         for match in matches:
             phrase_label = match[0].strip()
+            normalized_label = transliterator.transliterate(phrase_label)
             phrase_class = match[1].strip()
             phrase_type = match[2].strip()
             phrase_operator = match[3].strip()
@@ -56,13 +62,14 @@ def import_from_wiki(args, db_connection, languages=None):
                 continue
 
             #add class/type to the pairs dict
-            pairs[f'{phrase_class}|{phrase_type}'] = (phrase_class, phrase_type)
+            class_type_pairs.add((phrase_class, phrase_type))
 
             _process_amenity(
-                db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
+                db_connection, phrase_label, normalized_label,
+                phrase_class, phrase_type, phrase_operator
             )
 
-    _create_place_classtype_table_and_indexes(db_connection, args.config, pairs)
+    _create_place_classtype_table_and_indexes(db_connection, args.config, class_type_pairs)
     db_connection.commit()
     LOG.warning('Import done.')
 
@@ -110,34 +117,32 @@ def _check_sanity(lang, phrase_class, phrase_type, pattern):
         Check sanity of given inputs in case somebody added garbage in the wiki.
         If a bad class/type is detected the system will exit with an error.
     """
-    try:
-        if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
-            sys.exit()
-    except SystemExit:
+    if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
         LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type)
-        raise
 
 
-def _process_amenity(db_connection, phrase_label, phrase_class, phrase_type, phrase_operator):
+def _process_amenity(db_connection, phrase_label, normalized_label,
+                     phrase_class, phrase_type, phrase_operator):
+    # pylint: disable-msg=too-many-arguments
     """
         Add phrase lookup and corresponding class and type to the word table based on the operator.
     """
     with db_connection.cursor() as db_cursor:
         if phrase_operator == 'near':
             db_cursor.execute("""SELECT getorcreate_amenityoperator(
-                              make_standard_name(%s), %s, %s, 'near')""",
-                              (phrase_label, phrase_class, phrase_type))
+                              make_standard_name(%s), %s, %s, %s, 'near')""",
+                              (phrase_label, normalized_label, phrase_class, phrase_type))
         elif phrase_operator == 'in':
             db_cursor.execute("""SELECT getorcreate_amenityoperator(
-                              make_standard_name(%s), %s, %s, 'in')""",
-                              (phrase_label, phrase_class, phrase_type))
+                              make_standard_name(%s), %s, %s, %s, 'in')""",
+                              (phrase_label, normalized_label, phrase_class, phrase_type))
         else:
             db_cursor.execute("""SELECT getorcreate_amenity(
-                              make_standard_name(%s), %s, %s)""",
-                              (phrase_label, phrase_class, phrase_type))
+                              make_standard_name(%s), %s, %s, %s)""",
+                              (phrase_label, normalized_label, phrase_class, phrase_type))
 
 
-def _create_place_classtype_table_and_indexes(db_connection, config, pairs):
+def _create_place_classtype_table_and_indexes(db_connection, config, class_type_pairs):
     """
         Create table place_classtype for each given pair.
         Also create indexes on place_id and centroid.
@@ -151,7 +156,7 @@ def _create_place_classtype_table_and_indexes(db_connection, config, pairs):
     with db_connection.cursor() as db_cursor:
         db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
 
-    for _, pair in pairs.items():
+    for pair in class_type_pairs.items():
         phrase_class = pair[0]
         phrase_type = pair[1]
 
@@ -178,53 +183,54 @@ def _create_place_classtype_table(db_connection, sql_tablespace, phrase_class, p
     """
         Create table place_classtype of the given phrase_class/phrase_type if it doesn't exist.
     """
+    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
     with db_connection.cursor() as db_cursor:
-        db_cursor.execute(SQL(f"""
-                CREATE TABLE IF NOT EXISTS {{}} {sql_tablespace
+        db_cursor.execute(SQL("""
+                CREATE TABLE IF NOT EXISTS {{}} {} 
                 AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex 
-                WHERE class = {{}} AND type = {{}}""")
-                          .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
-                                  Literal(phrase_class), Literal(phrase_type)))
+                WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
+                          .format(Identifier(table_name), Literal(phrase_class),
+                                  Literal(phrase_type)))
 
 
 def _create_place_classtype_indexes(db_connection, sql_tablespace, phrase_class, phrase_type):
     """
         Create indexes on centroid and place_id for the place_classtype table.
     """
+    index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
+    base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
     #Index on centroid
-    if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid'):
+    if not db_connection.index_exists(index_prefix + 'centroid'):
         with db_connection.cursor() as db_cursor:
-            db_cursor.execute(SQL(f"""
-                    CREATE INDEX {{}} ON {{}} USING GIST (centroid) {sql_tablespace}""")
-                              .format(Identifier(
-                                  f"""idx_place_classtype_{phrase_class}_{phrase_type}_centroid"""),
-                                      Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
+            db_cursor.execute(SQL("""
+                CREATE INDEX {{}} ON {{}} USING GIST (centroid) {}""".format(sql_tablespace))
+                              .format(Identifier(index_prefix + 'centroid'),
+                                      Identifier(base_table)), sql_tablespace)
 
     #Index on place_id
-    if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id'):
+    if not db_connection.index_exists(index_prefix + 'place_id'):
         with db_connection.cursor() as db_cursor:
-            db_cursor.execute(SQL(f"""
-            CREATE INDEX {{}} ON {{}} USING btree(place_id) {sql_tablespace}""")
-                              .format(Identifier(
-                                  f"""idx_place_classtype_{phrase_class}_{phrase_type}_place_id"""),
-                                      Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
+            db_cursor.execute(SQL(
+                """CREATE INDEX {{}} ON {{}} USING btree(place_id) {}""".format(sql_tablespace))
+                              .format(Identifier(index_prefix + 'place_id'),
+                                      Identifier(base_table)))
 
 
 def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
     """
         Grant access on read to the table place_classtype for the webuser.
     """
+    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
     with db_connection.cursor() as db_cursor:
         db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
-                          .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
-                                  Identifier(config.DATABASE_WEBUSER)))
+                          .format(Identifier(table_name), Identifier(config.DATABASE_WEBUSER)))
 
 def _convert_php_settings_if_needed(args, file_path):
     """
         Convert php settings file of special phrases to json file if it is still in php format.
     """
     file, extension = os.path.splitext(file_path)
-    json_file_path = f'{file}.json'
+    json_file_path = file + '.json'
     if extension == '.php' and not isfile(json_file_path):
         try:
             subprocess.run(['/usr/bin/env', 'php', '-Cq',