import os
import re
import subprocess
-import sys
import json
from os.path import isfile
+from icu import Transliterator
from psycopg2.sql import Identifier, Literal, SQL
from nominatim.tools.exec_utils import get_url
)
sanity_check_pattern = re.compile(r'^\w+$')
+ #Get all languages to process.
languages = _get_languages(args.config) if not languages else languages
#set of unique (class, type) pairs
- pairs = dict()
+ class_type_pairs = set()
+
+ transliterator = Transliterator.createFromRules("special-phrases normalizer",
+ args.config.TERM_NORMALIZATION)
+
for lang in languages:
LOG.warning('Import phrases for lang: %s', lang)
wiki_page_xml_content = _get_wiki_content(lang)
for match in matches:
phrase_label = match[0].strip()
+ normalized_label = transliterator.transliterate(phrase_label)
phrase_class = match[1].strip()
phrase_type = match[2].strip()
phrase_operator = match[3].strip()
continue
#add class/type to the set of pairs
- pairs[f'{phrase_class}|{phrase_type}'] = (phrase_class, phrase_type)
+ class_type_pairs.add((phrase_class, phrase_type))
_process_amenity(
- db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
+ db_connection, phrase_label, normalized_label,
+ phrase_class, phrase_type, phrase_operator
)
- _create_place_classtype_table_and_indexes(db_connection, args.config, pairs)
+ _create_place_classtype_table_and_indexes(db_connection, args.config, class_type_pairs)
db_connection.commit()
LOG.warning('Import done.')
Check sanity of given inputs in case somebody added garbage in the wiki.
If a bad class/type is detected an error is logged.
"""
- try:
- if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
- sys.exit()
- except SystemExit:
+ if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type)
- raise
def _process_amenity(db_connection, phrase_label, normalized_label,
                     phrase_class, phrase_type, phrase_operator):
    # pylint: disable=too-many-arguments
    """
        Add phrase lookup and corresponding class and type to the word table
        based on the operator.

        For the operators 'near' and 'in' the SQL function
        getorcreate_amenityoperator() is called with the operator as its
        last argument; for any other operator value getorcreate_amenity()
        is called instead.

        db_connection must provide cursor() returning a context-managed
        cursor. phrase_label is passed through make_standard_name() on the
        database side, normalized_label is handed over unchanged.
    """
    params = (phrase_label, normalized_label, phrase_class, phrase_type)

    if phrase_operator in ('near', 'in'):
        # The operator is bound like every other value instead of being
        # repeated as a literal in one statement per operator.
        query = """SELECT getorcreate_amenityoperator(
                   make_standard_name(%s), %s, %s, %s, %s)"""
        params += (phrase_operator,)
    else:
        query = """SELECT getorcreate_amenity(
                   make_standard_name(%s), %s, %s, %s)"""

    with db_connection.cursor() as db_cursor:
        db_cursor.execute(query, params)
-def _create_place_classtype_table_and_indexes(db_connection, config, pairs):
+def _create_place_classtype_table_and_indexes(db_connection, config, class_type_pairs):
"""
Create table place_classtype for each given pair.
Also create indexes on place_id and centroid.
with db_connection.cursor() as db_cursor:
db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
# class_type_pairs is a set of (class, type) tuples, so it is iterated
# directly; a set has no .items() method.
for pair in class_type_pairs:
    phrase_class = pair[0]
    phrase_type = pair[1]
"""
Create table place_classtype of the given phrase_class/phrase_type if it doesn't exist.
"""
+ table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
with db_connection.cursor() as db_cursor:
- db_cursor.execute(SQL(f"""
- CREATE TABLE IF NOT EXISTS {{}} {sql_tablespace}
+ db_cursor.execute(SQL("""
+ CREATE TABLE IF NOT EXISTS {{}} {}
AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
- WHERE class = {{}} AND type = {{}}""")
- .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
- Literal(phrase_class), Literal(phrase_type)))
+ WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
+ .format(Identifier(table_name), Literal(phrase_class),
+ Literal(phrase_type)))
def _create_place_classtype_indexes(db_connection, sql_tablespace, phrase_class, phrase_type):
    """
        Create indexes on centroid and place_id for the place_classtype table.

        The table is named place_classtype_<class>_<type> and the indexes
        idx_place_classtype_<class>_<type>_centroid / _place_id. An index is
        only created when db_connection.index_exists() reports that it is
        missing.

        sql_tablespace is appended verbatim to each CREATE INDEX statement;
        an empty value adds nothing.
    """
    index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
    base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)

    # Compose the tablespace clause as an SQL fragment instead of splicing it
    # in with str.format(): no brace escaping is needed and braces inside the
    # clause cannot break the template.
    tablespace_clause = SQL(sql_tablespace or '')

    # (index name suffix, statement template) for every index of the table.
    index_templates = (
        ('centroid', "CREATE INDEX {} ON {} USING GIST (centroid) {}"),
        ('place_id', "CREATE INDEX {} ON {} USING btree(place_id) {}"),
    )

    for suffix, template in index_templates:
        index_name = index_prefix + suffix
        if db_connection.index_exists(index_name):
            continue
        with db_connection.cursor() as db_cursor:
            # Only the composed statement is handed to execute(); the
            # tablespace is part of the statement, not a query parameter.
            db_cursor.execute(SQL(template).format(Identifier(index_name),
                                                   Identifier(base_table),
                                                   tablespace_clause))
def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
    """
        Give the web user read (SELECT) access to the place_classtype table
        that belongs to the given class/type pair.

        The user name is taken from config.DATABASE_WEBUSER.
    """
    classtype_table = Identifier('place_classtype_{}_{}'.format(phrase_class, phrase_type))
    web_user = Identifier(config.DATABASE_WEBUSER)
    grant_statement = SQL("""GRANT SELECT ON {} TO {}""").format(classtype_table, web_user)

    with db_connection.cursor() as db_cursor:
        db_cursor.execute(grant_statement)
def _convert_php_settings_if_needed(args, file_path):
"""
Convert php settings file of special phrases to json file if it is still in php format.
"""
file, extension = os.path.splitext(file_path)
- json_file_path = f'{file}.json'
+ json_file_path = file + '.json'
if extension == '.php' and not isfile(json_file_path):
try:
subprocess.run(['/usr/bin/env', 'php', '-Cq',