git.openstreetmap.org Git - nominatim.git/commitdiff
Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file.
author AntoJvlt <antonin.jolivat@gmail.com>
Sun, 21 Mar 2021 23:07:55 +0000 (00:07 +0100)
committer AntoJvlt <antonin.jolivat@gmail.com>
Tue, 23 Mar 2021 22:30:39 +0000 (23:30 +0100)
CMakeLists.txt
lib-php/admin/specialphrases.php [new file with mode: 0644]
lib-php/migration/phraseSettingsToJson.php
nominatim/clicmd/special_phrases.py
nominatim/tools/special_phrases.py
settings/__init__.py [deleted file]
settings/phrase-settings.json [new file with mode: 0644]
settings/phrase_settings.py [deleted file]

index 45881a4ab47babdf45e7da074c1757cda0153a32..1c6336a4c8c23c303d47851361ebfe0e434158f4 100644 (file)
@@ -216,7 +216,7 @@ endif()
 include(GNUInstallDirs)
 set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
 set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
 include(GNUInstallDirs)
 set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
 set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
-set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME}/settings)
+set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME})
 
 if (BUILD_IMPORTER)
     configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
 
 if (BUILD_IMPORTER)
     configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
@@ -257,9 +257,8 @@ if (BUILD_API)
 endif()
 
 install(FILES settings/env.defaults
 endif()
 
 install(FILES settings/env.defaults
-              settings/__init__.py
               settings/address-levels.json
               settings/address-levels.json
-              settings/phrase_settings.py
+              settings/phrase-settings.json
               settings/import-admin.style
               settings/import-street.style
               settings/import-address.style
               settings/import-admin.style
               settings/import-street.style
               settings/import-address.style
diff --git a/lib-php/admin/specialphrases.php b/lib-php/admin/specialphrases.php
new file mode 100644 (file)
index 0000000..9b90387
--- /dev/null
@@ -0,0 +1,163 @@
+<?php
+@define('CONST_LibDir', dirname(dirname(__FILE__)));
+
+require_once(CONST_LibDir.'/init-cmd.php');
+ini_set('memory_limit', '800M');
+ini_set('display_errors', 'stderr');
+
+$aCMDOptions
+= array(
+   'Import and export special phrases',
+   array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
+   array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
+   array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
+   array('wiki-import', '', 0, 1, 0, 0, 'bool', 'Create import script for search phrases '),
+   array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
+  );
+getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
+
+loadSettings($aCMDResult['project-dir'] ?? getcwd());
+setupHTTPProxy();
+
+include(getSettingConfig('PHRASE_CONFIG', 'phrase_settings.php'));
+
+if ($aCMDResult['wiki-import']) {
+    $oNormalizer = Transliterator::createFromRules(getSetting('TERM_NORMALIZATION'));
+    $aPairs = array();
+
+    $sLanguageIn = getSetting(
+        'LANGUAGES',
+        'af,ar,br,ca,cs,de,en,es,et,eu,fa,fi,fr,gl,hr,hu,'.
+        'ia,is,it,ja,mk,nl,no,pl,ps,pt,ru,sk,sl,sv,uk,vi'
+    );
+
+    foreach (explode(',', $sLanguageIn) as $sLanguage) {
+        $sURL = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'.strtoupper($sLanguage);
+        $sWikiPageXML = file_get_contents($sURL);
+
+        if (!preg_match_all(
+            '#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#',
+            $sWikiPageXML,
+            $aMatches,
+            PREG_SET_ORDER
+        )) {
+            continue;
+        }
+
+        foreach ($aMatches as $aMatch) {
+            $sLabel = trim($aMatch[1]);
+            if ($oNormalizer !== null) {
+                $sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
+            } else {
+                $sTrans = null;
+            }
+            $sClass = trim($aMatch[2]);
+            $sType = trim($aMatch[3]);
+            // hack around a bug where building=yes was imported with
+            // quotes into the wiki
+            $sType = preg_replace('/(&quot;|")/', '', $sType);
+            // sanity check, in case somebody added garbage in the wiki
+            if (preg_match('/^\\w+$/', $sClass) < 1
+                || preg_match('/^\\w+$/', $sType) < 1
+            ) {
+                trigger_error("Bad class/type for language $sLanguage: $sClass=$sType");
+                exit;
+            }
+            // blacklisting: disallow certain class/type combinations
+            if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass])) {
+                // fwrite(STDERR, "Blacklisted: ".$sClass."/".$sType."\n");
+                continue;
+            }
+            // whitelisting: if class is in whitelist, allow only tags in the list
+            if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass])) {
+                // fwrite(STDERR, "Non-Whitelisted: ".$sClass."/".$sType."\n");
+                continue;
+            }
+            $aPairs[$sClass.'|'.$sType] = array($sClass, $sType);
+
+            switch (trim($aMatch[4])) {
+                case 'near':
+                    printf(
+                        "SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'near');\n",
+                        pg_escape_string($sLabel),
+                        $sTrans,
+                        $sClass,
+                        $sType
+                    );
+                    break;
+                case 'in':
+                    printf(
+                        "SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'in');\n",
+                        pg_escape_string($sLabel),
+                        $sTrans,
+                        $sClass,
+                        $sType
+                    );
+                    break;
+                default:
+                    printf(
+                        "SELECT getorcreate_amenity(make_standard_name('%s'), '%s', '%s', '%s');\n",
+                        pg_escape_string($sLabel),
+                        $sTrans,
+                        $sClass,
+                        $sType
+                    );
+                    break;
+            }
+        }
+    }
+
+    echo 'CREATE INDEX idx_placex_classtype ON placex (class, type);';
+
+    foreach ($aPairs as $aPair) {
+        $sql_tablespace = getSetting('TABLESPACE_AUX_DATA');
+        if ($sql_tablespace) {
+            $sql_tablespace = ' TABLESPACE '.$sql_tablespace;
+        }
+
+        printf(
+            'CREATE TABLE place_classtype_%s_%s'
+            . $sql_tablespace
+            . ' AS'
+            . ' SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex'
+            . " WHERE class = '%s' AND type = '%s'"
+            . ";\n",
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1]),
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1])
+        );
+
+        printf(
+            'CREATE INDEX idx_place_classtype_%s_%s_centroid'
+            . ' ON place_classtype_%s_%s USING GIST (centroid)'
+            . $sql_tablespace
+            . ";\n",
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1]),
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1])
+        );
+
+        printf(
+            'CREATE INDEX idx_place_classtype_%s_%s_place_id'
+            . ' ON place_classtype_%s_%s USING btree(place_id)'
+            . $sql_tablespace
+            . ";\n",
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1]),
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1])
+        );
+
+        printf(
+            'GRANT SELECT ON place_classtype_%s_%s TO "%s"'
+            . ";\n",
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1]),
+            getSetting('DATABASE_WEBUSER')
+        );
+    }
+
+    echo 'DROP INDEX idx_placex_classtype;';
+}
\ No newline at end of file
index 15c49f0aa0094fa87bf452eed9eb5b9463e78714..187e3fc6cc0600cd601724b1e913f457baa3fcfd 100644 (file)
@@ -1,9 +1,16 @@
 <?php
 
 $phpPhraseSettingsFile = $argv[1];
 <?php
 
 $phpPhraseSettingsFile = $argv[1];
+<<<<<<< HEAD
 $jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';
 
 if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
 $jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';
 
 if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
+=======
+$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile)."/".basename($phpPhraseSettingsFile, ".php").".json";
+
+if(file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile))
+{
+>>>>>>> 3d939458... Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file.
     include $phpPhraseSettingsFile;
 
     $data = array();
     include $phpPhraseSettingsFile;
 
     $data = array();
@@ -16,4 +23,8 @@ if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)
     $jsonFile = fopen($jsonPhraseSettingsFile, 'w');
     fwrite($jsonFile, json_encode($data));
     fclose($jsonFile);
     $jsonFile = fopen($jsonPhraseSettingsFile, 'w');
     fwrite($jsonFile, json_encode($data));
     fclose($jsonFile);
+<<<<<<< HEAD
+}
+=======
 }
 }
+>>>>>>> 3d939458... Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file.
index b7e0f5dc6ce825318ef0843669e56eb472312449..8198a4c39cf49ebde42e881c2133c0fe01fc95b5 100644 (file)
@@ -25,5 +25,5 @@ class ImportSpecialPhrases:
         if args.from_wiki:
             LOG.warning('Special phrases importation starting')
             with connect(args.config.get_libpq_dsn()) as db_connection:
         if args.from_wiki:
             LOG.warning('Special phrases importation starting')
             with connect(args.config.get_libpq_dsn()) as db_connection:
-                import_from_wiki(args.config, db_connection)
+                import_from_wiki(args, db_connection)
         return 0
         return 0
index a70d304770e049bedff9acc72cfca5ce06ea61cc..3dead38b9ffb5cd50c91a613f209ecd59afc0427 100644 (file)
@@ -2,26 +2,32 @@
     Functions to import special phrases into the database.
 """
 import logging
     Functions to import special phrases into the database.
 """
 import logging
+import os
 import re
 import re
+import subprocess
 import sys
 import sys
+import json
+from os.path import isfile
 from psycopg2.sql import Identifier, Literal, SQL
 from psycopg2.sql import Identifier, Literal, SQL
-from settings.phrase_settings import BLACK_LIST, WHITE_LIST
 from nominatim.tools.exec_utils import get_url
 
 LOG = logging.getLogger()
 
 from nominatim.tools.exec_utils import get_url
 
 LOG = logging.getLogger()
 
-def import_from_wiki(config, db_connection, languages=None):
+def import_from_wiki(args, db_connection, languages=None):
+    # pylint: disable-msg=too-many-locals
     """
         Iterate through all specified languages and
         extract corresponding special phrases from the wiki.
     """
     """
         Iterate through all specified languages and
         extract corresponding special phrases from the wiki.
     """
+    black_list, white_list = _load_white_and_black_lists(args)
+
     #Compile the match regex to increase performance for the following loop.
     occurence_pattern = re.compile(
         r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
     )
     sanity_check_pattern = re.compile(r'^\w+$')
 
     #Compile the match regex to increase performance for the following loop.
     occurence_pattern = re.compile(
         r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
     )
     sanity_check_pattern = re.compile(r'^\w+$')
 
-    languages = _get_languages(config) if not languages else languages
+    languages = _get_languages(args.config) if not languages else languages
 
     #array for pairs of class/type
     pairs = dict()
 
     #array for pairs of class/type
     pairs = dict()
@@ -43,10 +49,10 @@ def import_from_wiki(config, db_connection, languages=None):
             _check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)
 
             #blacklisting: disallow certain class/type combinations
             _check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)
 
             #blacklisting: disallow certain class/type combinations
-            if phrase_class in BLACK_LIST.keys() and phrase_type in BLACK_LIST[phrase_class]:
+            if phrase_class in black_list.keys() and phrase_type in black_list[phrase_class]:
                 continue
             #whitelisting: if class is in whitelist, allow only tags in the list
                 continue
             #whitelisting: if class is in whitelist, allow only tags in the list
-            if phrase_class in WHITE_LIST.keys() and phrase_type not in WHITE_LIST[phrase_class]:
+            if phrase_class in white_list.keys() and phrase_type not in white_list[phrase_class]:
                 continue
 
             #add class/type to the pairs dict
                 continue
 
             #add class/type to the pairs dict
@@ -56,10 +62,23 @@ def import_from_wiki(config, db_connection, languages=None):
                 db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
             )
 
                 db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
             )
 
-    _create_place_classtype_table_and_indexes(db_connection, config, pairs)
+    _create_place_classtype_table_and_indexes(db_connection, args.config, pairs)
     db_connection.commit()
     LOG.warning('Import done.')
 
     db_connection.commit()
     LOG.warning('Import done.')
 
+def _load_white_and_black_lists(args):
+    """
+        Load white and black lists from phrase-settings.json.
+    """
+    config = args.config
+    settings_path = str(config.config_dir)+'/phrase-settings.json'
+
+    if config.PHRASE_CONFIG:
+        settings_path = _convert_php_settings_if_needed(args, config.PHRASE_CONFIG)
+
+    with open(settings_path, "r") as json_settings:
+        settings = json.load(json_settings)
+    return settings['blackList'], settings['whiteList']
 
 def _get_languages(config):
     """
 
 def _get_languages(config):
     """
@@ -199,3 +218,22 @@ def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
         db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
                           .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
                                   Identifier(config.DATABASE_WEBUSER)))
         db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
                           .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
                                   Identifier(config.DATABASE_WEBUSER)))
+
+def _convert_php_settings_if_needed(args, file_path):
+    """
+        Convert php settings file of special phrases to json file if it is still in php format.
+    """
+    file, extension = os.path.splitext(file_path)
+    json_file_path = f'{file}.json'
+    if extension == '.php' and not isfile(json_file_path):
+        try:
+            subprocess.run(['/usr/bin/env', 'php', '-Cq',
+                            args.phplib_dir / 'migration/phraseSettingsToJson.php',
+                            file_path], check=True)
+            LOG.warning('special_phrase configuration file has been converted to json.')
+            return json_file_path
+        except subprocess.CalledProcessError:
+            LOG.error('Error while converting %s to json.', file_path)
+            raise
+    else:
+        return json_file_path
diff --git a/settings/__init__.py b/settings/__init__.py
deleted file mode 100644 (file)
index b49d97f..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-"""
-    Module for settings
-"""
\ No newline at end of file
diff --git a/settings/phrase-settings.json b/settings/phrase-settings.json
new file mode 100644 (file)
index 0000000..a097dca
--- /dev/null
@@ -0,0 +1,25 @@
+{
+    "Comments": [
+        "The black list corresponds to class/type combinations to exclude",
+        "If a class is in the white list then all types will",
+        "be ignored except the ones given in the list.",
+        "Also use this list to exclude an entire class from special phrases."
+    ],
+    "blackList": {
+        "bounday": [
+            "administrative"
+        ],
+        "place": [
+            "house",
+            "houses"
+        ]
+    },
+    "whiteList": {
+        "highway": [
+            "bus_stop",
+            "rest_area",
+            "raceway"
+        ],
+        "building": []
+    }
+}
diff --git a/settings/phrase_settings.py b/settings/phrase_settings.py
deleted file mode 100644 (file)
index 59a4e7c..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-"""
-    These settings control the import of special phrases from the wiki.
-"""
-#class/type combinations to exclude
-BLACK_LIST = {
-    'bounday': [
-        'administrative'
-    ],
-    'place': [
-        'house',
-        'houses'
-    ]
-}
-
-#If a class is in the white list then all types will
-#be ignored except the ones given in the list.
-#Also use this list to exclude an entire class from
-#special phrases.
-WHITE_LIST = {
-    'highway': [
-        'bus_stop',
-        'rest_area',
-        'raceway'
-    ],
-    'building': []
-}