If you are migrating from a version <3.6, then you still have to follow
the manual migration steps up to 3.6.
+## 3.7.0 -> master
+
+### NOMINATIM_PHRASE_CONFIG removed
+
+Custom blacklist configurations for special phrases now need to be passed
+with the `--config` parameter to `nominatim special-phrases`. Alternatively,
+you can put your custom configuration in the project directory in a file
+named `phrase-settings.json`.
+
+Version 3.8 also removes the automatic converter for the PHP format of
+the configuration in older versions. If you are updating from Nominatim < 3.7
+and still work with a custom `phrase-settings.php`, you need to manually
+convert it into JSON format.
+
## 3.6.0 -> 3.7.0
### New format and name of configuration file
[wikipedia ranking file](../admin/Import.md#wikipediawikidata-rankings). When
unset, Nominatim expects the data to be saved in the project directory.
-#### NOMINATIM_PHRASE_CONFIG
-
-| Summary | |
-| -------------- | --------------------------------------------------- |
-| **Description:** | Configuration file for special phrase imports |
-| **Format:** | path |
-| **Default:** | _empty_ (use default settings) |
-
-The _phrase_config_ file configures black and white lists of tag types,
-so that some of them can be ignored, when loading special phrases from
-the OSM wiki. The default settings can be found in the configuration
-directory as `phrase-settings.json`.
-
#### NOMINATIM_ADDRESS_LEVEL_CONFIG
| Summary | |
+++ /dev/null
-<?php
-
-$phpPhraseSettingsFile = $argv[1];
-$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';
-
-if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
- include $phpPhraseSettingsFile;
-
- $data = array();
-
- if (isset($aTagsBlacklist)) {
- $data['blackList'] = $aTagsBlacklist;
- }
- if (isset($aTagsWhitelist)) {
- $data['whiteList'] = $aTagsWhitelist;
- }
-
- $jsonFile = fopen($jsonPhraseSettingsFile, 'w');
- fwrite($jsonFile, json_encode($data));
- fclose($jsonFile);
-}
An example file can be found in the Nominatim sources at
'test/testdb/full_en_phrases_test.csv'.
+
+ The import can be further configured to ignore specific key/value pairs.
+ This is particularly useful when importing phrases from the wiki. The
+ default configuration excludes some very common tags like building=yes.
+ The configuration can be customized by putting a file `phrase-settings.json`
+ with custom rules into the project directory or by using the `--config`
+ option to point to another configuration file.
"""
@staticmethod
def add_args(parser):
help='Import special phrases from a CSV file')
group.add_argument('--no-replace', action='store_true',
help='Keep the old phrases and only add the new ones')
+ group.add_argument('--config', action='store',
+ help='Configuration file for black/white listing '
+ '(default: phrase-settings.json)')
@staticmethod
def run(args):
should_replace = not args.no_replace
with connect(args.config.get_libpq_dsn()) as db_connection:
SPImporter(
- args.config, args.phplib_dir, db_connection, loader
+ args.config, db_connection, loader
).import_phrases(tokenizer, should_replace)
import logging
import os
from pathlib import Path
+import json
import yaml
from dotenv import dotenv_values
is loaded using this function and added at the position in the
configuration tree.
"""
- assert Path(filename).suffix == '.yaml'
+ configfile = self.find_config_file(filename, config)
- configfile = self._find_config_file(filename, config)
+ if configfile.suffix in ('.yaml', '.yml'):
+ return self._load_from_yaml(configfile)
- return self._load_from_yaml(configfile)
+ if configfile.suffix == '.json':
+ with configfile.open('r') as cfg:
+ return json.load(cfg)
+ raise UsageError(f"Config file '{configfile}' has unknown format.")
- def _find_config_file(self, filename, config=None):
+
+ def find_config_file(self, filename, config=None):
""" Resolve the location of a configuration file given a filename and
an optional configuration option with the file name.
Raises a UsageError when the file cannot be found or is not
if Path(fname).is_absolute():
configfile = Path(fname)
else:
- configfile = self._find_config_file(loader.construct_scalar(node))
+ configfile = self.find_config_file(loader.construct_scalar(node))
if configfile.suffix != '.yaml':
LOG.fatal("Format error while reading '%s': only YAML format supported.",
valids anymore are removed.
"""
import logging
-import os
-from os.path import isfile
-from pathlib import Path
import re
-import subprocess
-import json
from psycopg2.sql import Identifier, Literal, SQL
-from nominatim.errors import UsageError
from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
LOG = logging.getLogger()
Takes an SP loader which loads the phrases from an external source.
"""
- def __init__(self, config, phplib_dir, db_connection, sp_loader) -> None:
+ def __init__(self, config, db_connection, sp_loader) -> None:
self.config = config
- self.phplib_dir = phplib_dir
self.db_connection = db_connection
self.sp_loader = sp_loader
self.statistics_handler = SpecialPhrasesImporterStatistics()
"""
Load white and black lists from phrase-settings.json.
"""
- settings_path = (self.config.config_dir / 'phrase-settings.json').resolve()
+ settings = self.config.load_sub_configuration('phrase-settings.json')
- if self.config.PHRASE_CONFIG:
- settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG)
-
- with settings_path.open("r") as json_settings:
- settings = json.load(json_settings)
return settings['blackList'], settings['whiteList']
def _check_sanity(self, phrase):
for table in self.table_phrases_to_delete:
self.statistics_handler.notify_one_table_deleted()
db_cursor.drop_table(table)
-
-
- def _convert_php_settings_if_needed(self, file_path):
- """
- Convert php settings file of special phrases to json file if it is still in php format.
- """
- if not isfile(file_path):
- raise UsageError(str(file_path) + ' is not a valid file.')
-
- file, extension = os.path.splitext(file_path)
- json_file_path = Path(file + '.json').resolve()
-
- if extension not in ('.php', '.json'):
- raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.')
-
- if extension == '.php' and not isfile(json_file_path):
- try:
- subprocess.run(['/usr/bin/env', 'php', '-Cq',
- (self.phplib_dir / 'migration/PhraseSettingsToJson.php').resolve(),
- file_path], check=True)
- LOG.warning('special_phrase configuration file has been converted to json.')
- except subprocess.CalledProcessError:
- LOG.error('Error while converting %s to json.', file_path)
- raise
-
- return json_file_path
NOMINATIM_WIKIPEDIA_DATA_PATH=
# Configuration file for special phrase import.
-# When unset, the internal default settings from 'settings/phrase-settings.json'
-# are used.
+# OBSOLETE: use `nominatim special-phrases --config <file>` or simply put
+# a custom phrase-settings.json into your project directory.
NOMINATIM_PHRASE_CONFIG=
# Configuration file for rank assignments.
"Also use this list to exclude an entire class from special phrases."
],
"blackList": {
- "bounday": [
+ "boundary": [
"administrative"
],
"place": [
@pytest.fixture
-def sp_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
+def sp_importer(temp_db_conn, def_config):
"""
Return an instance of SPImporter.
"""
loader = SPWikiLoader(def_config, ['en'])
- return SPImporter(def_config, temp_phplib_dir_with_migration, temp_db_conn, loader)
-
-
-@pytest.fixture
-def temp_phplib_dir_with_migration(src_dir, tmp_path):
- """
- Return temporary phpdir with migration subdirectory and
- PhraseSettingsToJson.php script inside.
- """
- migration_file = (src_dir / 'lib-php' / 'migration' / 'PhraseSettingsToJson.php').resolve()
-
- phpdir = tmp_path / 'tempphp'
- phpdir.mkdir()
-
- (phpdir / 'migration').mkdir()
- migration_dest_path = (phpdir / 'migration' / 'PhraseSettingsToJson.php').resolve()
- copyfile(str(migration_file), str(migration_dest_path))
-
- return phpdir
+ return SPImporter(def_config, temp_db_conn, loader)
@pytest.fixture
assert isinstance(black_list, dict) and isinstance(white_list, dict)
-def test_convert_php_settings(sp_importer, testfile_dir, tmp_path):
- """
- Test that _convert_php_settings_if_needed() convert the given
- php file to a json file.
- """
- php_file = (testfile_dir / 'phrase_settings.php').resolve()
-
- temp_settings = (tmp_path / 'phrase_settings.php').resolve()
- copyfile(php_file, temp_settings)
- sp_importer._convert_php_settings_if_needed(temp_settings)
-
- assert (tmp_path / 'phrase_settings.json').is_file()
-
-def test_convert_settings_wrong_file(sp_importer):
- """
- Test that _convert_php_settings_if_needed() raise an exception
- if the given file is not a valid file.
- """
- with pytest.raises(UsageError, match='random_file is not a valid file.'):
- sp_importer._convert_php_settings_if_needed('random_file')
-
-def test_convert_settings_json_already_exist(sp_importer, testfile_dir):
- """
- Test that if we give to '_convert_php_settings_if_needed' a php file path
- and that a the corresponding json file already exists, it is returned.
- """
- php_file = (testfile_dir / 'phrase_settings.php').resolve()
- json_file = (testfile_dir / 'phrase_settings.json').resolve()
-
- returned = sp_importer._convert_php_settings_if_needed(php_file)
-
- assert returned == json_file
-
-def test_convert_settings_giving_json(sp_importer, testfile_dir):
- """
- Test that if we give to '_convert_php_settings_if_needed' a json file path
- the same path is directly returned
- """
- json_file = (testfile_dir / 'phrase_settings.json').resolve()
-
- returned = sp_importer._convert_php_settings_if_needed(json_file)
-
- assert returned == json_file
def test_create_place_classtype_indexes(temp_db_with_extensions, temp_db_conn,
table_factory, sp_importer):