from ..tools import database_import, refresh, postcodes, freeze, country_info
from ..indexer.indexer import Indexer
- country_info.setup_country_config(args.config.config_dir / 'country_settings.yaml')
+ country_info.setup_country_config(args.config)
if args.continue_at is None:
files = args.get_osm_file_list()
import logging
import os
from pathlib import Path
+import yaml
from dotenv import dotenv_values
def __getattr__(self, name):
name = 'NOMINATIM_' + name
- return self.environ.get(name) or self._config[name]
+ if name in self.environ:
+ return self.environ[name]
+
+ return self._config[name]
def get_bool(self, name):
""" Return the given configuration parameter as a boolean.
env.update(self.environ)
return env
+
+
def load_sub_configuration(self, filename, config=None):
    """ Load additional configuration from a file. `filename` is the name
        of the configuration file. The file is first searched in the
        project directory and then in the global settings directory.

        If `config` is set, then the name of the configuration file can
        be additionally given through a .env configuration option. When
        the option is set to a non-empty value, then that name is used
        instead of `filename`: if the name is an absolute path, the file
        name is taken as is, if the name is relative, it is searched in
        the project directory and then in the global settings directory.

        The format of the file is determined from the filename suffix.
        Currently only files with extension '.yaml' are supported.

        YAML files support a special '!include' construct. When the
        directive is given, the value is taken to be a filename, the file
        is loaded using this function and added at the position in the
        configuration tree.

        Returns the content of the parsed configuration file.

        Raises a UsageError when the file cannot be found or when the
        file that was found does not have a supported suffix.
    """
    # `filename` is hard-coded by the calling code, so a wrong suffix here
    # is a programming error and not something a user can cause.
    assert Path(filename).suffix == '.yaml'

    configfile = self._find_config_file(filename, config)

    # The name may have come from a user-supplied configuration option,
    # so the format must be checked on the file that was actually found.
    if configfile.suffix != '.yaml':
        LOG.fatal("Format error while reading '%s': only YAML format supported.",
                  configfile)
        raise UsageError("Cannot handle config file format.")

    return self._load_from_yaml(configfile)
+
+
def _find_config_file(self, filename, config=None):
    """ Determine the path of a configuration file.

        `filename` is the default name of the file. When `config` names
        a configuration option with a non-empty value, that value is used
        instead: an absolute path is resolved and returned directly, a
        relative name replaces `filename` in the directory search.

        The search looks in the project directory first and then in the
        settings directory. Directories that are not set are skipped.

        Raises a UsageError when no regular file can be found.
    """
    override = self.__getattr__(config) if config is not None else None

    if override:
        override = Path(override)

        if not override.is_absolute():
            filename = override
        else:
            # Absolute names are used exclusively, without any search.
            resolved = override.resolve()
            if resolved.is_file():
                return resolved

            LOG.fatal("Cannot find config file '%s'.", resolved)
            raise UsageError("Config file not found.")

    search_paths = [self.project_dir, self.config_dir]
    candidates = (base / filename for base in search_paths if base is not None)
    match = next((cand for cand in candidates if cand.is_file()), None)
    if match is not None:
        return match

    LOG.fatal("Configuration file '%s' not found.\nDirectories searched: %s",
              filename, search_paths)
    raise UsageError("Config file not found.")
+
+
def _load_from_yaml(self, cfgfile):
    """ Load a YAML configuration file and return its parsed content.

        The file may include other YAML files using the '!include'
        operator, which is handled by `_yaml_include_representer()`.
    """
    # Register the '!include' handler on a private loader class instead of
    # on yaml.SafeLoader itself. That way the handler does not leak into
    # every other safe_load() call of the process and is always bound to
    # this configuration object.
    class _IncludeLoader(yaml.SafeLoader):
        """ SafeLoader extended with the '!include' operator. """

    _IncludeLoader.add_constructor('!include', self._yaml_include_representer)

    # _IncludeLoader derives from SafeLoader, so still only plain YAML
    # types are constructed.
    return yaml.load(cfgfile.read_text(encoding='utf-8'), Loader=_IncludeLoader)


def _yaml_include_representer(self, loader, node):
    """ Handler for the '!include' operator in YAML files.

        When the filename is relative, then the file is first searched in the
        project directory and then in the global settings directory.

        Returns the parsed content of the included file. The included
        file may itself use the '!include' operator.

        Raises a UsageError when the file cannot be found or does not
        have the suffix '.yaml'.
    """
    fname = loader.construct_scalar(node)
    configfile = Path(fname)

    if configfile.is_absolute():
        # Report a missing file the same way as for relative names
        # instead of failing with a bare FileNotFoundError later.
        if not configfile.is_file():
            LOG.fatal("Cannot find config file '%s'.", configfile)
            raise UsageError("Config file not found.")
    else:
        configfile = self._find_config_file(fname)

    if configfile.suffix != '.yaml':
        LOG.fatal("Format error while reading '%s': only YAML format supported.",
                  configfile)
        raise UsageError("Cannot handle config file format.")

    # Go through the regular loading function so that nested includes
    # are resolved by this object as well.
    return self._load_from_yaml(configfile)
import io
import logging
import itertools
-from pathlib import Path
import re
-import yaml
from icu import Transliterator
from nominatim.errors import UsageError
LOG = logging.getLogger()
-def _flatten_yaml_list(content):
+def _flatten_config_list(content):
if not content:
return []
if not isinstance(content, list):
- raise UsageError("List expected in ICU yaml configuration.")
+ raise UsageError("List expected in ICU configuration.")
output = []
for ele in content:
if isinstance(ele, list):
- output.extend(_flatten_yaml_list(ele))
+ output.extend(_flatten_config_list(ele))
else:
output.append(ele)
""" Compiler for ICU rules from a tokenizer configuration file.
"""
- def __init__(self, configfile):
- self.configfile = configfile
+ def __init__(self, rules):
self.variants = set()
- if configfile.suffix == '.yaml':
- self._load_from_yaml()
- else:
- raise UsageError("Unknown format of tokenizer configuration.")
+ self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization')
+ self.transliteration_rules = self._cfg_to_icu_rules(rules, 'transliteration')
+ self._parse_variant_list(self._get_section(rules, 'variants'))
def get_search_rules(self):
"""
return self.variants
- def _yaml_include_representer(self, loader, node):
- value = loader.construct_scalar(node)
-
- if Path(value).is_absolute():
- content = Path(value)
- else:
- content = (self.configfile.parent / value)
-
- return yaml.safe_load(content.read_text(encoding='utf-8'))
-
-
- def _load_from_yaml(self):
- yaml.add_constructor('!include', self._yaml_include_representer,
- Loader=yaml.SafeLoader)
- rules = yaml.safe_load(self.configfile.read_text(encoding='utf-8'))
-
- self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization')
- self.transliteration_rules = self._cfg_to_icu_rules(rules, 'transliteration')
- self._parse_variant_list(self._get_section(rules, 'variants'))
-
- def _get_section(self, rules, section):
+ @staticmethod
+ def _get_section(rules, section):
""" Get the section named 'section' from the rules. If the section does
not exist, raise a usage error with a meaningful message.
"""
if section not in rules:
- LOG.fatal("Section '%s' not found in tokenizer config '%s'.",
- section, str(self.configfile))
+ LOG.fatal("Section '%s' not found in tokenizer config.", section)
raise UsageError("Syntax error in tokenizer configuration file.")
return rules[section]
if content is None:
return ''
- return ';'.join(_flatten_yaml_list(content)) + ';'
+ return ';'.join(_flatten_config_list(content)) + ';'
def _parse_variant_list(self, rules):
if not rules:
return
- rules = _flatten_yaml_list(rules)
+ rules = _flatten_config_list(rules)
vmaker = _VariantMaker(self.normalization_rules)
import logging
import re
from textwrap import dedent
-from pathlib import Path
from nominatim.db.connection import connect
from nominatim.db.properties import set_property, get_property
This copies all necessary data in the project directory to make
sure the tokenizer remains stable even over updates.
"""
- if config.TOKENIZER_CONFIG:
- cfgfile = Path(config.TOKENIZER_CONFIG)
- else:
- cfgfile = config.config_dir / 'icu_tokenizer.yaml'
-
- loader = ICURuleLoader(cfgfile)
+ loader = ICURuleLoader(config.load_sub_configuration('icu_tokenizer.yaml',
+ config='TOKENIZER_CONFIG'))
self.naming_rules = ICUNameProcessorRules(loader=loader)
self.term_normalization = config.TERM_NORMALIZATION
self.max_word_frequency = config.MAX_WORD_FREQUENCY
Functions for importing and managing static country information.
"""
import psycopg2.extras
-import yaml
from nominatim.db import utils as db_utils
from nominatim.db.connection import connect
def __init__(self):
self._info = {}
- def load(self, configfile):
+ def load(self, config):
""" Load the country properties from the configuration files,
if they are not loaded yet.
"""
if not self._info:
- self._info = yaml.safe_load(configfile.read_text(encoding='utf-8'))
+ self._info = config.load_sub_configuration('country_settings.yaml')
def items(self):
""" Return tuples of (country_code, property dict) as iterable.
_COUNTRY_INFO = _CountryInfo()
-def setup_country_config(configfile):
+def setup_country_config(config):
""" Load country properties from the configuration file.
Needs to be called before using any other functions in this
file.
"""
- _COUNTRY_INFO.load(configfile)
+ _COUNTRY_INFO.load(config)
def setup_country_tables(dsn, sql_dir, ignore_partitions=False):
return _mk_config
@pytest.fixture
def make_config_path(src_dir, tmp_path):
    """ Return a factory for configuration objects whose project and
        config directories are fresh subdirectories of the temporary
        directory. The default settings are still read from the source
        tree.
    """
    def _mk_config():
        project_dir = tmp_path / 'project'
        settings_dir = tmp_path / 'config'
        for directory in (project_dir, settings_dir):
            directory.mkdir()

        conf = Configuration(project_dir, src_dir / 'settings')
        # Redirect the lookup of sub-configurations to the empty directory.
        conf.config_dir = settings_dir
        return conf

    return _mk_config
+
def test_no_project_dir(make_config):
config = make_config()
assert config.DATABASE_WEBUSER == 'nobody'
def test_prefer_os_environ_can_unset_project_setting(make_config, monkeypatch, tmp_path):
    """ An empty value in the OS environment wins over a value set in
        the project's .env file.
    """
    (tmp_path / '.env').write_text('NOMINATIM_DATABASE_WEBUSER=apache\n')
    monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', '')

    project_config = make_config(tmp_path)

    assert project_config.DATABASE_WEBUSER == ''
+
+
def test_get_os_env_add_defaults(make_config, monkeypatch):
config = make_config()
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', value)
assert str(config.get_import_style_file()) == value
+
+
def test_load_subconf_from_project_dir(make_config_path):
    """ A file in the project directory is preferred over a file of the
        same name in the settings directory.
    """
    config = make_config_path()

    # Same keys with swapped values, so the result shows which file was read.
    (config.project_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
    (config.config_dir / 'test.yaml').write_text('cow: miau\ncat: muh\n')

    loaded = config.load_sub_configuration('test.yaml')

    assert loaded == {'cow': 'muh', 'cat': 'miau'}
+
+
def test_load_subconf_from_settings_dir(make_config_path):
    """ The settings directory is used when the project directory does
        not contain the file.
    """
    config = make_config_path()
    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')

    loaded = config.load_sub_configuration('test.yaml')

    assert loaded == {'cow': 'muh', 'cat': 'miau'}
+
+
def test_load_subconf_empty_env_conf(make_config_path, monkeypatch):
    """ An empty configuration option is ignored and the default file
        name is used.
    """
    monkeypatch.setenv('NOMINATIM_MY_CONFIG', '')
    config = make_config_path()
    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')

    loaded = config.load_sub_configuration('test.yaml', config='MY_CONFIG')

    assert loaded == {'cow': 'muh', 'cat': 'miau'}
+
+
def test_load_subconf_env_absolute_found(make_config_path, monkeypatch, tmp_path):
    """ An absolute path in the configuration option replaces the
        default file.
    """
    override = tmp_path / 'other.yaml'
    monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(override))
    config = make_config_path()

    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
    override.write_text('dog: muh\nfrog: miau\n')

    loaded = config.load_sub_configuration('test.yaml', config='MY_CONFIG')

    assert loaded == {'dog': 'muh', 'frog': 'miau'}
+
+
def test_load_subconf_env_absolute_not_found(make_config_path, monkeypatch, tmp_path):
    """ A missing file given by absolute path in the configuration option
        is an error, even when the default file exists.
    """
    monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml'))
    config = make_config_path()

    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')

    with pytest.raises(UsageError, match='Config file not found.'):
        config.load_sub_configuration('test.yaml', config='MY_CONFIG')
+
+
@pytest.mark.parametrize("location", ['project_dir', 'config_dir'])
def test_load_subconf_env_relative_found(make_config_path, monkeypatch, location):
    """ A relative name in the configuration option is found in the
        project directory as well as in the settings directory.
    """
    monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
    config = make_config_path()

    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
    target_dir = getattr(config, location)
    (target_dir / 'other.yaml').write_text('dog: bark\n')

    loaded = config.load_sub_configuration('test.yaml', config='MY_CONFIG')

    assert loaded == {'dog': 'bark'}
+
+
def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch):
    """ A missing file given by relative name in the configuration option
        is an error, even when the default file exists.
    """
    monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
    config = make_config_path()

    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')

    with pytest.raises(UsageError, match='Config file not found.'):
        config.load_sub_configuration('test.yaml', config='MY_CONFIG')
+
+
def test_load_subconf_not_found(make_config_path):
    """ Loading a file that exists in neither directory is an error. """
    config = make_config_path()

    with pytest.raises(UsageError, match='Config file not found.'):
        config.load_sub_configuration('test.yaml')
+
+
def test_load_subconf_include_absolute(make_config_path, tmp_path):
    """ An '!include' with an absolute path inserts the content of the
        named file.
    """
    config = make_config_path()

    (config.config_dir / 'test.yaml').write_text(f'base: !include {tmp_path}/inc.yaml\n')
    (tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n')

    loaded = config.load_sub_configuration('test.yaml')

    assert loaded == {'base': {'first': 1, 'second': 2}}
+
+
@pytest.mark.parametrize("location", ['project_dir', 'config_dir'])
def test_load_subconf_include_relative(make_config_path, tmp_path, location):
    """ An '!include' with a relative name is found in the project
        directory as well as in the settings directory.
    """
    config = make_config_path()

    testfile = config.config_dir / 'test.yaml'
    testfile.write_text('base: !include inc.yaml\n')
    (getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n')

    rules = config.load_sub_configuration('test.yaml')

    assert rules == dict(base=dict(first=1, second=2))
+
+
def test_load_subconf_include_bad_format(make_config_path):
    """ Including an existing file with a suffix other than '.yaml' is
        an error.
    """
    config = make_config_path()

    testfile = config.config_dir / 'test.yaml'
    testfile.write_text('base: !include inc.txt\n')
    (config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n')

    with pytest.raises(UsageError, match='Cannot handle config file format.'):
        config.load_sub_configuration('test.yaml')
+
+
def test_load_subconf_include_not_found(make_config_path):
    """ Including a file that does not exist is an error. """
    config = make_config_path()

    testfile = config.config_dir / 'test.yaml'
    testfile.write_text('base: !include inc.txt\n')

    with pytest.raises(UsageError, match='Config file not found.'):
        config.load_sub_configuration('test.yaml')
+
+
def test_load_subconf_include_recursive(make_config_path):
    """ An included file may itself include further files. """
    config = make_config_path()

    testfile = config.config_dir / 'test.yaml'
    testfile.write_text('base: !include inc.yaml\n')
    (config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n')
    (config.config_dir / 'more.yaml').write_text('- the end\n')

    rules = config.load_sub_configuration('test.yaml')

    assert rules == dict(base=[['the end'], 'upper'])
def _mk_analyser(norm=("[[:Punctuation:][:Space:]]+ > ' '",), trans=(':: upper()',),
variants=('~gasse -> gasse', 'street => st', )):
- cfgfile = tmp_path / 'analyser_test_config.yaml'
- with cfgfile.open('w') as stream:
- cfgstr = {'normalization' : list(norm),
- 'transliteration' : list(trans),
- 'variants' : [ {'words': list(variants)}]}
- yaml.dump(cfgstr, stream)
- tok.naming_rules = ICUNameProcessorRules(loader=ICURuleLoader(cfgfile))
+ cfgstr = {'normalization' : list(norm),
+ 'transliteration' : list(trans),
+ 'variants' : [ {'words': list(variants)}]}
+ tok.naming_rules = ICUNameProcessorRules(loader=ICURuleLoader(cfgstr))
return tok.name_analyzer()
from textwrap import dedent
import pytest
+import yaml
from nominatim.tokenizer.icu_rule_loader import ICURuleLoader
from nominatim.tokenizer.icu_name_processor import ICUNameProcessor, ICUNameProcessorRules
from nominatim.errors import UsageError
@pytest.fixture
-def cfgfile(tmp_path, suffix='.yaml'):
+def cfgfile():
def _create_config(*variants, **kwargs):
content = dedent("""\
normalization:
content += '\n'.join((" - " + s for s in variants)) + '\n'
for k, v in kwargs:
content += " {}: {}\n".format(k, v)
- fpath = tmp_path / ('test_config' + suffix)
- fpath.write_text(dedent(content))
- return fpath
+ return yaml.safe_load(content)
return _create_config
"""
Tests for converting a config file to ICU rules.
"""
-import pytest
from textwrap import dedent
+import pytest
+import yaml
+
from nominatim.tokenizer.icu_rule_loader import ICURuleLoader
from nominatim.errors import UsageError
from icu import Transliterator
@pytest.fixture
-def cfgfile(tmp_path, suffix='.yaml'):
+def cfgrules():
def _create_config(*variants, **kwargs):
content = dedent("""\
normalization:
content += '\n'.join((" - " + s for s in variants)) + '\n'
for k, v in kwargs:
content += " {}: {}\n".format(k, v)
- fpath = tmp_path / ('test_config' + suffix)
- fpath.write_text(dedent(content))
- return fpath
+ return yaml.safe_load(content)
return _create_config
-def test_empty_rule_file(tmp_path):
- fpath = tmp_path / ('test_config.yaml')
- fpath.write_text(dedent("""\
+def test_empty_rule_set():
+ rule_cfg = yaml.safe_load(dedent("""\
normalization:
transliteration:
variants:
"""))
- rules = ICURuleLoader(fpath)
+ rules = ICURuleLoader(rule_cfg)
assert rules.get_search_rules() == ''
assert rules.get_normalization_rules() == ''
assert rules.get_transliteration_rules() == ''
CONFIG_SECTIONS = ('normalization', 'transliteration', 'variants')
@pytest.mark.parametrize("section", CONFIG_SECTIONS)
-def test_missing_normalization(tmp_path, section):
- fpath = tmp_path / ('test_config.yaml')
- with fpath.open('w') as fd:
- for name in CONFIG_SECTIONS:
- if name != section:
- fd.write(name + ':\n')
+def test_missing_section(section):
+ rule_cfg = { s: {} for s in CONFIG_SECTIONS if s != section}
with pytest.raises(UsageError):
- ICURuleLoader(fpath)
+ ICURuleLoader(rule_cfg)
-def test_get_search_rules(cfgfile):
- loader = ICURuleLoader(cfgfile())
+def test_get_search_rules(cfgrules):
+ loader = ICURuleLoader(cfgrules())
rules = loader.get_search_rules()
trans = Transliterator.createFromRules("test", rules)
assert trans.transliterate(" проспект ") == " prospekt "
-def test_get_normalization_rules(cfgfile):
- loader = ICURuleLoader(cfgfile())
+def test_get_normalization_rules(cfgrules):
+ loader = ICURuleLoader(cfgrules())
rules = loader.get_normalization_rules()
trans = Transliterator.createFromRules("test", rules)
assert trans.transliterate(" проспект-Prospekt ") == " проспект prospekt "
-def test_get_transliteration_rules(cfgfile):
- loader = ICURuleLoader(cfgfile())
+def test_get_transliteration_rules(cfgrules):
+ loader = ICURuleLoader(cfgrules())
rules = loader.get_transliteration_rules()
trans = Transliterator.createFromRules("test", rules)
assert trans.transliterate(" проспект-Prospekt ") == " prospekt Prospekt "
-def test_transliteration_rules_from_file(tmp_path):
+def test_transliteration_rules_from_file(def_config, tmp_path):
+ def_config.project_dir = tmp_path
cfgpath = tmp_path / ('test_config.yaml')
cfgpath.write_text(dedent("""\
normalization:
transpath = tmp_path / ('transliteration.yaml')
transpath.write_text('- "x > y"')
- loader = ICURuleLoader(cfgpath)
+ loader = ICURuleLoader(def_config.load_sub_configuration('test_config.yaml'))
rules = loader.get_transliteration_rules()
trans = Transliterator.createFromRules("test", rules)
class TestGetReplacements:
@pytest.fixture(autouse=True)
- def setup_cfg(self, cfgfile):
- self.cfgfile = cfgfile
+ def setup_cfg(self, cfgrules):
+ self.cfgrules = cfgrules
def get_replacements(self, *variants):
- loader = ICURuleLoader(self.cfgfile(*variants))
+ loader = ICURuleLoader(self.cfgrules(*variants))
rules = loader.get_replacement_pairs()
return set((v.source, v.replacement) for v in rules)
'~foo~ -> bar', 'fo~ o -> bar'])
def test_invalid_variant_description(self, variant):
with pytest.raises(UsageError):
- ICURuleLoader(self.cfgfile(variant))
+ ICURuleLoader(self.cfgrules(variant))
def test_add_full(self):
repl = self.get_replacements("foo -> bar")
@pytest.fixture(autouse=True)
def read_config(def_config):
- country_info.setup_country_config(def_config.config_dir / 'country_settings.yaml')
+ country_info.setup_country_config(def_config)
@pytest.mark.parametrize("no_partitions", (True, False))
def test_setup_country_tables(src_dir, temp_db_with_extensions, dsn, temp_db_cursor,