From: Sarah Hoffmann Date: Thu, 28 Jul 2022 19:58:04 +0000 (+0200) Subject: Merge pull request #2780 from lonvia/python-modules-in-project-directory X-Git-Tag: v4.1.0~5 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/a8b037669ac8a9f52ad0091b83ae4f7f9b78b28e?hp=95d4061b2aa67d7fd3d29099cc1cfd9f2b4022c8 Merge pull request #2780 from lonvia/python-modules-in-project-directory Support for external sanitizer and token analysis modules --- diff --git a/nominatim/config.py b/nominatim/config.py index 43a634db..7502aff7 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -8,8 +8,10 @@ Nominatim configuration accessor. """ from typing import Dict, Any, List, Mapping, Optional +import importlib.util import logging import os +import sys from pathlib import Path import json import yaml @@ -73,6 +75,7 @@ class Configuration: data: Path self.lib_dir = _LibDirs() + self._private_plugins: Dict[str, object] = {} def set_libdirs(self, **kwargs: StrPath) -> None: @@ -219,6 +222,49 @@ class Configuration: return result + def load_plugin_module(self, module_name: str, internal_path: str) -> Any: + """ Load a Python module as a plugin. + + The module_name may have three variants: + + * A name without any '.' is assumed to be an internal module + and will be searched relative to `internal_path`. + * If the name ends in `.py`, module_name is assumed to be a + file name relative to the project directory. + * Any other name is assumed to be an absolute module name. + + In either of the variants the module name must start with a letter. + """ + if not module_name or not module_name[0].isidentifier(): + raise UsageError(f'Invalid module name {module_name}') + + if '.' not in module_name: + module_name = module_name.replace('-', '_') + full_module = f'{internal_path}.{module_name}' + return sys.modules.get(full_module) or importlib.import_module(full_module) + + if module_name.endswith('.py'): + if self.project_dir is None or not (self.project_dir / module_name).exists(): + raise UsageError(f"Cannot find module '{module_name}' in project directory.") + + if module_name in self._private_plugins: + return self._private_plugins[module_name] + + file_path = str(self.project_dir / module_name) + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec: + module = importlib.util.module_from_spec(spec) + # Do not add to global modules because there is no standard + # module name that Python can resolve. + self._private_plugins[module_name] = module + assert spec.loader is not None + spec.loader.exec_module(module) + + return module + + return sys.modules.get(module_name) or importlib.import_module(module_name) + + def find_config_file(self, filename: StrPath, config: Optional[str] = None) -> Path: """ Resolve the location of a configuration file given a filename and diff --git a/nominatim/tokenizer/icu_rule_loader.py b/nominatim/tokenizer/icu_rule_loader.py index 84040ddc..f461a1f1 100644 --- a/nominatim/tokenizer/icu_rule_loader.py +++ b/nominatim/tokenizer/icu_rule_loader.py @@ -8,7 +8,6 @@ Helper class to create ICU rules from a configuration file. """ from typing import Mapping, Any, Dict, Optional -import importlib import io import json import logging @@ -45,6 +44,7 @@ class ICURuleLoader: """ def __init__(self, config: Configuration) -> None: + self.config = config rules = config.load_sub_configuration('icu_tokenizer.yaml', config='TOKENIZER_CONFIG') @@ -92,7 +92,7 @@ class ICURuleLoader: def make_sanitizer(self) -> PlaceSanitizer: """ Create a place sanitizer from the configured rules. """ - return PlaceSanitizer(self.sanitizer_rules) + return PlaceSanitizer(self.sanitizer_rules, self.config) def make_token_analysis(self) -> ICUTokenAnalysis: @@ -144,7 +144,9 @@ class ICURuleLoader: LOG.fatal("ICU tokenizer configuration has two token " "analyzers with id '%s'.", name) raise UsageError("Syntax error in ICU tokenizer config.") - self.analysis[name] = TokenAnalyzerRule(section, self.normalization_rules) + self.analysis[name] = TokenAnalyzerRule(section, + self.normalization_rules, + self.config) @staticmethod @@ -168,15 +170,18 @@ class TokenAnalyzerRule: and creates a new token analyzer on request. """ - def __init__(self, rules: Mapping[str, Any], normalization_rules: str) -> None: - # Find the analysis module - module_name = 'nominatim.tokenizer.token_analysis.' \ - + _get_section(rules, 'analyzer').replace('-', '_') - self._analysis_mod: AnalysisModule = importlib.import_module(module_name) + def __init__(self, rules: Mapping[str, Any], normalization_rules: str, + config: Configuration) -> None: + analyzer_name = _get_section(rules, 'analyzer') + if not analyzer_name or not isinstance(analyzer_name, str): + raise UsageError("'analyzer' parameter needs to be simple string") + + self._analysis_mod: AnalysisModule = \ + config.load_plugin_module(analyzer_name, 'nominatim.tokenizer.token_analysis') - # Load the configuration. self.config = self._analysis_mod.configure(rules, normalization_rules) + def create(self, normalizer: Any, transliterator: Any) -> Analyser: """ Create a new analyser instance for the given rule. """ diff --git a/nominatim/tokenizer/place_sanitizer.py b/nominatim/tokenizer/place_sanitizer.py index 3f548e06..c7dfd1ba 100644 --- a/nominatim/tokenizer/place_sanitizer.py +++ b/nominatim/tokenizer/place_sanitizer.py @@ -9,9 +9,9 @@ Handler for cleaning name and address tags in place information before it is handed to the token analysis. """ from typing import Optional, List, Mapping, Sequence, Callable, Any, Tuple -import importlib from nominatim.errors import UsageError +from nominatim.config import Configuration from nominatim.tokenizer.sanitizers.config import SanitizerConfig from nominatim.tokenizer.sanitizers.base import SanitizerHandler, ProcessInfo, PlaceName from nominatim.data.place_info import PlaceInfo @@ -22,16 +22,21 @@ class PlaceSanitizer: names and address before they are used by the token analysers. """ - def __init__(self, rules: Optional[Sequence[Mapping[str, Any]]]) -> None: + def __init__(self, rules: Optional[Sequence[Mapping[str, Any]]], + config: Configuration) -> None: self.handlers: List[Callable[[ProcessInfo], None]] = [] if rules: for func in rules: if 'step' not in func: raise UsageError("Sanitizer rule is missing the 'step' attribute.") - module_name = 'nominatim.tokenizer.sanitizers.' + func['step'].replace('-', '_') - handler_module: SanitizerHandler = importlib.import_module(module_name) - self.handlers.append(handler_module.create(SanitizerConfig(func))) + if not isinstance(func['step'], str): + raise UsageError("'step' attribute must be a simple string.") + + module: SanitizerHandler = \ + config.load_plugin_module(func['step'], 'nominatim.tokenizer.sanitizers') + + self.handlers.append(module.create(SanitizerConfig(func))) def process_names(self, place: PlaceInfo) -> Tuple[List[PlaceName], List[PlaceName]]: diff --git a/test/python/config/test_config_load_module.py b/test/python/config/test_config_load_module.py new file mode 100644 index 00000000..df6c4794 --- /dev/null +++ b/test/python/config/test_config_load_module.py @@ -0,0 +1,81 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Test for loading extra Python modules. +""" +from pathlib import Path +import sys + +import pytest + +from nominatim.config import Configuration + +@pytest.fixture +def test_config(src_dir, tmp_path): + """ Create a configuration object with project and config directories + in a temporary directory. + """ + (tmp_path / 'project').mkdir() + (tmp_path / 'config').mkdir() + conf = Configuration(tmp_path / 'project', src_dir / 'settings') + conf.config_dir = tmp_path / 'config' + return conf + + +def test_load_default_module(test_config): + module = test_config.load_plugin_module('version', 'nominatim') + + assert isinstance(module.NOMINATIM_VERSION, tuple) + +def test_load_default_module_with_hyphen(test_config): + module = test_config.load_plugin_module('place-info', 'nominatim.data') + + assert isinstance(module.PlaceInfo, object) + + +def test_load_plugin_module(test_config, tmp_path): + (tmp_path / 'project' / 'testpath').mkdir() + (tmp_path / 'project' / 'testpath' / 'mymod.py')\ + .write_text("def my_test_function():\n return 'gjwitlsSG42TG%'") + + module = test_config.load_plugin_module('testpath/mymod.py', 'private.something') + + assert module.my_test_function() == 'gjwitlsSG42TG%' + + # also test reloading module + (tmp_path / 'project' / 'testpath' / 'mymod.py')\ + .write_text("def my_test_function():\n return 'hjothjorhj'") + + module = test_config.load_plugin_module('testpath/mymod.py', 'private.something') + + assert module.my_test_function() == 'gjwitlsSG42TG%' + + +def test_load_external_library_module(test_config, tmp_path, monkeypatch): + MODULE_NAME = 'foogurenqodr4' + pythonpath = tmp_path / 'priv-python' + pythonpath.mkdir() + (pythonpath / MODULE_NAME).mkdir() + (pythonpath / MODULE_NAME / '__init__.py').write_text('') + (pythonpath / MODULE_NAME / 'tester.py')\ + .write_text("def my_test_function():\n return 'gjwitlsSG42TG%'") + + monkeypatch.syspath_prepend(pythonpath) + + module = test_config.load_plugin_module(f'{MODULE_NAME}.tester', 'private.something') + + assert module.my_test_function() == 'gjwitlsSG42TG%' + + # also test reloading module + (pythonpath / MODULE_NAME / 'tester.py')\ + .write_text("def my_test_function():\n return 'dfigjreigj'") + + module = test_config.load_plugin_module(f'{MODULE_NAME}.tester', 'private.something') + + assert module.my_test_function() == 'gjwitlsSG42TG%' + + del sys.modules[f'{MODULE_NAME}.tester'] diff --git a/test/python/tokenizer/sanitizers/test_clean_housenumbers.py b/test/python/tokenizer/sanitizers/test_clean_housenumbers.py index 128e1201..11a71a5f 100644 --- a/test/python/tokenizer/sanitizers/test_clean_housenumbers.py +++ b/test/python/tokenizer/sanitizers/test_clean_housenumbers.py @@ -13,14 +13,14 @@ from nominatim.tokenizer.place_sanitizer import PlaceSanitizer from nominatim.data.place_info import PlaceInfo @pytest.fixture -def sanitize(request): +def sanitize(request, def_config): sanitizer_args = {'step': 'clean-housenumbers'} for mark in request.node.iter_markers(name="sanitizer_params"): sanitizer_args.update({k.replace('_', '-') : v for k,v in mark.kwargs.items()}) def _run(**kwargs): place = PlaceInfo({'address': kwargs}) - _, address = PlaceSanitizer([sanitizer_args]).process_names(place) + _, address = PlaceSanitizer([sanitizer_args], def_config).process_names(place) return sorted([(p.kind, p.name) for p in address]) @@ -45,24 +45,24 @@ def test_filter_kind(sanitize): @pytest.mark.parametrize('number', ('6523', 'n/a', '4')) -def test_convert_to_name_converted(number): +def test_convert_to_name_converted(def_config, number): sanitizer_args = {'step': 'clean-housenumbers', 'convert-to-name': (r'\d+', 'n/a')} place = PlaceInfo({'address': {'housenumber': number}}) - names, address = PlaceSanitizer([sanitizer_args]).process_names(place) + names, address = PlaceSanitizer([sanitizer_args], def_config).process_names(place) assert ('housenumber', number) in set((p.kind, p.name) for p in names) assert 'housenumber' not in set(p.kind for p in address) @pytest.mark.parametrize('number', ('a54', 'n.a', 'bow')) -def test_convert_to_name_unconverted(number): +def test_convert_to_name_unconverted(def_config, number): sanitizer_args = {'step': 'clean-housenumbers', 'convert-to-name': (r'\d+', 'n/a')} place = PlaceInfo({'address': {'housenumber': number}}) - names, address = PlaceSanitizer([sanitizer_args]).process_names(place) + names, address = PlaceSanitizer([sanitizer_args], def_config).process_names(place) assert 'housenumber' not in set(p.kind for p in names) assert ('housenumber', number) in set((p.kind, p.name) for p in address) diff --git a/test/python/tokenizer/sanitizers/test_clean_postcodes.py b/test/python/tokenizer/sanitizers/test_clean_postcodes.py index 237527f1..f2c965ad 100644 --- a/test/python/tokenizer/sanitizers/test_clean_postcodes.py +++ b/test/python/tokenizer/sanitizers/test_clean_postcodes.py @@ -25,7 +25,7 @@ def sanitize(def_config, request): if country is not None: pi['country_code'] = country - _, address = PlaceSanitizer([sanitizer_args]).process_names(PlaceInfo(pi)) + _, address = PlaceSanitizer([sanitizer_args], def_config).process_names(PlaceInfo(pi)) return sorted([(p.kind, p.name) for p in address]) diff --git a/test/python/tokenizer/sanitizers/test_split_name_list.py b/test/python/tokenizer/sanitizers/test_split_name_list.py index 67157fba..9ca539d5 100644 --- a/test/python/tokenizer/sanitizers/test_split_name_list.py +++ b/test/python/tokenizer/sanitizers/test_split_name_list.py @@ -14,58 +14,66 @@ from nominatim.data.place_info import PlaceInfo from nominatim.errors import UsageError -def run_sanitizer_on(**kwargs): - place = PlaceInfo({'name': kwargs}) - name, _ = PlaceSanitizer([{'step': 'split-name-list'}]).process_names(place) +class TestSplitName: - return sorted([(p.name, p.kind, p.suffix) for p in name]) + @pytest.fixture(autouse=True) + def setup_country(self, def_config): + self.config = def_config -def sanitize_with_delimiter(delimiter, name): - place = PlaceInfo({'name': {'name': name}}) - san = PlaceSanitizer([{'step': 'split-name-list', 'delimiters': delimiter}]) - name, _ = san.process_names(place) + def run_sanitizer_on(self, **kwargs): + place = PlaceInfo({'name': kwargs}) + name, _ = PlaceSanitizer([{'step': 'split-name-list'}], self.config).process_names(place) - return sorted([p.name for p in name]) + return sorted([(p.name, p.kind, p.suffix) for p in name]) -def test_simple(): - assert run_sanitizer_on(name='ABC') == [('ABC', 'name', None)] - assert run_sanitizer_on(name='') == [('', 'name', None)] + def sanitize_with_delimiter(self, delimiter, name): + place = PlaceInfo({'name': {'name': name}}) + san = PlaceSanitizer([{'step': 'split-name-list', 'delimiters': delimiter}], + self.config) + name, _ = san.process_names(place) + return sorted([p.name for p in name]) -def test_splits(): - assert run_sanitizer_on(name='A;B;C') == [('A', 'name', None), - ('B', 'name', None), - ('C', 'name', None)] - assert run_sanitizer_on(short_name=' House, boat ') == [('House', 'short_name', None), - ('boat', 'short_name', None)] + def test_simple(self): + assert self.run_sanitizer_on(name='ABC') == [('ABC', 'name', None)] + assert self.run_sanitizer_on(name='') == [('', 'name', None)] -def test_empty_fields(): - assert run_sanitizer_on(name='A;;B') == [('A', 'name', None), - ('B', 'name', None)] - assert run_sanitizer_on(name='A; ,B') == [('A', 'name', None), - ('B', 'name', None)] - assert run_sanitizer_on(name=' ;B') == [('B', 'name', None)] - assert run_sanitizer_on(name='B,') == [('B', 'name', None)] + def test_splits(self): + assert self.run_sanitizer_on(name='A;B;C') == [('A', 'name', None), + ('B', 'name', None), + ('C', 'name', None)] + assert self.run_sanitizer_on(short_name=' House, boat ') == [('House', 'short_name', None), + ('boat', 'short_name', None)] -def test_custom_delimiters(): - assert sanitize_with_delimiter(':', '12:45,3') == ['12', '45,3'] - assert sanitize_with_delimiter('\\', 'a;\\b!#@ \\') == ['a;', 'b!#@'] - assert sanitize_with_delimiter('[]', 'foo[to]be') == ['be', 'foo', 'to'] - assert sanitize_with_delimiter(' ', 'morning sun') == ['morning', 'sun'] + def test_empty_fields(self): + assert self.run_sanitizer_on(name='A;;B') == [('A', 'name', None), + ('B', 'name', None)] + assert self.run_sanitizer_on(name='A; ,B') == [('A', 'name', None), + ('B', 'name', None)] + assert self.run_sanitizer_on(name=' ;B') == [('B', 'name', None)] + assert self.run_sanitizer_on(name='B,') == [('B', 'name', None)] -def test_empty_delimiter_set(): - with pytest.raises(UsageError): - sanitize_with_delimiter('', 'abc') + def test_custom_delimiters(self): + assert self.sanitize_with_delimiter(':', '12:45,3') == ['12', '45,3'] + assert self.sanitize_with_delimiter('\\', 'a;\\b!#@ \\') == ['a;', 'b!#@'] + assert self.sanitize_with_delimiter('[]', 'foo[to]be') == ['be', 'foo', 'to'] + assert self.sanitize_with_delimiter(' ', 'morning sun') == ['morning', 'sun'] -def test_no_name_list(): + + def test_empty_delimiter_set(self): + with pytest.raises(UsageError): + self.sanitize_with_delimiter('', 'abc') + + +def test_no_name_list(def_config): place = PlaceInfo({'address': {'housenumber': '3'}}) - name, address = PlaceSanitizer([{'step': 'split-name-list'}]).process_names(place) + name, address = PlaceSanitizer([{'step': 'split-name-list'}], def_config).process_names(place) assert not name assert len(address) == 1 diff --git a/test/python/tokenizer/sanitizers/test_strip_brace_terms.py b/test/python/tokenizer/sanitizers/test_strip_brace_terms.py index eb554364..7fa0a018 100644 --- a/test/python/tokenizer/sanitizers/test_strip_brace_terms.py +++ b/test/python/tokenizer/sanitizers/test_strip_brace_terms.py @@ -12,39 +12,45 @@ import pytest from nominatim.tokenizer.place_sanitizer import PlaceSanitizer from nominatim.data.place_info import PlaceInfo -def run_sanitizer_on(**kwargs): - place = PlaceInfo({'name': kwargs}) - name, _ = PlaceSanitizer([{'step': 'strip-brace-terms'}]).process_names(place) +class TestStripBrace: - return sorted([(p.name, p.kind, p.suffix) for p in name]) + @pytest.fixture(autouse=True) + def setup_country(self, def_config): + self.config = def_config + def run_sanitizer_on(self, **kwargs): + place = PlaceInfo({'name': kwargs}) + name, _ = PlaceSanitizer([{'step': 'strip-brace-terms'}], self.config).process_names(place) -def test_no_braces(): - assert run_sanitizer_on(name='foo', ref='23') == [('23', 'ref', None), - ('foo', 'name', None)] + return sorted([(p.name, p.kind, p.suffix) for p in name]) -def test_simple_braces(): - assert run_sanitizer_on(name='Halle (Saale)', ref='3')\ - == [('3', 'ref', None), ('Halle', 'name', None), ('Halle (Saale)', 'name', None)] - assert run_sanitizer_on(name='ack ( bar')\ - == [('ack', 'name', None), ('ack ( bar', 'name', None)] + def test_no_braces(self): + assert self.run_sanitizer_on(name='foo', ref='23') == [('23', 'ref', None), + ('foo', 'name', None)] -def test_only_braces(): - assert run_sanitizer_on(name='(maybe)') == [('(maybe)', 'name', None)] + def test_simple_braces(self): + assert self.run_sanitizer_on(name='Halle (Saale)', ref='3')\ + == [('3', 'ref', None), ('Halle', 'name', None), ('Halle (Saale)', 'name', None)] + assert self.run_sanitizer_on(name='ack ( bar')\ + == [('ack', 'name', None), ('ack ( bar', 'name', None)] -def test_double_braces(): - assert run_sanitizer_on(name='a((b))') == [('a', 'name', None), - ('a((b))', 'name', None)] - assert run_sanitizer_on(name='a (b) (c)') == [('a', 'name', None), - ('a (b) (c)', 'name', None)] + def test_only_braces(self): + assert self.run_sanitizer_on(name='(maybe)') == [('(maybe)', 'name', None)] -def test_no_names(): + def test_double_braces(self): + assert self.run_sanitizer_on(name='a((b))') == [('a', 'name', None), + ('a((b))', 'name', None)] + assert self.run_sanitizer_on(name='a (b) (c)') == [('a', 'name', None), + ('a (b) (c)', 'name', None)] + + +def test_no_names(def_config): place = PlaceInfo({'address': {'housenumber': '3'}}) - name, address = PlaceSanitizer([{'step': 'strip-brace-terms'}]).process_names(place) + name, address = PlaceSanitizer([{'step': 'strip-brace-terms'}], def_config).process_names(place) assert not name assert len(address) == 1 diff --git a/test/python/tokenizer/sanitizers/test_tag_analyzer_by_language.py b/test/python/tokenizer/sanitizers/test_tag_analyzer_by_language.py index 306b8027..1feecf3f 100644 --- a/test/python/tokenizer/sanitizers/test_tag_analyzer_by_language.py +++ b/test/python/tokenizer/sanitizers/test_tag_analyzer_by_language.py @@ -15,11 +15,16 @@ from nominatim.data.country_info import setup_country_config class TestWithDefaults: - @staticmethod - def run_sanitizer_on(country, **kwargs): + @pytest.fixture(autouse=True) + def setup_country(self, def_config): + self.config = def_config + + + def run_sanitizer_on(self, country, **kwargs): place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}, 'country_code': country}) - name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language'}]).process_names(place) + name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language'}], + self.config).process_names(place) return sorted([(p.name, p.kind, p.suffix, p.attr) for p in name]) @@ -44,12 +49,17 @@ class TestWithDefaults: class TestFilterKind: - @staticmethod - def run_sanitizer_on(filt, **kwargs): + @pytest.fixture(autouse=True) + def setup_country(self, def_config): + self.config = def_config + + + def run_sanitizer_on(self, filt, **kwargs): place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}, 'country_code': 'de'}) name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language', - 'filter-kind': filt}]).process_names(place) + 'filter-kind': filt}], + self.config).process_names(place) return sorted([(p.name, p.kind, p.suffix, p.attr) for p in name]) @@ -94,14 +104,16 @@ class TestDefaultCountry: @pytest.fixture(autouse=True) def setup_country(self, def_config): setup_country_config(def_config) + self.config = def_config + - @staticmethod - def run_sanitizer_append(mode, country, **kwargs): + def run_sanitizer_append(self, mode, country, **kwargs): place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}, 'country_code': country}) name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language', 'use-defaults': mode, - 'mode': 'append'}]).process_names(place) + 'mode': 'append'}], + self.config).process_names(place) assert all(isinstance(p.attr, dict) for p in name) assert all(len(p.attr) <= 1 for p in name) @@ -111,13 +123,13 @@ class TestDefaultCountry: return sorted([(p.name, p.attr.get('analyzer', '')) for p in name]) - @staticmethod - def run_sanitizer_replace(mode, country, **kwargs): + def run_sanitizer_replace(self, mode, country, **kwargs): place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}, 'country_code': country}) name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language', 'use-defaults': mode, - 'mode': 'replace'}]).process_names(place) + 'mode': 'replace'}], + self.config).process_names(place) assert all(isinstance(p.attr, dict) for p in name) assert all(len(p.attr) <= 1 for p in name) @@ -131,7 +143,8 @@ class TestDefaultCountry: place = PlaceInfo({'name': {'name': 'something'}}) name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language', 'use-defaults': 'all', - 'mode': 'replace'}]).process_names(place) + 'mode': 'replace'}], + self.config).process_names(place) assert len(name) == 1 assert name[0].name == 'something' @@ -199,14 +212,19 @@ class TestDefaultCountry: class TestCountryWithWhitelist: - @staticmethod - def run_sanitizer_on(mode, country, **kwargs): + @pytest.fixture(autouse=True) + def setup_country(self, def_config): + self.config = def_config + + + def run_sanitizer_on(self, mode, country, **kwargs): place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}, 'country_code': country}) name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language', 'use-defaults': mode, 'mode': 'replace', - 'whitelist': ['de', 'fr', 'ru']}]).process_names(place) + 'whitelist': ['de', 'fr', 'ru']}], + self.config).process_names(place) assert all(isinstance(p.attr, dict) for p in name) assert all(len(p.attr) <= 1 for p in name) @@ -238,12 +256,17 @@ class TestCountryWithWhitelist: class TestWhiteList: - @staticmethod - def run_sanitizer_on(whitelist, **kwargs): + @pytest.fixture(autouse=True) + def setup_country(self, def_config): + self.config = def_config + + + def run_sanitizer_on(self, whitelist, **kwargs): place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}}) name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language', 'mode': 'replace', - 'whitelist': whitelist}]).process_names(place) + 'whitelist': whitelist}], + self.config).process_names(place) assert all(isinstance(p.attr, dict) for p in name) assert all(len(p.attr) <= 1 for p in name) diff --git a/test/python/tokenizer/test_place_sanitizer.py b/test/python/tokenizer/test_place_sanitizer.py index 31401bd1..3dd3033c 100644 --- a/test/python/tokenizer/test_place_sanitizer.py +++ b/test/python/tokenizer/test_place_sanitizer.py @@ -47,8 +47,8 @@ def test_placeinfo_has_attr(): assert not place.has_attr('whatever') -def test_sanitizer_default(): - san = sanitizer.PlaceSanitizer([{'step': 'split-name-list'}]) +def test_sanitizer_default(def_config): + san = sanitizer.PlaceSanitizer([{'step': 'split-name-list'}], def_config) name, address = san.process_names(PlaceInfo({'name': {'name:de:de': '1;2;3'}, 'address': {'street': 'Bald'}})) @@ -63,8 +63,8 @@ def test_sanitizer_default(): @pytest.mark.parametrize('rules', [None, []]) -def test_sanitizer_empty_list(rules): - san = sanitizer.PlaceSanitizer(rules) +def test_sanitizer_empty_list(def_config, rules): + san = sanitizer.PlaceSanitizer(rules, def_config) name, address = san.process_names(PlaceInfo({'name': {'name:de:de': '1;2;3'}})) @@ -72,6 +72,6 @@ def test_sanitizer_empty_list(rules): assert all(isinstance(n, sanitizer.PlaceName) for n in name) -def test_sanitizer_missing_step_definition(): +def test_sanitizer_missing_step_definition(def_config): with pytest.raises(UsageError): - san = sanitizer.PlaceSanitizer([{'id': 'split-name-list'}]) + san = sanitizer.PlaceSanitizer([{'id': 'split-name-list'}], def_config)