From: Sarah Hoffmann Date: Thu, 7 Jul 2022 14:12:11 +0000 (+0200) Subject: Merge pull request #2760 from lonvia/reorganize-data-classes X-Git-Tag: v4.1.0~15 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/300612c5a8ebfa9eff99b7f88cfcf4e0ed2fbbfc?hp=69e51aebab23787b572998ba183910921a16bc02 Merge pull request #2760 from lonvia/reorganize-data-classes Code cleanup: move some common code into the data submodule --- diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index f0ec358b..73095468 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -63,8 +63,9 @@ class SetupAll: @staticmethod - def run(args): - from ..tools import database_import, refresh, postcodes, freeze, country_info + def run(args): # pylint: disable=too-many-statements + from ..data import country_info + from ..tools import database_import, refresh, postcodes, freeze from ..indexer.indexer import Indexer country_info.setup_country_config(args.config) diff --git a/nominatim/tools/country_info.py b/nominatim/data/country_info.py similarity index 100% rename from nominatim/tools/country_info.py rename to nominatim/data/country_info.py diff --git a/nominatim/indexer/place_info.py b/nominatim/data/place_info.py similarity index 87% rename from nominatim/indexer/place_info.py rename to nominatim/data/place_info.py index 87ecb731..d2ba3979 100644 --- a/nominatim/indexer/place_info.py +++ b/nominatim/data/place_info.py @@ -9,8 +9,6 @@ Wrapper around place information the indexer gets from the database and hands to the tokenizer. """ -import psycopg2.extras - class PlaceInfo: """ Data class containing all information the tokenizer gets about a place it should process the names for. @@ -20,13 +18,6 @@ class PlaceInfo: self._info = info - def analyze(self, analyzer): - """ Process this place with the given tokenizer and return the - result in psycopg2-compatible Json. - """ - return psycopg2.extras.Json(analyzer.process_place(self)) - - @property def name(self): """ A dictionary with the names of the place or None if the place diff --git a/nominatim/data/postcode_format.py b/nominatim/data/postcode_format.py index 6ae43b7d..366ea505 100644 --- a/nominatim/data/postcode_format.py +++ b/nominatim/data/postcode_format.py @@ -11,7 +11,7 @@ format. import re from nominatim.errors import UsageError -from nominatim.tools import country_info +from nominatim.data import country_info class CountryPostcodeMatcher: """ Matches and formats a postcode according to a format definition diff --git a/nominatim/indexer/runners.py b/nominatim/indexer/runners.py index 9a30ffe6..c8495ee4 100644 --- a/nominatim/indexer/runners.py +++ b/nominatim/indexer/runners.py @@ -11,14 +11,17 @@ tasks. import functools from psycopg2 import sql as pysql +import psycopg2.extras -from nominatim.indexer.place_info import PlaceInfo +from nominatim.data.place_info import PlaceInfo # pylint: disable=C0111 def _mk_valuelist(template, num): return pysql.SQL(',').join([pysql.SQL(template)] * num) +def _analyze_place(place, analyzer): + return psycopg2.extras.Json(analyzer.process_place(PlaceInfo(place))) class AbstractPlacexRunner: """ Returns SQL commands for indexing of the placex table. @@ -56,7 +59,7 @@ class AbstractPlacexRunner: for place in places: for field in ('place_id', 'name', 'address', 'linked_place_id'): values.append(place[field]) - values.append(PlaceInfo(place).analyze(self.analyzer)) + values.append(_analyze_place(place, self.analyzer)) worker.perform(self._index_sql(len(places)), values) @@ -150,7 +153,7 @@ class InterpolationRunner: values = [] for place in places: values.extend((place[x] for x in ('place_id', 'address'))) - values.append(PlaceInfo(place).analyze(self.analyzer)) + values.append(_analyze_place(place, self.analyzer)) worker.perform(self._index_sql(len(places)), values) diff --git a/nominatim/tokenizer/base.py b/nominatim/tokenizer/base.py index f81b3bc2..70a54bfd 100644 --- a/nominatim/tokenizer/base.py +++ b/nominatim/tokenizer/base.py @@ -12,7 +12,7 @@ from abc import ABC, abstractmethod from typing import List, Tuple, Dict, Any from nominatim.config import Configuration -from nominatim.indexer.place_info import PlaceInfo +from nominatim.data.place_info import PlaceInfo # pylint: disable=unnecessary-pass diff --git a/nominatim/tokenizer/icu_rule_loader.py b/nominatim/tokenizer/icu_rule_loader.py index 8a564355..035b6698 100644 --- a/nominatim/tokenizer/icu_rule_loader.py +++ b/nominatim/tokenizer/icu_rule_loader.py @@ -17,7 +17,7 @@ from nominatim.db.properties import set_property, get_property from nominatim.errors import UsageError from nominatim.tokenizer.place_sanitizer import PlaceSanitizer from nominatim.tokenizer.icu_token_analysis import ICUTokenAnalysis -import nominatim.tools.country_info +import nominatim.data.country_info LOG = logging.getLogger() @@ -46,7 +46,7 @@ class ICURuleLoader: config='TOKENIZER_CONFIG') # Make sure country information is available to analyzers and sanitizers. - nominatim.tools.country_info.setup_country_config(config) + nominatim.data.country_info.setup_country_config(config) self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization') self.transliteration_rules = self._cfg_to_icu_rules(rules, 'transliteration') diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index a6ff08a4..171d4392 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -16,7 +16,7 @@ from textwrap import dedent from nominatim.db.connection import connect from nominatim.db.utils import CopyBuffer from nominatim.db.sql_preprocessor import SQLPreprocessor -from nominatim.indexer.place_info import PlaceInfo +from nominatim.data.place_info import PlaceInfo from nominatim.tokenizer.icu_rule_loader import ICURuleLoader from nominatim.tokenizer.base import AbstractAnalyzer, AbstractTokenizer diff --git a/nominatim/tokenizer/sanitizers/tag_analyzer_by_language.py b/nominatim/tokenizer/sanitizers/tag_analyzer_by_language.py index 9a99d127..d3413c1a 100644 --- a/nominatim/tokenizer/sanitizers/tag_analyzer_by_language.py +++ b/nominatim/tokenizer/sanitizers/tag_analyzer_by_language.py @@ -30,7 +30,7 @@ Arguments: any analyzer tagged) is retained. (default: replace) """ -from nominatim.tools import country_info +from nominatim.data import country_info class _AnalyzerByLanguage: """ Processor for tagging the language of names in a place. diff --git a/nominatim/tools/tiger_data.py b/nominatim/tools/tiger_data.py index 6e37df5e..e78dcd8f 100644 --- a/nominatim/tools/tiger_data.py +++ b/nominatim/tools/tiger_data.py @@ -13,11 +13,13 @@ import logging import os import tarfile +from psycopg2.extras import Json + from nominatim.db.connection import connect from nominatim.db.async_connection import WorkerPool from nominatim.db.sql_preprocessor import SQLPreprocessor from nominatim.errors import UsageError -from nominatim.indexer.place_info import PlaceInfo +from nominatim.data.place_info import PlaceInfo LOG = logging.getLogger() @@ -87,7 +89,7 @@ def handle_threaded_sql_statements(pool, fd, analyzer): address = dict(street=row['street'], postcode=row['postcode']) args = ('SRID=4326;' + row['geometry'], int(row['from']), int(row['to']), row['interpolation'], - PlaceInfo({'address': address}).analyze(analyzer), + Json(analyzer.process_place(PlaceInfo({'address': address}))), analyzer.normalize_postcode(row['postcode'])) except ValueError: continue diff --git a/test/python/cli/test_cmd_import.py b/test/python/cli/test_cmd_import.py index 84c7743a..d545c760 100644 --- a/test/python/cli/test_cmd_import.py +++ b/test/python/cli/test_cmd_import.py @@ -10,7 +10,7 @@ Tests for import command of the command-line interface wrapper. import pytest import nominatim.tools.database_import -import nominatim.tools.country_info +import nominatim.data.country_info import nominatim.tools.refresh import nominatim.tools.postcodes import nominatim.indexer.indexer @@ -37,7 +37,7 @@ class TestCliImportWithDb: def test_import_full(self, mock_func_factory, with_updates, place_table, property_table): mocks = [ mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'), - mock_func_factory(nominatim.tools.country_info, 'setup_country_tables'), + mock_func_factory(nominatim.data.country_info, 'setup_country_tables'), mock_func_factory(nominatim.tools.database_import, 'import_osm_data'), mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'), mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'), @@ -46,7 +46,7 @@ class TestCliImportWithDb: mock_func_factory(nominatim.tools.database_import, 'create_table_triggers'), mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'), mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), - mock_func_factory(nominatim.tools.country_info, 'create_country_names'), + mock_func_factory(nominatim.data.country_info, 'create_country_names'), mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_config'), mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'), mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), @@ -76,7 +76,7 @@ class TestCliImportWithDb: mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'), mock_func_factory(nominatim.tools.database_import, 'load_data'), mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), - mock_func_factory(nominatim.tools.country_info, 'create_country_names'), + mock_func_factory(nominatim.data.country_info, 'create_country_names'), mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'), mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), mock_func_factory(nominatim.tools.refresh, 'setup_website'), @@ -94,7 +94,7 @@ class TestCliImportWithDb: temp_db_conn): mocks = [ mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), - mock_func_factory(nominatim.tools.country_info, 'create_country_names'), + mock_func_factory(nominatim.data.country_info, 'create_country_names'), mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), mock_func_factory(nominatim.tools.refresh, 'setup_website'), mock_func_factory(nominatim.db.properties, 'set_property') @@ -115,7 +115,7 @@ class TestCliImportWithDb: def test_import_continue_postprocess(self, mock_func_factory): mocks = [ mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), - mock_func_factory(nominatim.tools.country_info, 'create_country_names'), + mock_func_factory(nominatim.data.country_info, 'create_country_names'), mock_func_factory(nominatim.tools.refresh, 'setup_website'), mock_func_factory(nominatim.db.properties, 'set_property') ] diff --git a/test/python/tools/test_country_info.py b/test/python/data/test_country_info.py similarity index 99% rename from test/python/tools/test_country_info.py rename to test/python/data/test_country_info.py index 3f00d54e..2234f40d 100644 --- a/test/python/tools/test_country_info.py +++ b/test/python/data/test_country_info.py @@ -10,7 +10,7 @@ Tests for function that handle country properties. from textwrap import dedent import pytest -from nominatim.tools import country_info +from nominatim.data import country_info @pytest.fixture def loaded_country(def_config): diff --git a/test/python/dummy_tokenizer.py b/test/python/dummy_tokenizer.py index 57bf2f2f..ed32c650 100644 --- a/test/python/dummy_tokenizer.py +++ b/test/python/dummy_tokenizer.py @@ -7,7 +7,7 @@ """ Tokenizer for testing. """ -from nominatim.indexer.place_info import PlaceInfo +from nominatim.data.place_info import PlaceInfo from nominatim.config import Configuration def create(dsn, data_dir): diff --git a/test/python/tokenizer/sanitizers/test_clean_housenumbers.py b/test/python/tokenizer/sanitizers/test_clean_housenumbers.py index 34cc7413..128e1201 100644 --- a/test/python/tokenizer/sanitizers/test_clean_housenumbers.py +++ b/test/python/tokenizer/sanitizers/test_clean_housenumbers.py @@ -10,7 +10,7 @@ Tests for the sanitizer that normalizes housenumbers. import pytest from nominatim.tokenizer.place_sanitizer import PlaceSanitizer -from nominatim.indexer.place_info import PlaceInfo +from nominatim.data.place_info import PlaceInfo @pytest.fixture def sanitize(request): diff --git a/test/python/tokenizer/sanitizers/test_clean_postcodes.py b/test/python/tokenizer/sanitizers/test_clean_postcodes.py index 44376196..237527f1 100644 --- a/test/python/tokenizer/sanitizers/test_clean_postcodes.py +++ b/test/python/tokenizer/sanitizers/test_clean_postcodes.py @@ -10,8 +10,8 @@ Tests for the sanitizer that normalizes postcodes. import pytest from nominatim.tokenizer.place_sanitizer import PlaceSanitizer -from nominatim.indexer.place_info import PlaceInfo -from nominatim.tools import country_info +from nominatim.data.place_info import PlaceInfo +from nominatim.data import country_info @pytest.fixture def sanitize(def_config, request): diff --git a/test/python/tokenizer/sanitizers/test_split_name_list.py b/test/python/tokenizer/sanitizers/test_split_name_list.py index 47bd1e44..67157fba 100644 --- a/test/python/tokenizer/sanitizers/test_split_name_list.py +++ b/test/python/tokenizer/sanitizers/test_split_name_list.py @@ -10,7 +10,7 @@ Tests for the sanitizer that splits multivalue lists. import pytest from nominatim.tokenizer.place_sanitizer import PlaceSanitizer -from nominatim.indexer.place_info import PlaceInfo +from nominatim.data.place_info import PlaceInfo from nominatim.errors import UsageError diff --git a/test/python/tokenizer/sanitizers/test_strip_brace_terms.py b/test/python/tokenizer/sanitizers/test_strip_brace_terms.py index c37562dd..eb554364 100644 --- a/test/python/tokenizer/sanitizers/test_strip_brace_terms.py +++ b/test/python/tokenizer/sanitizers/test_strip_brace_terms.py @@ -10,7 +10,7 @@ Tests for the sanitizer that handles braced suffixes. import pytest from nominatim.tokenizer.place_sanitizer import PlaceSanitizer -from nominatim.indexer.place_info import PlaceInfo +from nominatim.data.place_info import PlaceInfo def run_sanitizer_on(**kwargs): place = PlaceInfo({'name': kwargs}) diff --git a/test/python/tokenizer/sanitizers/test_tag_analyzer_by_language.py b/test/python/tokenizer/sanitizers/test_tag_analyzer_by_language.py index dfd25113..306b8027 100644 --- a/test/python/tokenizer/sanitizers/test_tag_analyzer_by_language.py +++ b/test/python/tokenizer/sanitizers/test_tag_analyzer_by_language.py @@ -9,9 +9,9 @@ Tests for the sanitizer that enables language-dependent analyzers. """ import pytest -from nominatim.indexer.place_info import PlaceInfo +from nominatim.data.place_info import PlaceInfo from nominatim.tokenizer.place_sanitizer import PlaceSanitizer -from nominatim.tools.country_info import setup_country_config +from nominatim.data.country_info import setup_country_config class TestWithDefaults: diff --git a/test/python/tokenizer/test_icu.py b/test/python/tokenizer/test_icu.py index b9de97bc..7f0ffce1 100644 --- a/test/python/tokenizer/test_icu.py +++ b/test/python/tokenizer/test_icu.py @@ -17,7 +17,7 @@ from nominatim.tokenizer import icu_tokenizer import nominatim.tokenizer.icu_rule_loader from nominatim.db import properties from nominatim.db.sql_preprocessor import SQLPreprocessor -from nominatim.indexer.place_info import PlaceInfo +from nominatim.data.place_info import PlaceInfo from mock_icu_word_table import MockIcuWordTable diff --git a/test/python/tokenizer/test_legacy.py b/test/python/tokenizer/test_legacy.py index 8f79e242..57a82b8a 100644 --- a/test/python/tokenizer/test_legacy.py +++ b/test/python/tokenizer/test_legacy.py @@ -12,7 +12,7 @@ import re import pytest -from nominatim.indexer.place_info import PlaceInfo +from nominatim.data.place_info import PlaceInfo from nominatim.tokenizer import legacy_tokenizer from nominatim.db import properties from nominatim.errors import UsageError diff --git a/test/python/tokenizer/test_place_sanitizer.py b/test/python/tokenizer/test_place_sanitizer.py index 2dd4e58a..31401bd1 100644 --- a/test/python/tokenizer/test_place_sanitizer.py +++ b/test/python/tokenizer/test_place_sanitizer.py @@ -11,7 +11,7 @@ import pytest from nominatim.errors import UsageError import nominatim.tokenizer.place_sanitizer as sanitizer -from nominatim.indexer.place_info import PlaceInfo +from nominatim.data.place_info import PlaceInfo def test_placeinfo_clone_new_name(): diff --git a/test/python/tools/test_postcodes.py b/test/python/tools/test_postcodes.py index 0c4b93fc..f5e8f3de 100644 --- a/test/python/tools/test_postcodes.py +++ b/test/python/tools/test_postcodes.py @@ -11,7 +11,8 @@ import subprocess import pytest -from nominatim.tools import postcodes, country_info +from nominatim.tools import postcodes +from nominatim.data import country_info import dummy_tokenizer class MockPostcodeTable: