From 9d83da830f349b746c72b1df2a7526d517552b72 Mon Sep 17 00:00:00 2001 From: AntoJvlt Date: Mon, 10 May 2021 23:09:00 +0200 Subject: [PATCH] Introduction of SPCsvLoader to load special phrases from a csv file --- nominatim/clicmd/special_phrases.py | 43 +++++++++------ nominatim/tools/__init__.py | 4 -- .../tools/special_phrases/sp_csv_loader.py | 54 +++++++++++++++++++ .../tools/special_phrases/sp_wiki_loader.py | 2 +- test/python/test_cli.py | 2 +- .../test_tools_import_special_phrases.py | 15 +++--- 6 files changed, 92 insertions(+), 28 deletions(-) create mode 100644 nominatim/tools/special_phrases/sp_csv_loader.py diff --git a/nominatim/clicmd/special_phrases.py b/nominatim/clicmd/special_phrases.py index 0de8b239..ecd01c91 100644 --- a/nominatim/clicmd/special_phrases.py +++ b/nominatim/clicmd/special_phrases.py @@ -2,11 +2,12 @@ Implementation of the 'special-phrases' command. """ import logging -from nominatim.errors import UsageError from pathlib import Path -from nominatim.tools import SPWikiLoader -from nominatim.tools import SPImporter +from nominatim.errors import UsageError from nominatim.db.connection import connect +from nominatim.tools.special_phrases.sp_importer import SPImporter +from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader +from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader LOG = logging.getLogger() @@ -24,23 +25,33 @@ class ImportSpecialPhrases: group = parser.add_argument_group('Input arguments') group.add_argument('--import-from-wiki', action='store_true', help='Import special phrases from the OSM wiki to the database.') - group.add_argument('--csv-file', metavar='FILE', - help='CSV file containing phrases to import.') + group.add_argument('--import-from-csv', metavar='FILE', + help='Import special phrases from a CSV file.') @staticmethod def run(args): - from ..tokenizer import factory as tokenizer_factory - if args.import_from_wiki: - tokenizer = 
tokenizer_factory.get_tokenizer_for_db(args.config) - with connect(args.config.get_libpq_dsn()) as db_connection: - SPImporter( - args.config, args.phplib_dir, db_connection, SPWikiLoader(args.config) - ).import_phrases(tokenizer) - - if args.csv_file: - if not Path(args.csv_file).is_file(): - LOG.fatal("CSV file '%s' does not exist.", args.csv_file) + ImportSpecialPhrases.start_import(args, SPWikiLoader(args.config)) + + if args.import_from_csv: + if not Path(args.import_from_csv).is_file(): + LOG.fatal("CSV file '%s' does not exist.", args.import_from_csv) raise UsageError('Cannot access file.') + ImportSpecialPhrases.start_import(args, SPCsvLoader(args.import_from_csv)) + return 0 + + @staticmethod + def start_import(args, loader): + """ + Create the SPImporter object containing the right + SPLoader and then start the import of special phrases. + """ + from ..tokenizer import factory as tokenizer_factory + + tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config) + with connect(args.config.get_libpq_dsn()) as db_connection: + SPImporter( + args.config, args.phplib_dir, db_connection, loader + ).import_phrases(tokenizer) diff --git a/nominatim/tools/__init__.py b/nominatim/tools/__init__.py index 76feb3be..cab6fb8b 100644 --- a/nominatim/tools/__init__.py +++ b/nominatim/tools/__init__.py @@ -2,7 +2,3 @@ Module with functions for importing, updating Nominatim databases as well as general maintenance helpers. """ - -from nominatim.tools.special_phrases.sp_importer import SPImporter -from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader -from nominatim.tools.special_phrases.special_phrase import SpecialPhrase diff --git a/nominatim/tools/special_phrases/sp_csv_loader.py b/nominatim/tools/special_phrases/sp_csv_loader.py new file mode 100644 index 00000000..cd0c2a84 --- /dev/null +++ b/nominatim/tools/special_phrases/sp_csv_loader.py @@ -0,0 +1,54 @@ +""" + Module containing the SPCsvLoader class. 
+ + The class allows to load phrases from a csv file. +""" +import csv +import os +from nominatim.tools.special_phrases.special_phrase import SpecialPhrase +from nominatim.tools.special_phrases.sp_loader import SPLoader +from nominatim.errors import UsageError + +class SPCsvLoader(SPLoader): + """ + Loader for special phrases stored in a csv file. + Handle the loading of special phrases from a given csv file. + """ + def __init__(self, csv_path): + super().__init__() + self.csv_path = csv_path + self.has_been_read = False + + def __next__(self): + if self.has_been_read: + raise StopIteration() + + self.has_been_read = True + SPCsvLoader.check_csv_validity(self.csv_path) + return SPCsvLoader.parse_csv(self.csv_path) + + @staticmethod + def parse_csv(csv_path): + """ + Open and parse the given csv file. + Create the corresponding SpecialPhrases. + """ + phrases = set() + + with open(csv_path) as file: + reader = csv.DictReader(file, delimiter=',') + for row in reader: + phrases.add( + SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator']) + ) + return phrases + + @staticmethod + def check_csv_validity(csv_path): + """ + Check that the csv file has the right extension. + """ + _, extension = os.path.splitext(csv_path) + + if extension != '.csv': + raise UsageError('The file {} is not a csv file.'.format(csv_path)) diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py index 11e59b45..4990eef2 100644 --- a/nominatim/tools/special_phrases/sp_wiki_loader.py +++ b/nominatim/tools/special_phrases/sp_wiki_loader.py @@ -1,8 +1,8 @@ """ Module containing the SPWikiLoader class. 
""" -import logging import re +import logging from nominatim.tools.special_phrases.special_phrase import SpecialPhrase from nominatim.tools.special_phrases.sp_loader import SPLoader from nominatim.tools.exec_utils import get_url diff --git a/test/python/test_cli.py b/test/python/test_cli.py index c43dce3b..f4271259 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -256,7 +256,7 @@ def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock, assert rank_mock.called == do_ranks def test_special_phrases_command(temp_db, mock_func_factory, tokenizer_mock): - func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_from_wiki') + func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases') call_nominatim('special-phrases', '--import-from-wiki') diff --git a/test/python/test_tools_import_special_phrases.py b/test/python/test_tools_import_special_phrases.py index f452c353..4cdfe921 100644 --- a/test/python/test_tools_import_special_phrases.py +++ b/test/python/test_tools_import_special_phrases.py @@ -2,14 +2,15 @@ Tests for import special phrases methods of the class SPImporter. """ -from nominatim.tools import SpecialPhrase -from nominatim.tools import SPWikiLoader from nominatim.errors import UsageError from pathlib import Path import tempfile from shutil import copyfile import pytest -from nominatim.tools import SPImporter +from nominatim.tools.special_phrases.sp_importer import SPImporter +from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader +from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader +from nominatim.tools.special_phrases.special_phrase import SpecialPhrase TEST_BASE_DIR = Path(__file__) / '..' / '..' 
@@ -187,7 +188,7 @@ def test_remove_non_existent_tables_from_db(sp_importer, default_phrases, def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, sp_importer, placex_table, tokenizer_mock): """ - Check that the main import_from_wiki() method is well executed. + Check that the main import_phrases() method is well executed. It should create the place_classtype table, the place_id and centroid indexes, grand access to the web user and executing the SQL functions for amenities. It should also update the database well by deleting or preserving existing entries @@ -200,7 +201,9 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, sp_importer, CREATE TABLE place_classtype_amenity_animal_shelter(); CREATE TABLE place_classtype_wrongclass_wrongtype();""") - monkeypatch.setattr('nominatim.tools.SPWikiLoader._get_wiki_content', mock_get_wiki_content) + monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content', + mock_get_wiki_content) + tokenizer = tokenizer_mock() sp_importer.import_phrases(tokenizer) @@ -244,7 +247,7 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, sp_importer, else: assert not temp_db_cursor.fetchone() -def mock_get_wiki_content(self, lang): +def mock_get_wiki_content(lang): """ Mock the _get_wiki_content() method to return static xml test file content. -- 2.45.1