From: Sarah Hoffmann Date: Thu, 7 Dec 2023 08:21:59 +0000 (+0100) Subject: Merge pull request #3268 from mtmail/wikipedia-file-path-warning X-Git-Tag: v4.4.0~39 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/3969ce0f558d7df0afe14a80af5bfe0eaa0fcc99?hp=1c1447e709d0b29b68284cfafbe2c3918bd176cd Merge pull request #3268 from mtmail/wikipedia-file-path-warning Improve error message when Wikipedia importance file is not found --- diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index a7d45260..42c03edc 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -349,3 +349,95 @@ jobs: - name: Clean up database (reverse-only import) run: nominatim refresh --postcodes --word-tokens working-directory: /home/nominatim/nominatim-project + + install-no-superuser: + runs-on: ubuntu-latest + needs: create-archive + + strategy: + matrix: + name: [Ubuntu-22] + include: + - name: Ubuntu-22 + image: "ubuntu:22.04" + ubuntu: 22 + install_mode: install-apache + + container: + image: ${{ matrix.image }} + env: + LANG: en_US.UTF-8 + + defaults: + run: + shell: sudo -Hu nominatim bash --noprofile --norc -eo pipefail {0} + + steps: + - name: Prepare container (Ubuntu) + run: | + export APT_LISTCHANGES_FRONTEND=none + export DEBIAN_FRONTEND=noninteractive + apt-get update -qq + apt-get install -y git sudo wget + ln -snf /usr/share/zoneinfo/$CONTAINER_TIMEZONE /etc/localtime && echo $CONTAINER_TIMEZONE > /etc/timezone + shell: bash + + - name: Setup import user + run: | + useradd -m nominatim + echo 'nominatim ALL=(ALL:ALL) NOPASSWD: ALL' > /etc/sudoers.d/nominiatim + echo "/home/nominatim/Nominatim/vagrant/Install-on-${OS}.sh no $INSTALL_MODE" > /home/nominatim/vagrant.sh + shell: bash + env: + OS: ${{ matrix.name }} + INSTALL_MODE: ${{ matrix.install_mode }} + + - uses: actions/download-artifact@v3 + with: + name: full-source + path: /home/nominatim + + - name: Install Nominatim + run: | + export 
USERNAME=nominatim + export USERHOME=/home/nominatim + export NOSYSTEMD=yes + export HAVE_SELINUX=no + tar xf nominatim-src.tar.bz2 + . vagrant.sh + working-directory: /home/nominatim + + - name: Prepare import environment + run: | + mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf + mv Nominatim/settings/flex-base.lua flex-base.lua + mv Nominatim/settings/import-extratags.lua import-extratags.lua + mv Nominatim/settings/taginfo.lua taginfo.lua + rm -rf Nominatim + mkdir data-env-reverse + working-directory: /home/nominatim + + - name: Prepare Database + run: | + nominatim import --prepare-database + working-directory: /home/nominatim/nominatim-project + + - name: Create import user + run: | + sudo -u postgres createuser -S osm-import + sudo -u postgres psql -c "ALTER USER \"osm-import\" WITH PASSWORD 'osm-import';" + working-directory: /home/nominatim/nominatim-project + + - name: Grant import user rights + run: | + sudo -u postgres psql -c "GRANT INSERT, UPDATE ON ALL TABLES IN SCHEMA public TO \"osm-import\";" + working-directory: /home/nominatim/nominatim-project + + - name: Run import + run: | + NOMINATIM_DATABASE_DSN="pgsql:host=127.0.0.1;dbname=nominatim;user=osm-import;password=osm-import" nominatim import --continue import-from-file --osm-file ../test.pbf + working-directory: /home/nominatim/nominatim-project + + - name: Check full import + run: nominatim admin --check-database + working-directory: /home/nominatim/nominatim-project \ No newline at end of file diff --git a/nominatim/clicmd/args.py b/nominatim/clicmd/args.py index eb3a3b61..433435bc 100644 --- a/nominatim/clicmd/args.py +++ b/nominatim/clicmd/args.py @@ -87,6 +87,7 @@ class NominatimArgs: offline: bool ignore_errors: bool index_noanalyse: bool + prepare_database: bool # Arguments to 'index' boundaries_only: bool diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index 8464e151..3d212ff9 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -40,13 
+40,15 @@ class SetupAll: def add_args(self, parser: argparse.ArgumentParser) -> None: group_name = parser.add_argument_group('Required arguments') - group1 = group_name.add_mutually_exclusive_group(required=True) + group1 = group_name.add_argument_group() group1.add_argument('--osm-file', metavar='FILE', action='append', help='OSM file to be imported' - ' (repeat for importing multiple files)') + ' (repeat for importing multiple files)', + default=None) group1.add_argument('--continue', dest='continue_at', - choices=['load-data', 'indexing', 'db-postprocess'], - help='Continue an import that was interrupted') + choices=['import-from-file', 'load-data', 'indexing', 'db-postprocess'], + help='Continue an import that was interrupted', + default=None) group2 = parser.add_argument_group('Optional arguments') group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int, help='Size of cache to be used by osm2pgsql (in MB)') @@ -65,9 +67,11 @@ class SetupAll: help='Continue import even when errors in SQL are present') group3.add_argument('--index-noanalyse', action='store_true', help='Do not perform analyse operations during index (expert only)') + group3.add_argument('--prepare-database', action='store_true', + help='Create the database but do not import any data') - def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements + def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements, too-many-branches from ..data import country_info from ..tools import database_import, refresh, postcodes, freeze from ..indexer.indexer import Indexer @@ -76,43 +80,61 @@ class SetupAll: country_info.setup_country_config(args.config) - if args.continue_at is None: + if args.osm_file is None and args.continue_at is None and not args.prepare_database: + raise UsageError("No input files (use --osm-file).") + + if args.osm_file is not None and args.continue_at not in ('import-from-file', None): + raise UsageError(f"Cannot use --continue 
{args.continue_at} and --osm-file together.") + + if args.continue_at is not None and args.prepare_database: + raise UsageError( + "Cannot use --continue and --prepare-database together." + ) + + + if args.prepare_database or args.continue_at is None: + LOG.warning('Creating database') + database_import.setup_database_skeleton(args.config.get_libpq_dsn(), + rouser=args.config.DATABASE_WEBUSER) + if args.prepare_database: + return 0 + + if args.continue_at in (None, 'import-from-file'): files = args.get_osm_file_list() if not files: raise UsageError("No input files (use --osm-file).") - LOG.warning('Creating database') - database_import.setup_database_skeleton(args.config.get_libpq_dsn(), - rouser=args.config.DATABASE_WEBUSER) - - LOG.warning('Setting up country tables') - country_info.setup_country_tables(args.config.get_libpq_dsn(), - args.config.lib_dir.data, - args.no_partitions) - - LOG.warning('Importing OSM data file') - database_import.import_osm_data(files, - args.osm2pgsql_options(0, 1), - drop=args.no_updates, - ignore_errors=args.ignore_errors) - - LOG.warning('Importing wikipedia importance data') - data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir) - if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(), - data_path) > 0: - LOG.error('Wikipedia importance dump file not found. ' - 'Calculating importance values of locations will not ' - 'use Wikipedia importance data.') - - LOG.warning('Importing secondary importance raster data') - if refresh.import_secondary_importance(args.config.get_libpq_dsn(), - args.project_dir) != 0: - LOG.error('Secondary importance file not imported. 
' - 'Falling back to default ranking.') - - self._setup_tables(args.config, args.reverse_only) - - if args.continue_at is None or args.continue_at == 'load-data': + if args.continue_at in ('import-from-file', None): + # Check if the correct plugins are installed + database_import.check_existing_database_plugins(args.config.get_libpq_dsn()) + LOG.warning('Setting up country tables') + country_info.setup_country_tables(args.config.get_libpq_dsn(), + args.config.lib_dir.data, + args.no_partitions) + + LOG.warning('Importing OSM data file') + database_import.import_osm_data(files, + args.osm2pgsql_options(0, 1), + drop=args.no_updates, + ignore_errors=args.ignore_errors) + + LOG.warning('Importing wikipedia importance data') + data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir) + if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(), + data_path) > 0: + LOG.error('Wikipedia importance dump file not found. ' + 'Calculating importance values of locations will not ' + 'use Wikipedia importance data.') + + LOG.warning('Importing secondary importance raster data') + if refresh.import_secondary_importance(args.config.get_libpq_dsn(), + args.project_dir) != 0: + LOG.error('Secondary importance file not imported. 
' + 'Falling back to default ranking.') + + self._setup_tables(args.config, args.reverse_only) + + if args.continue_at in ('import-from-file', 'load-data', None): LOG.warning('Initialise tables') with connect(args.config.get_libpq_dsn()) as conn: database_import.truncate_data_tables(conn) @@ -123,12 +145,13 @@ class SetupAll: LOG.warning("Setting up tokenizer") tokenizer = self._get_tokenizer(args.continue_at, args.config) - if args.continue_at is None or args.continue_at == 'load-data': + if args.continue_at in ('import-from-file', 'load-data', None): LOG.warning('Calculate postcodes') postcodes.update_postcodes(args.config.get_libpq_dsn(), args.project_dir, tokenizer) - if args.continue_at is None or args.continue_at in ('load-data', 'indexing'): + if args.continue_at in \ + ('import-from-file', 'load-data', 'indexing', None): LOG.warning('Indexing places') indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads) indexer.index_full(analyse=not args.index_noanalyse) @@ -185,7 +208,7 @@ class SetupAll: """ from ..tokenizer import factory as tokenizer_factory - if continue_at is None or continue_at == 'load-data': + if continue_at in ('import-from-file', 'load-data', None): # (re)initialise the tokenizer data return tokenizer_factory.create_tokenizer(config) diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py index fce897bc..82801ae7 100644 --- a/nominatim/db/connection.py +++ b/nominatim/db/connection.py @@ -174,6 +174,15 @@ class Connection(psycopg2.extensions.connection): return (int(version_parts[0]), int(version_parts[1])) + + def extension_loaded(self, extension_name: str) -> bool: + """ Return True if the extension with the given name is loaded in the database. 
+ """ + with self.cursor() as cur: + cur.execute('SELECT extname FROM pg_extension WHERE extname = %s', (extension_name, )) + return cur.rowcount > 0 + + class ConnectionContext(ContextManager[Connection]): """ Context manager of the connection that also provides direct access to the underlying connection. diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index cb620d41..de7e6a4a 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -23,7 +23,8 @@ from nominatim.db.async_connection import DBConnection from nominatim.db.sql_preprocessor import SQLPreprocessor from nominatim.tools.exec_utils import run_osm2pgsql from nominatim.errors import UsageError -from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION +from nominatim.version import POSTGRESQL_REQUIRED_VERSION, \ + POSTGIS_REQUIRED_VERSION LOG = logging.getLogger() @@ -38,6 +39,25 @@ def _require_version(module: str, actual: Tuple[int, int], expected: Tuple[int, raise UsageError(f'{module} is too old.') +def _require_loaded(extension_name: str, conn: Connection) -> None: + """ Check that the given extension is loaded. """ + if not conn.extension_loaded(extension_name): + LOG.fatal('Required module %s is not loaded.', extension_name) + raise UsageError(f'{extension_name} is not loaded.') + + +def check_existing_database_plugins(dsn: str) -> None: + """ Check that the database has the required plugins installed.""" + with connect(dsn) as conn: + _require_version('PostgreSQL server', + conn.server_version_tuple(), + POSTGRESQL_REQUIRED_VERSION) + _require_version('PostGIS', + conn.postgis_version_tuple(), + POSTGIS_REQUIRED_VERSION) + _require_loaded('hstore', conn) + + def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None: """ Create a new database for Nominatim and populate it with the essential extensions.