From: Sarah Hoffmann Date: Fri, 15 Jan 2021 07:56:07 +0000 (+0100) Subject: Merge pull request #2135 from lonvia/python-frontend X-Git-Tag: v3.7.0~57 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/565356613afcc2f6556c8add64fe1304579bb3cd?hp=a74e736283c7871a2c47494f18cc65da6b43bccb Merge pull request #2135 from lonvia/python-frontend Introduce new 'nominatim' all-in-one command-line tool --- diff --git a/.github/actions/build-nominatim/action.yml b/.github/actions/build-nominatim/action.yml index 555d7ee2..3cd826af 100644 --- a/.github/actions/build-nominatim/action.yml +++ b/.github/actions/build-nominatim/action.yml @@ -6,7 +6,7 @@ runs: steps: - name: Install prerequisits run: | - sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev python3-psycopg2 python3-pyosmium php-symfony-dotenv + sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev python3-psycopg2 python3-pyosmium python3-dotenv shell: bash - name: Configure @@ -16,7 +16,7 @@ runs: - name: Build run: | make -j2 all - ./utils/setup.php --setup-website + ./nominatim refresh --website shell: bash working-directory: build diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 4b019bce..e57431c0 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -47,7 +47,7 @@ jobs: - name: Install test prerequsites run: | - sudo apt-get install -y -qq php-codesniffer python3-tidylib + sudo apt-get install -y -qq php-codesniffer sudo pip3 install behave - name: PHP linting @@ -101,25 +101,25 @@ jobs: shell: bash - name: Import - run: php ./utils/setup.php --osm-file ../monaco-latest.osm.pbf --osm2pgsql-cache 500 --all + run: ./nominatim import --osm-file ../monaco-latest.osm.pbf working-directory: build - name: Import special phrases - run: php ./utils/specialphrases.php --wiki-import | psql 
-d nominatim + run: ./nominatim special-phrases --from-wiki | psql -d nominatim working-directory: build - name: Check import - run: php ./utils/check_import_finished.php + run: ./nominatim check-database working-directory: build - name: Run update run: | - php ./utils/update.php --init-updates - php ./utils/update.php --import-osmosis + ./nominatim replication --init + ./nominatim replication --once working-directory: build - name: Run reverse-only import run : | dropdb nominatim - php ./utils/setup.php --osm-file ../monaco-latest.osm.pbf --reverse-only --all + ./nominatim import --osm-file ../monaco-latest.osm.pbf --reverse-only working-directory: build diff --git a/CMakeLists.txt b/CMakeLists.txt index b4f4ddb8..1c274594 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,7 +98,6 @@ if (BUILD_IMPORTER) set(CUSTOMSCRIPTS check_import_finished.php country_languages.php - importWikipedia.php export.php query.php setup.php @@ -111,6 +110,9 @@ if (BUILD_IMPORTER) configure_file(${PROJECT_SOURCE_DIR}/cmake/script.tmpl ${PROJECT_BINARY_DIR}/utils/${script_source}) endforeach() + + configure_file(${PROJECT_SOURCE_DIR}/cmake/tool.tmpl + ${PROJECT_BINARY_DIR}/nominatim) endif() #----------------------------------------------------------------------------- diff --git a/cmake/script.tmpl b/cmake/script.tmpl index 5b7bc6f7..30b8717b 100755 --- a/cmake/script.tmpl +++ b/cmake/script.tmpl @@ -1,9 +1,12 @@ #!@PHP_BIN@ -Cq &1` with `${UPDATEFILE} --import-file ${UPDATEDIR}/tmp/combined.osm.pbf --index --index-instances N 2>&1` where N is the numbers of CPUs in your system. -### Setting up multiple regions - Run the following command from your Nominatim directory after configuring the file. bash ./utils/import_multiple_regions.sh diff --git a/docs/admin/Faq.md b/docs/admin/Faq.md index 6203959d..b8b900d0 100644 --- a/docs/admin/Faq.md +++ b/docs/admin/Faq.md @@ -16,7 +16,7 @@ was killed. 
If it looks like this: then you can resume with the following command: ```sh -./utils/setup.php --index --create-search-indices --create-country-names +nominatim import --continue indexing ``` If the reported rank is 26 or higher, you can also safely add `--index-noanalyse`. @@ -31,7 +31,7 @@ list for hints. If it happened during index creation you can try rerunning the step with ```sh -./utils/setup.php --create-search-indices --ignore-errors +nominatim import --continue indexing ``` Otherwise it's best to start the full setup from the beginning. @@ -93,7 +93,7 @@ on a non-managed machine. ### I see the error: "function transliteration(text) does not exist" -Reinstall the nominatim functions with `setup.php --create--functions` +Reinstall the nominatim functions with `nominatim refresh --functions` and check for any errors, e.g. a missing `nominatim.so` file. ### I see the error: "ERROR: mmap (remap) failed" @@ -113,7 +113,8 @@ Double-check clang is installed. Instead of `make` try running `make CLANG=true` ### nominatim UPDATE failed: ERROR: buffer 179261 is not owned by resource owner Portal -Several users [reported this](https://github.com/openstreetmap/Nominatim/issues/1168) during the initial import of the database. It's +Several users [reported this](https://github.com/openstreetmap/Nominatim/issues/1168) +during the initial import of the database. It's something PostgreSQL internal Nominatim doesn't control. And PostgreSQL forums suggest it's threading related but definitely some kind of crash of a process. Users reported either rebooting the server, different hardware or just trying @@ -202,7 +203,7 @@ See the installation instructions for a full list of required packages. ### I forgot to delete the flatnodes file before starting an import. That's fine. For each import the flatnodes file get overwritten. 
-See [https://help.openstreetmap.org/questions/52419/nominatim-flatnode-storage]() +See [https://help.openstreetmap.org/questions/52419/nominatim-flatnode-storage](https://help.openstreetmap.org/questions/52419/nominatim-flatnode-storage) for more information. diff --git a/docs/admin/Import.md b/docs/admin/Import.md index 41942d6f..01d01230 100644 --- a/docs/admin/Import.md +++ b/docs/admin/Import.md @@ -8,7 +8,7 @@ software itself, if not return to the [installation page](Installation.md). ## Configuration setup in `.env` The Nominatim server can be customized via a `.env` in the build directory. -This is a file in [dotenv](https://symfony.com/doc/4.3/components/dotenv.html) format +This is a file in [dotenv](https://github.com/theskumar/python-dotenv) format which looks the same as variable settings in a standard shell environment. You can also set the same configuration via environment variables. All settings have a `NOMINATIM_` prefix to avoid conflicts with other environment @@ -47,8 +47,7 @@ The file is about 400MB and adds around 4GB to the Nominatim database. !!! tip If you forgot to download the wikipedia rankings, you can also add importances after the import. Download the files, then run - `./utils/setup.php --import-wikipedia-articles` - and `./utils/update.php --recompute-importance`. + `./nominatim refresh --wiki-data --importance`. ### Great Britain, USA postcodes @@ -85,11 +84,14 @@ that Nominatim cannot compute the areas for some administrative areas. About half of the data in Nominatim's database is not really used for serving the API. It is only there to allow the data to be updated from the latest changes from OSM. For many uses these dynamic updates are not really required. -If you don't plan to apply updates, the dynamic part of the database can be -safely dropped using the following command: +If you don't plan to apply updates, you can run the import with the +`--no-updates` parameter. 
This will drop the dynamic part of the database as +soon as it is not required anymore. + +You can also drop the dynamic part later using the following command: ``` -./utils/setup.php --drop +./nominatim freeze ``` Note that you still need to provide for sufficient disk space for the initial @@ -155,7 +157,7 @@ Download the data to import. Then issue the following command from the **build directory** to start the import: ```sh -./utils/setup.php --osm-file --all 2>&1 | tee setup.log +./nominatim import --osm-file 2>&1 | tee setup.log ``` ### Notes on full planet imports @@ -196,19 +198,10 @@ reduce the cache size or even consider using a flatnode file. Run this script to verify all required tables and indices got created successfully. ```sh -./utils/check_import_finished.php +./nominatim check-database ``` -### Setting up the website - -Run the following command to set up the configuration file for the API frontend -`settings/settings-frontend.php`. These settings are used in website/*.php files. - -```sh -./utils/setup.php --setup-website -``` -!!! Note - This step is not necessary if you use `--all` option while setting up the DB. +### Testing the installation Now you can try out your installation by running: @@ -231,7 +224,7 @@ planner to make the right decisions. Recomputing them can improve the performanc of forward geocoding in particular under high load. To recompute word counts run: ```sh -./utils/update.php --recompute-word-counts +./nominatim refresh --word-counts ``` This will take a couple of hours for a full planet installation. 
You can @@ -243,7 +236,7 @@ If you want to be able to search for places by their type through [special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases) you also need to enable these key phrases like this: - ./utils/specialphrases.php --wiki-import > specialphrases.sql + ./nominatim special-phrases --from-wiki > specialphrases.sql psql -d nominatim -f specialphrases.sql Note that this command downloads the phrases from the wiki link above. You @@ -260,13 +253,12 @@ entire US adds about 10GB to your database. 1. Get preprocessed TIGER 2019 data and unpack it into the data directory in your Nominatim sources: - cd Nominatim/data wget https://nominatim.org/data/tiger2019-nominatim-preprocessed.tar.gz tar xf tiger2019-nominatim-preprocessed.tar.gz 2. Import the data into your Nominatim database: - ./utils/setup.php --import-tiger-data + ./nominatim add-data --tiger-data tiger 3. Enable use of the Tiger data in your `.env` by adding: @@ -275,7 +267,7 @@ entire US adds about 10GB to your database. 4. 
Apply the new settings: ```sh - ./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions + ./nominatim refresh --functions ``` diff --git a/docs/admin/Installation.md b/docs/admin/Installation.md index a577fdcf..c9d000b2 100644 --- a/docs/admin/Installation.md +++ b/docs/admin/Installation.md @@ -38,12 +38,12 @@ For running Nominatim: * [PostgreSQL](https://www.postgresql.org) (9.3+) * [PostGIS](https://postgis.net) (2.2+) - * [Python 3](https://www.python.org/) + * [Python 3](https://www.python.org/) (3.4+) * [Psycopg2](https://www.psycopg.org) * [PHP](https://php.net) (7.0 or later) * PHP-pgsql * PHP-intl (bundled with PHP) - * [PHP Symphony Dotenv](https://symfony.com/doc/4.3/components/dotenv.html) + * [Python Dotenv](https://github.com/theskumar/python-dotenv) For running continuous updates: diff --git a/docs/admin/Migration.md b/docs/admin/Migration.md index 3f330bb9..753fb500 100644 --- a/docs/admin/Migration.md +++ b/docs/admin/Migration.md @@ -6,6 +6,28 @@ to newer versions of Nominatim. SQL statements should be executed from the PostgreSQL commandline. Execute `psql nominatim` to enter command line mode. +## 3.6.0 -> master + +### Introducing `nominatim` command line tool + +The various php utilities have been replaced with a single `nominatim` +command line tool. Make sure to adapt any scripts. There is no direct 1:1 +matching between the old utilities and the commands of nominatim CLI. The +following list gives you a list of nominatim sub-commands that contain +functionality of each script: + +* ./utils/setup.php: `import`, `freeze`, `refresh` +* ./utils/update.php: `replication`, `add-data`, `index`, `refresh` +* ./utils/specialphrases.php: `special-phrases` +* ./utils/check_import_finished.php: `check-database` +* ./utils/warm.php: `warm` +* ./utils/export.php: `export` + +Try `nominatim --help` for more information about each subcommand. + +`./utils/query.php` no longer exists in its old form. 
`nominatim search` +provides a replacement. + ## 3.5.0 -> 3.6.0 ### Change of layout of search_name_* tables diff --git a/docs/admin/Update.md b/docs/admin/Update.md index 98f5244a..4b1a2be7 100644 --- a/docs/admin/Update.md +++ b/docs/admin/Update.md @@ -1,8 +1,10 @@ # Updating the Database There are many different ways to update your Nominatim database. -The following section describes how to keep it up-to-date with Pyosmium. -For a list of other methods see the output of `./utils/update.php --help`. +The following section describes how to keep it up-to-date using +an [online replication service for OpenStreetMap data](https://wiki.openstreetmap.org/wiki/Planet.osm/diffs). +For a list of other methods to add or update data see the output of +`nominatim add-data --help`. !!! important If you have configured a flatnode file for the import, then you @@ -44,23 +46,19 @@ diffs for Ireland from Geofabrik add the following: To set up the update process now run the following command: - ./utils/update.php --init-updates + ./nominatim replication --init It outputs the date where updates will start. Recheck that this date is what you expect. -The `--init-updates` command needs to be rerun whenever the replication service -is changed. +The `replication --init` command needs to be rerun whenever the replication +service is changed. #### Updating Nominatim The following command will keep your database constantly up to date: - ./utils/update.php --import-osmosis-all - -(Note that even though the old name "import-osmosis-all" has been kept for -compatibility reasons, Osmosis is not required to run this - it uses pyosmium -behind the scenes.) 
+ ./nominatim replication If you have imported multiple country extracts and want to keep them up-to-date, [Advanced installations section](Advanced-Installations.md) contains instructions diff --git a/docs/api/Faq.md b/docs/api/Faq.md index 86f0e783..e9d34a70 100644 --- a/docs/api/Faq.md +++ b/docs/api/Faq.md @@ -58,4 +58,4 @@ The [Overpass API](https://wiki.openstreetmap.org/wiki/Overpass_API) is more suited for these kinds of queries. That said if you installed your own Nominatim instance you can use the -`/utils/export.php` PHP script as basis to return such lists. +`nominatim export` command as basis to return such lists. diff --git a/docs/develop/Postcodes.md b/docs/develop/Postcodes.md index ff36b0dd..343b8de3 100644 --- a/docs/develop/Postcodes.md +++ b/docs/develop/Postcodes.md @@ -14,7 +14,7 @@ country's format, e.g. if Swiss postcodes are 4 digits. ## Regular updating calculated postcodes The script to rerun the calculation is -`build/utils/update.php --calculate-postcodes` +`nominatim refresh --postcodes` and runs once per night on nominatim.openstreetmap.org. 
diff --git a/utils/check_import_finished.php b/lib/admin/check_import_finished.php old mode 100755 new mode 100644 similarity index 98% rename from utils/check_import_finished.php rename to lib/admin/check_import_finished.php index bd5d7a00..f189fc9a --- a/utils/check_import_finished.php +++ b/lib/admin/check_import_finished.php @@ -1,4 +1,5 @@ connect(); diff --git a/lib/dotenv_loader.php b/lib/dotenv_loader.php new file mode 100644 index 00000000..919891a0 --- /dev/null +++ b/lib/dotenv_loader.php @@ -0,0 +1,13 @@ +load(CONST_DataDir.'/settings/env.defaults'); + + if (file_exists('.env')) { + $dotenv->load('.env'); + } +} diff --git a/lib/lib.php b/lib/lib.php index b4c415d9..7760f695 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -1,17 +1,13 @@ load(CONST_DataDir.'/settings/env.defaults'); - - if (file_exists($sProjectDir.'/.env')) { - $dotenv->load($sProjectDir.'/.env'); - } + // Temporary hack to set the direcory via environment instead of + // the installed scripts. Neither setting is part of the official + // set of settings. 
+ defined('CONST_DataDir') or define('CONST_DataDir', $_SERVER['NOMINATIM_DATADIR']); + defined('CONST_BinDir') or define('CONST_BinDir', $_SERVER['NOMINATIM_BINDIR']); } function getSetting($sConfName, $sDefault = null) @@ -36,7 +32,7 @@ function getSettingBool($sConfName) function getSettingConfig($sConfName, $sSystemConfig) { - $sValue = $_ENV['NOMINATIM_'.$sConfName]; + $sValue = $_SERVER['NOMINATIM_'.$sConfName]; if (!$sValue) { return CONST_DataDir.'/settings/'.$sSystemConfig; diff --git a/lib/setup/SetupClass.php b/lib/setup/SetupClass.php index 92c36bf3..77b14a8a 100755 --- a/lib/setup/SetupClass.php +++ b/lib/setup/SetupClass.php @@ -42,7 +42,10 @@ class SetupFunctions $this->iCacheMemory = getCacheMemoryMB(); } - $this->sModulePath = getSetting('DATABASE_MODULE_PATH', CONST_Default_ModulePath); + $this->sModulePath = getSetting('DATABASE_MODULE_PATH'); + if (!$this->sModulePath) { + $this->sModulePath = CONST_Default_ModulePath; + } info('module path: ' . $this->sModulePath); // parse database string diff --git a/lib/setup_functions.php b/lib/setup_functions.php index dab6a8e7..dc84cf92 100755 --- a/lib/setup_functions.php +++ b/lib/setup_functions.php @@ -17,7 +17,9 @@ function checkInFile($sOSMFile) function getOsm2pgsqlBinary() { - return getSetting('OSM2PGSQL_BINARY', CONST_Default_Osm2pgsql); + $sBinary = getSetting('OSM2PGSQL_BINARY'); + + return $sBinary ? $sBinary : CONST_Default_Osm2pgsql; } function getImportStyle() diff --git a/nominatim/__init__.py b/nominatim/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/nominatim/admin/__init__.py b/nominatim/admin/__init__.py new file mode 100644 index 00000000..cab6fb8b --- /dev/null +++ b/nominatim/admin/__init__.py @@ -0,0 +1,4 @@ +""" +Module with functions for importing, updating Nominatim databases +as well as general maintenance helpers. 
+""" diff --git a/nominatim/admin/exec_utils.py b/nominatim/admin/exec_utils.py new file mode 100644 index 00000000..f3f59dea --- /dev/null +++ b/nominatim/admin/exec_utils.py @@ -0,0 +1,29 @@ +""" +Helper functions for executing external programs. +""" +import subprocess + +def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False): + """ Run a Nominatim PHP script with the given arguments. + + Returns the exit code of the script. If `throw_on_fail` is True + then throw a `CalledProcessError` on a non-zero exit. + """ + cmd = ['/usr/bin/env', 'php', '-Cq', + nominatim_env.phplib_dir / 'admin' / script] + cmd.extend([str(a) for a in args]) + + env = nominatim_env.config.get_os_env() + env['NOMINATIM_DATADIR'] = str(nominatim_env.data_dir) + env['NOMINATIM_BINDIR'] = str(nominatim_env.data_dir / 'utils') + if not env['NOMINATIM_DATABASE_MODULE_PATH']: + env['NOMINATIM_DATABASE_MODULE_PATH'] = nominatim_env.module_dir + if not env['NOMINATIM_OSM2PGSQL_BINARY']: + env['NOMINATIM_OSM2PGSQL_BINARY'] = nominatim_env.osm2pgsql_path + + proc = subprocess.run(cmd, cwd=str(nominatim_env.project_dir), env=env) + + if throw_on_fail: + proc.check_returncode() + + return proc.returncode diff --git a/nominatim/cli.py b/nominatim/cli.py new file mode 100644 index 00000000..8d4071db --- /dev/null +++ b/nominatim/cli.py @@ -0,0 +1,501 @@ +""" +Command-line interface to the Nominatim functions for import, update, +database administration and querying. +""" +import sys +import os +import argparse +import logging +from pathlib import Path + +from .config import Configuration +from .admin.exec_utils import run_legacy_script + +class CommandlineParser: + """ Wraps some of the common functions for parsing the command line + and setting up subcommands. 
+ """ + def __init__(self, prog, description): + self.parser = argparse.ArgumentParser( + prog=prog, + description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + + self.subs = self.parser.add_subparsers(title='available commands', + dest='subcommand') + + # Arguments added to every sub-command + self.default_args = argparse.ArgumentParser(add_help=False) + group = self.default_args.add_argument_group('Default arguments') + group.add_argument('-h', '--help', action='help', + help='Show this help message and exit') + group.add_argument('-q', '--quiet', action='store_const', const=0, + dest='verbose', default=1, + help='Print only error messages') + group.add_argument('-v', '--verbose', action='count', default=1, + help='Increase verboseness of output') + group.add_argument('--project-dir', metavar='DIR', default='.', + help='Base directory of the Nominatim installation (default:.)') + group.add_argument('-j', '--threads', metavar='NUM', type=int, + help='Number of parallel threads to use') + + + def add_subcommand(self, name, cmd): + """ Add a subcommand to the parser. The subcommand must be a class + with a function add_args() that adds the parameters for the + subcommand and a run() function that executes the command. + """ + parser = self.subs.add_parser(name, parents=[self.default_args], + help=cmd.__doc__.split('\n', 1)[0], + description=cmd.__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + add_help=False) + parser.set_defaults(command=cmd) + cmd.add_args(parser) + + def run(self, **kwargs): + """ Parse the command line arguments of the program and execute the + appropriate subcommand. 
+ """ + args = self.parser.parse_args() + + if args.subcommand is None: + return self.parser.print_help() + + for arg in ('module_dir', 'osm2pgsql_path', 'phplib_dir', 'data_dir'): + setattr(args, arg, Path(kwargs[arg])) + args.project_dir = Path(args.project_dir) + + logging.basicConfig(stream=sys.stderr, + format='%(asctime)s %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + level=max(4 - args.verbose, 1) * 10) + + args.config = Configuration(args.project_dir, args.data_dir / 'settings') + + return args.command.run(args) + +##### Subcommand classes +# +# Each class needs to implement two functions: add_args() adds the CLI parameters +# for the subfunction, run() executes the subcommand. +# +# The class documentation doubles as the help text for the command. The +# first line is also used in the summary when calling the program without +# a subcommand. +# +# No need to document the functions each time. +# pylint: disable=C0111 + + +class SetupAll: + """\ + Create a new Nominatim database from an OSM file. 
+ """ + + @staticmethod + def add_args(parser): + group_name = parser.add_argument_group('Required arguments') + group = group_name.add_mutually_exclusive_group(required=True) + group.add_argument('--osm-file', + help='OSM file to be imported.') + group.add_argument('--continue', dest='continue_at', + choices=['load-data', 'indexing', 'db-postprocess'], + help='Continue an import that was interrupted') + group = parser.add_argument_group('Optional arguments') + group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int, + help='Size of cache to be used by osm2pgsql (in MB)') + group.add_argument('--reverse-only', action='store_true', + help='Do not create tables and indexes for searching') + group.add_argument('--enable-debug-statements', action='store_true', + help='Include debug warning statements in SQL code') + group.add_argument('--no-partitions', action='store_true', + help="""Do not partition search indices + (speeds up import of single country extracts)""") + group.add_argument('--no-updates', action='store_true', + help="""Do not keep tables that are only needed for + updating the database later""") + group = parser.add_argument_group('Expert options') + group.add_argument('--ignore-errors', action='store_true', + help='Continue import even when errors in SQL are present') + group.add_argument('--index-noanalyse', action='store_true', + help='Do not perform analyse operations during index') + + + @staticmethod + def run(args): + params = ['setup.php'] + if args.osm_file: + params.extend(('--all', '--osm-file', args.osm_file)) + else: + if args.continue_at == 'load-data': + params.append('--load-data') + if args.continue_at in ('load-data', 'indexing'): + params.append('--index') + params.extend(('--create-search-indices', '--create-country-names', + '--setup-website')) + if args.osm2pgsql_cache: + params.extend(('--osm2pgsql-cache', args.osm2pgsql_cache)) + if args.reverse_only: + params.append('--reverse-only') + if args.enable_debug_statements: + 
params.append('--enable-debug-statements') + if args.no_partitions: + params.append('--no-partitions') + if args.no_updates: + params.append('--drop') + if args.ignore_errors: + params.append('--ignore-errors') + if args.index_noanalyse: + params.append('--index-noanalyse') + + return run_legacy_script(*params, nominatim_env=args) + + +class SetupFreeze: + """\ + Make database read-only. + + About half of data in the Nominatim database is kept only to be able to + keep the data up-to-date with new changes made in OpenStreetMap. This + command drops all this data and only keeps the part needed for geocoding + itself. + + This command has the same effect as the `--no-updates` option for imports. + """ + + @staticmethod + def add_args(parser): + pass # No options + + @staticmethod + def run(args): + return run_legacy_script('setup.php', '--drop', nominatim_env=args) + + +class SetupSpecialPhrases: + """\ + Maintain special phrases. + """ + + @staticmethod + def add_args(parser): + group = parser.add_argument_group('Input arguments') + group.add_argument('--from-wiki', action='store_true', + help='Pull special phrases from the OSM wiki.') + group = parser.add_argument_group('Output arguments') + group.add_argument('-o', '--output', default='-', + type=argparse.FileType('w', encoding='UTF-8'), + help="""File to write the preprocessed phrases to. + If omitted, it will be written to stdout.""") + + @staticmethod + def run(args): + if args.output.name != '<stdout>': + raise NotImplementedError('Only output to stdout is currently implemented.') + return run_legacy_script('specialphrases.php', '--wiki-import', nominatim_env=args) + + +class UpdateReplication: + """\ + Update the database using an online replication service. 
+ """ + + @staticmethod + def add_args(parser): + group = parser.add_argument_group('Arguments for initialisation') + group.add_argument('--init', action='store_true', + help='Initialise the update process') + group.add_argument('--no-update-functions', dest='update_functions', + action='store_false', + help="""Do not update the trigger function to + support differential updates.""") + group = parser.add_argument_group('Arguments for updates') + group.add_argument('--check-for-updates', action='store_true', + help='Check if new updates are available and exit') + group.add_argument('--once', action='store_true', + help="""Download and apply updates only once. When + not set, updates are continuously applied""") + group.add_argument('--no-index', action='store_false', dest='do_index', + help="""Do not index the new data. Only applicable + together with --once""") + + @staticmethod + def run(args): + params = ['update.php'] + if args.init: + params.append('--init-updates') + if not args.update_functions: + params.append('--no-update-functions') + elif args.check_for_updates: + params.append('--check-for-updates') + else: + if args.once: + params.append('--import-osmosis') + else: + params.append('--import-osmosis-all') + if not args.do_index: + params.append('--no-index') + + return run_legacy_script(*params, nominatim_env=args) + + +class UpdateAddData: + """\ + Add additional data from a file or an online source. + + Data is only imported, not indexed. You need to call `nominatim-update index` + to complete the process. 
+ """ + + @staticmethod + def add_args(parser): + group_name = parser.add_argument_group('Source') + group = group_name.add_mutually_exclusive_group(required=True) + group.add_argument('--file', metavar='FILE', + help='Import data from an OSM file') + group.add_argument('--diff', metavar='FILE', + help='Import data from an OSM diff file') + group.add_argument('--node', metavar='ID', type=int, + help='Import a single node from the API') + group.add_argument('--way', metavar='ID', type=int, + help='Import a single way from the API') + group.add_argument('--relation', metavar='ID', type=int, + help='Import a single relation from the API') + group.add_argument('--tiger-data', metavar='DIR', + help='Add housenumbers from the US TIGER census database.') + group = parser.add_argument_group('Extra arguments') + group.add_argument('--use-main-api', action='store_true', + help='Use OSM API instead of Overpass to download objects') + + @staticmethod + def run(args): + if args.tiger_data: + os.environ['NOMINATIM_TIGER_DATA_PATH'] = args.tiger_data + return run_legacy_script('setup.php', '--import-tiger-data', nominatim_env=args) + + params = ['update.php'] + if args.file: + params.extend(('--import-file', args.file)) + elif args.diff: + params.extend(('--import-diff', args.diff)) + elif args.node: + params.extend(('--import-node', args.node)) + elif args.way: + params.extend(('--import-way', args.way)) + elif args.relation: + params.extend(('--import-relation', args.relation)) + if args.use_main_api: + params.append('--use-main-api') + return run_legacy_script(*params, nominatim_env=args) + + +class UpdateIndex: + """\ + Reindex all new and modified data. + """ + + @staticmethod + def add_args(parser): + pass + + @staticmethod + def run(args): + return run_legacy_script('update.php', '--index', nominatim_env=args) + + +class UpdateRefresh: + """\ + Recompute auxiliary data used by the indexing process. + + These functions must not be run in parallel with other update commands. 
+ """ + + @staticmethod + def add_args(parser): + group = parser.add_argument_group('Data arguments') + group.add_argument('--postcodes', action='store_true', + help='Update postcode centroid table') + group.add_argument('--word-counts', action='store_true', + help='Compute frequency of full-word search terms') + group.add_argument('--address-levels', action='store_true', + help='Reimport address level configuration') + group.add_argument('--functions', action='store_true', + help='Update the PL/pgSQL functions in the database') + group.add_argument('--wiki-data', action='store_true', + help='Update Wikipedia/data importance numbers.') + group.add_argument('--importance', action='store_true', + help='Recompute place importances (expensive!)') + group.add_argument('--website', action='store_true', + help='Refresh the directory that serves the scripts for the web API') + group = parser.add_argument_group('Arguments for function refresh') + group.add_argument('--no-diff-updates', action='store_false', dest='diffs', + help='Do not enable code for propagating updates') + group.add_argument('--enable-debug-statements', action='store_true', + help='Enable debug warning statements in functions') + + @staticmethod + def run(args): + if args.postcodes: + run_legacy_script('update.php', '--calculate-postcodes', + nominatim_env=args, throw_on_fail=True) + if args.word_counts: + run_legacy_script('update.php', '--recompute-word-counts', + nominatim_env=args, throw_on_fail=True) + if args.address_levels: + run_legacy_script('update.php', '--update-address-levels', + nominatim_env=args, throw_on_fail=True) + if args.functions: + params = ['setup.php', '--create-functions', '--create-partition-functions'] + if args.diffs: + params.append('--enable-diff-updates') + if args.enable_debug_statements: + params.append('--enable-debug-statements') + run_legacy_script(*params, nominatim_env=args, throw_on_fail=True) + if args.wiki_data: + run_legacy_script('setup.php', 
'--import-wikipedia-articles', + nominatim_env=args, throw_on_fail=True) + # Attention: importance MUST come after wiki data import. + if args.importance: + run_legacy_script('update.php', '--recompute-importance', + nominatim_env=args, throw_on_fail=True) + if args.website: + run_legacy_script('setup.php', '--setup-website', + nominatim_env=args, throw_on_fail=True) + + +class AdminCheckDatabase: + """\ + Check that the database is complete and operational. + """ + + @staticmethod + def add_args(parser): + pass # No options + + @staticmethod + def run(args): + return run_legacy_script('check_import_finished.php', nominatim_env=args) + + +class AdminWarm: + """\ + Warm database caches for search and reverse queries. + """ + + @staticmethod + def add_args(parser): + group = parser.add_argument_group('Target arguments') + group.add_argument('--search-only', action='store_const', dest='target', + const='search', + help="Only pre-warm tables for search queries") + group.add_argument('--reverse-only', action='store_const', dest='target', + const='reverse', + help="Only pre-warm tables for reverse queries") + + @staticmethod + def run(args): + params = ['warm.php'] + if args.target == 'reverse': + params.append('--reverse-only') + if args.target == 'search': + params.append('--search-only') + return run_legacy_script(*params, nominatim_env=args) + + +class QueryExport: + """\ + Export addresses as CSV file from a Nominatim database. + """ + + @staticmethod + def add_args(parser): + group = parser.add_argument_group('Output arguments') + group.add_argument('--output-type', default='street', + choices=('continent', 'country', 'state', 'county', + 'city', 'suburb', 'street', 'path'), + help='Type of places to output (default: street)') + group.add_argument('--output-format', + default='street;suburb;city;county;state;country', + help="""Semicolon-separated list of address types + (see --output-type). 
Multiple ranks can be + merged into one column by simply using a + comma-separated list.""") + group.add_argument('--output-all-postcodes', action='store_true', + help="""List all postcodes for address instead of + just the most likely one""") + group.add_argument('--language', + help="""Preferred language for output + (use local name, if omitted)""") + group = parser.add_argument_group('Filter arguments') + group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE', + help='Export only objects within country') + group.add_argument('--restrict-to-osm-node', metavar='ID', type=int, + help='Export only children of this OSM node') + group.add_argument('--restrict-to-osm-way', metavar='ID', type=int, + help='Export only children of this OSM way') + group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int, + help='Export only children of this OSM relation') + + + @staticmethod + def run(args): + params = ['export.php', + '--output-type', args.output_type, + '--output-format', args.output_format] + if args.output_all_postcodes: + params.append('--output-all-postcodes') + if args.language: + params.extend(('--language', args.language)) + if args.restrict_to_country: + params.extend(('--restrict-to-country', args.restrict_to_country)) + if args.restrict_to_osm_node: + params.extend(('--restrict-to-osm-node', args.restrict_to_osm_node)) + if args.restrict_to_osm_way: + params.extend(('--restrict-to-osm-way', args.restrict_to_osm_way)) + if args.restrict_to_osm_relation: + params.extend(('--restrict-to-osm-relation', args.restrict_to_osm_relation)) + + return run_legacy_script(*params, nominatim_env=args) + +class QueryTodo: + """\ + Todo + """ + @staticmethod + def add_args(parser): + pass + + @staticmethod + def run(args): # pylint: disable=W0613 + print("TODO: searching") + + +def nominatim(**kwargs): + """\ + Command-line tools for importing, updating, administrating and + querying the Nominatim database. 
+ """ + parser = CommandlineParser('nominatim', nominatim.__doc__) + + parser.add_subcommand('import', SetupAll) + parser.add_subcommand('freeze', SetupFreeze) + parser.add_subcommand('replication', UpdateReplication) + + parser.add_subcommand('check-database', AdminCheckDatabase) + parser.add_subcommand('warm', AdminWarm) + + parser.add_subcommand('special-phrases', SetupSpecialPhrases) + + parser.add_subcommand('add-data', UpdateAddData) + parser.add_subcommand('index', UpdateIndex) + parser.add_subcommand('refresh', UpdateRefresh) + + parser.add_subcommand('export', QueryExport) + parser.add_subcommand('search', QueryTodo) + parser.add_subcommand('reverse', QueryTodo) + parser.add_subcommand('lookup', QueryTodo) + parser.add_subcommand('details', QueryTodo) + parser.add_subcommand('status', QueryTodo) + + return parser.run(**kwargs) diff --git a/nominatim/config.py b/nominatim/config.py new file mode 100644 index 00000000..911c7ddf --- /dev/null +++ b/nominatim/config.py @@ -0,0 +1,39 @@ +""" +Nominatim configuration accessor. +""" +import os + +from dotenv import dotenv_values + +class Configuration: + """ Load and manage the project configuration. + + Nominatim uses dotenv to configure the software. Configuration options + are resolved in the following order: + + * from the OS environment + * from the .env file in the project directory of the installation + * from the default installation in the configuration directory + + All Nominatim configuration options are prefixed with 'NOMINATIM_' to + avoid conflicts with other environment variables. 
+ """ + + def __init__(self, project_dir, config_dir): + self._config = dotenv_values(str((config_dir / 'env.defaults').resolve())) + if project_dir is not None: + self._config.update(dotenv_values(str((project_dir / '.env').resolve()))) + + def __getattr__(self, name): + name = 'NOMINATIM_' + name + + return os.environ.get(name) or self._config[name] + + def get_os_env(self): + """ Return a copy of the OS environment with the Nominatim configuration + merged in. + """ + env = dict(self._config) + env.update(os.environ) + + return env diff --git a/test/bdd/environment.py b/test/bdd/environment.py index cadfda18..30ea30a2 100644 --- a/test/bdd/environment.py +++ b/test/bdd/environment.py @@ -1,6 +1,7 @@ -from behave import * from pathlib import Path +from behave import * + from steps.geometry_factory import GeometryFactory from steps.nominatim_environment import NominatimEnvironment diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py index e799306a..68d7b2f4 100644 --- a/test/bdd/steps/nominatim_environment.py +++ b/test/bdd/steps/nominatim_environment.py @@ -1,10 +1,13 @@ -import os from pathlib import Path +import sys import tempfile import psycopg2 import psycopg2.extras +sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' 
/ '..').resolve())) + +from nominatim.config import Configuration from steps.utils import run_script class NominatimEnvironment: @@ -28,6 +31,7 @@ class NominatimEnvironment: self.code_coverage_path = config['PHPCOV'] self.code_coverage_id = 1 + self.default_config = Configuration(None, self.src_dir / 'settings').get_os_env() self.test_env = None self.template_db_done = False self.api_db_done = False @@ -78,11 +82,15 @@ class NominatimEnvironment: and dsn == self.test_env['NOMINATIM_DATABASE_DSN']: return # environment already set uo - self.test_env = os.environ + self.test_env = dict(self.default_config) self.test_env['NOMINATIM_DATABASE_DSN'] = dsn self.test_env['NOMINATIM_FLATNODE_FILE'] = '' self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full' self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes' + self.test_env['NOMINATIM_DATADIR'] = self.src_dir + self.test_env['NOMINATIM_BINDIR'] = self.src_dir / 'utils' + self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.build_dir / 'module' + self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = self.build_dir / 'osm2pgsql' / 'osm2pgsql' if self.server_module_path: self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path @@ -253,7 +261,7 @@ class NominatimEnvironment: """ Run one of the Nominatim utility scripts with the given arguments. 
""" cmd = ['/usr/bin/env', 'php', '-Cq'] - cmd.append((Path(self.build_dir) / 'utils' / '{}.php'.format(script)).resolve()) + cmd.append((Path(self.src_dir) / 'lib' / 'admin' / '{}.php'.format(script)).resolve()) cmd.extend(['--' + x for x in args]) for k, v in kwargs.items(): cmd.extend(('--' + k.replace('_', '-'), str(v))) @@ -261,7 +269,7 @@ class NominatimEnvironment: if self.website_dir is not None: cwd = self.website_dir.name else: - cwd = self.build_dir + cwd = None run_script(cmd, cwd=cwd, env=self.test_env) diff --git a/test/bdd/steps/steps_api_queries.py b/test/bdd/steps/steps_api_queries.py index 7b3597da..a56081c0 100644 --- a/test/bdd/steps/steps_api_queries.py +++ b/test/bdd/steps/steps_api_queries.py @@ -59,7 +59,7 @@ def query_cmd(context, query, dups): """ Query directly via PHP script. """ cmd = ['/usr/bin/env', 'php'] - cmd.append(os.path.join(context.nominatim.build_dir, 'utils', 'query.php')) + cmd.append(context.nominatim.src_dir / 'lib' / 'admin' / 'query.php') if query: cmd.extend(['--search', query]) # add more parameters in table form @@ -72,7 +72,8 @@ def query_cmd(context, query, dups): if dups: cmd.extend(('--dedupe', '0')) - outp, err = run_script(cmd, cwd=context.nominatim.build_dir) + outp, err = run_script(cmd, cwd=context.nominatim.website_dir.name, + env=context.nominatim.test_env) context.response = SearchResponse(outp, 'json') diff --git a/vagrant/Install-on-Centos-7.sh b/vagrant/Install-on-Centos-7.sh index 24b17663..00915272 100755 --- a/vagrant/Install-on-Centos-7.sh +++ b/vagrant/Install-on-Centos-7.sh @@ -6,10 +6,6 @@ # Installing the Required Software # ================================ # -# !!! caution -# These instructions are currently broken because they do not -# include installation of the required PHP library symfony-dotenv. -# # These instructions expect that you have a freshly installed CentOS version 7. 
# Make sure all packages are up-to-date by running: # @@ -46,7 +42,7 @@ python3-pip python3-setuptools python3-devel \ expat-devel zlib-devel - pip3 install --user psycopg2 + pip3 install --user psycopg2 python-dotenv # diff --git a/vagrant/Install-on-Centos-8.sh b/vagrant/Install-on-Centos-8.sh index 517a5156..548f8c9c 100755 --- a/vagrant/Install-on-Centos-8.sh +++ b/vagrant/Install-on-Centos-8.sh @@ -6,10 +6,6 @@ # Installing the Required Software # ================================ # -# !!! caution -# These instructions are currently broken because they do not -# include installation of the required PHP library symfony-dotenv. -# # These instructions expect that you have a freshly installed CentOS version 8. # Make sure all packages are up-to-date by running: # @@ -39,7 +35,7 @@ python3-pip python3-setuptools python3-devel \ expat-devel zlib-devel - pip3 install --user psycopg2 + pip3 install --user psycopg2 python-dotenv # diff --git a/vagrant/Install-on-Ubuntu-18.sh b/vagrant/Install-on-Ubuntu-18.sh index e7233114..8a4d2b73 100755 --- a/vagrant/Install-on-Ubuntu-18.sh +++ b/vagrant/Install-on-Ubuntu-18.sh @@ -29,7 +29,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: libbz2-dev libpq-dev libproj-dev \ postgresql-server-dev-10 postgresql-10-postgis-2.4 \ postgresql-contrib-10 postgresql-10-postgis-scripts \ - php php-pgsql php-intl php-symfony-dotenv \ + php php-pgsql php-intl python3-dotenv \ python3-psycopg2 git diff --git a/vagrant/Install-on-Ubuntu-20.sh b/vagrant/Install-on-Ubuntu-20.sh index 292714e9..94afca78 100644 --- a/vagrant/Install-on-Ubuntu-20.sh +++ b/vagrant/Install-on-Ubuntu-20.sh @@ -32,7 +32,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: libbz2-dev libpq-dev libproj-dev \ postgresql-server-dev-12 postgresql-12-postgis-3 \ postgresql-contrib-12 postgresql-12-postgis-3-scripts \ - php php-pgsql php-intl php-symfony-dotenv \ + php php-pgsql php-intl python3-dotenv \ python3-psycopg2 git #