From: Sarah Hoffmann Date: Mon, 26 Jul 2021 10:38:56 +0000 (+0200) Subject: Merge pull request #2401 from lonvia/port-add-data-to-python X-Git-Tag: v4.0.0~47 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/5d7d7f15d9ca4241669300729d4ac6dea7e3a537?hp=8096a1d67f02d63fee1e94722673ce3c686e8d33 Merge pull request #2401 from lonvia/port-add-data-to-python Port add-data functions from PHP to Python --- diff --git a/lib-php/admin/update.php b/lib-php/admin/update.php deleted file mode 100644 index 3075070a..00000000 --- a/lib-php/admin/update.php +++ /dev/null @@ -1,150 +0,0 @@ -connect(); -$fPostgresVersion = $oDB->getPostgresVersion(); - -$aDSNInfo = Nominatim\DB::parseDSN(getSetting('DATABASE_DSN')); -if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) { - $aDSNInfo['port'] = 5432; -} - -// cache memory to be used by osm2pgsql, should not be more than the available memory -$iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000); -if ($iCacheMemory + 500 > getTotalMemoryMB()) { - $iCacheMemory = getCacheMemoryMB(); - echo "WARNING: resetting cache memory to $iCacheMemory\n"; -} - -$oOsm2pgsqlCmd = (new \Nominatim\Shell(getOsm2pgsqlBinary())) - ->addParams('--hstore') - ->addParams('--latlong') - ->addParams('--append') - ->addParams('--slim') - ->addParams('--with-forward-dependencies', 'false') - ->addParams('--log-progress', 'true') - ->addParams('--number-processes', 1) - ->addParams('--cache', $iCacheMemory) - ->addParams('--output', 'gazetteer') - ->addParams('--style', getImportStyle()) - ->addParams('--database', $aDSNInfo['database']) - ->addParams('--port', $aDSNInfo['port']); - -if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) { - $oOsm2pgsqlCmd->addParams('--host', $aDSNInfo['hostspec']); -} -if (isset($aDSNInfo['username']) && $aDSNInfo['username']) { - $oOsm2pgsqlCmd->addParams('--user', $aDSNInfo['username']); -} -if (isset($aDSNInfo['password']) && $aDSNInfo['password']) { - 
$oOsm2pgsqlCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']); -} -if (getSetting('FLATNODE_FILE')) { - $oOsm2pgsqlCmd->addParams('--flat-nodes', getSetting('FLATNODE_FILE')); -} -if ($fPostgresVersion >= 11.0) { - $oOsm2pgsqlCmd->addEnvPair( - 'PGOPTIONS', - '-c jit=off -c max_parallel_workers_per_gather=0' - ); -} - -if (isset($aResult['import-diff']) || isset($aResult['import-file'])) { - // import diffs and files directly (e.g. from osmosis --rri) - $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file']; - - if (!file_exists($sNextFile)) { - fail("Cannot open $sNextFile\n"); - } - - // Import the file - $oCMD = (clone $oOsm2pgsqlCmd)->addParams($sNextFile); - echo $oCMD->escapedCmd()."\n"; - $iRet = $oCMD->run(); - - if ($iRet) { - fail("Error from osm2pgsql, $iRet\n"); - } - - // Don't update the import status - we don't know what this file contains -} - -$sTemporaryFile = CONST_InstallDir.'/osmosischange.osc'; -$bHaveDiff = false; -$bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api']; -$sContentURL = ''; -if (isset($aResult['import-node']) && $aResult['import-node']) { - if ($bUseOSMApi) { - $sContentURL = 'https://www.openstreetmap.org/api/0.6/node/'.$aResult['import-node']; - } else { - $sContentURL = 'https://overpass-api.de/api/interpreter?data=node('.$aResult['import-node'].');out%20meta;'; - } -} - -if (isset($aResult['import-way']) && $aResult['import-way']) { - if ($bUseOSMApi) { - $sContentURL = 'https://www.openstreetmap.org/api/0.6/way/'.$aResult['import-way'].'/full'; - } else { - $sContentURL = 'https://overpass-api.de/api/interpreter?data=(way('.$aResult['import-way'].');%3E;);out%20meta;'; - } -} - -if (isset($aResult['import-relation']) && $aResult['import-relation']) { - if ($bUseOSMApi) { - $sContentURL = 'https://www.openstreetmap.org/api/0.6/relation/'.$aResult['import-relation'].'/full'; - } else { - $sContentURL = 
'https://overpass-api.de/api/interpreter?data=(rel(id:'.$aResult['import-relation'].');%3E;);out%20meta;'; - } -} - -if ($sContentURL) { - file_put_contents($sTemporaryFile, file_get_contents($sContentURL)); - $bHaveDiff = true; -} - -if ($bHaveDiff) { - // import generated change file - - $oCMD = (clone $oOsm2pgsqlCmd)->addParams($sTemporaryFile); - echo $oCMD->escapedCmd()."\n"; - - $iRet = $oCMD->run(); - if ($iRet) { - fail("osm2pgsql exited with error level $iRet\n"); - } -} diff --git a/nominatim/cli.py b/nominatim/cli.py index 5626deb4..7fae205b 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -114,63 +114,6 @@ class CommandlineParser: # # No need to document the functions each time. # pylint: disable=C0111 -# Using non-top-level imports to make pyosmium optional for replication only. -# pylint: disable=E0012,C0415 -class UpdateAddData: - """\ - Add additional data from a file or an online source. - - Data is only imported, not indexed. You need to call `nominatim index` - to complete the process. 
- """ - - @staticmethod - def add_args(parser): - group_name = parser.add_argument_group('Source') - group = group_name.add_mutually_exclusive_group(required=True) - group.add_argument('--file', metavar='FILE', - help='Import data from an OSM file') - group.add_argument('--diff', metavar='FILE', - help='Import data from an OSM diff file') - group.add_argument('--node', metavar='ID', type=int, - help='Import a single node from the API') - group.add_argument('--way', metavar='ID', type=int, - help='Import a single way from the API') - group.add_argument('--relation', metavar='ID', type=int, - help='Import a single relation from the API') - group.add_argument('--tiger-data', metavar='DIR', - help='Add housenumbers from the US TIGER census database.') - group = parser.add_argument_group('Extra arguments') - group.add_argument('--use-main-api', action='store_true', - help='Use OSM API instead of Overpass to download objects') - - @staticmethod - def run(args): - from nominatim.tokenizer import factory as tokenizer_factory - from nominatim.tools import tiger_data - - if args.tiger_data: - tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config) - return tiger_data.add_tiger_data(args.tiger_data, - args.config, args.threads or 1, - tokenizer) - - params = ['update.php'] - if args.file: - params.extend(('--import-file', args.file)) - elif args.diff: - params.extend(('--import-diff', args.diff)) - elif args.node: - params.extend(('--import-node', args.node)) - elif args.way: - params.extend(('--import-way', args.way)) - elif args.relation: - params.extend(('--import-relation', args.relation)) - if args.use_main_api: - params.append('--use-main-api') - return run_legacy_script(*params, nominatim_env=args) - - class QueryExport: """\ Export addresses as CSV file from the database. 
"""
Implementation of the 'add-data' subcommand.
"""
import logging

# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415

LOG = logging.getLogger()

class UpdateAddData:
    """\
    Add additional data from a file or an online source.

    Data is only imported, not indexed. You need to call `nominatim index`
    to complete the process.
    """
    # NOTE(review): the class docstring above is presumably reused as the
    # help text of the subcommand, so it is deliberately left untouched.

    @staticmethod
    def add_args(parser):
        """ Register the command-line options of the subcommand on `parser`.

            Exactly one of the 'Source' options must be given; the options
            in 'Extra arguments' are optional modifiers.
        """
        group_name = parser.add_argument_group('Source')
        group = group_name.add_mutually_exclusive_group(required=True)
        group.add_argument('--file', metavar='FILE',
                           help='Import data from an OSM file or diff file')
        group.add_argument('--diff', metavar='FILE',
                           help='Import data from an OSM diff file (deprecated: use --file)')
        group.add_argument('--node', metavar='ID', type=int,
                           help='Import a single node from the API')
        group.add_argument('--way', metavar='ID', type=int,
                           help='Import a single way from the API')
        group.add_argument('--relation', metavar='ID', type=int,
                           help='Import a single relation from the API')
        group.add_argument('--tiger-data', metavar='DIR',
                           help='Add housenumbers from the US TIGER census database.')
        group = parser.add_argument_group('Extra arguments')
        group.add_argument('--use-main-api', action='store_true',
                           help='Use OSM API instead of Overpass to download objects')
        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                           help='Size of cache to be used by osm2pgsql (in MB)')
        group.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
                           help='Set timeout for file downloads.')

    @staticmethod
    def run(args):
        """ Execute the subcommand for the parsed arguments `args`.

            Dispatches to the TIGER import, the file import or the download
            of a single OSM object, depending on which source option was
            given. Returns the exit status of the command as an integer.
        """
        import socket

        from nominatim.tokenizer import factory as tokenizer_factory
        from nominatim.tools import tiger_data, add_osm_data

        if args.tiger_data:
            tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
            return tiger_data.add_tiger_data(args.tiger_data,
                                             args.config, args.threads or 1,
                                             tokenizer)

        osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)
        if args.file or args.diff:
            # --diff is a deprecated alias of --file, both go the same way.
            return add_osm_data.add_data_from_file(args.file or args.diff,
                                                   osm2pgsql_params)

        # The option names equal the OSM type names, so one loop covers
        # --node, --way and --relation (at most one of them can be set).
        for osm_type in ('node', 'way', 'relation'):
            osm_id = getattr(args, osm_type)
            if osm_id:
                # Honour --socket-timeout, which was accepted but never
                # applied before. The value becomes the default timeout of
                # all sockets created from now on; presumably the download
                # helper relies on that default - TODO confirm.
                # A value of 0 would mean non-blocking sockets, so skip it.
                timeout = getattr(args, 'socket_timeout', None)
                if timeout:
                    socket.setdefaulttimeout(timeout)

                # Normalise a missing return value to the success status 0.
                return add_osm_data.add_osm_object(osm_type, osm_id,
                                                   args.use_main_api,
                                                   osm2pgsql_params) or 0

        return 0
"""
Function to add additional OSM data from a file or the API into the database.
"""
from pathlib import Path
import logging
# Import the submodule explicitly: a plain 'import urllib' does not
# guarantee that 'urllib.parse' is loaded.
import urllib.parse

from nominatim.tools.exec_utils import run_osm2pgsql, get_url

LOG = logging.getLogger()

def add_data_from_file(fname, options):
    """ Adds data from a OSM file to the database. The file may be a normal
        OSM file or a diff file in all formats supported by libosmium.

        `fname` is the name of the file to import. `options` is the
        dictionary of osm2pgsql parameters; it is modified in place
        ('import_file' and 'append' are set) before being handed to
        run_osm2pgsql().

        Always returns 0. Errors of osm2pgsql are reported through the
        exception raised by run_osm2pgsql().
    """
    options['import_file'] = Path(fname)
    options['append'] = True
    run_osm2pgsql(options)

    # No status update. We don't know where the file came from.
    return 0


def add_osm_object(osm_type, osm_id, use_main_api, options):
    """ Add or update a single OSM object from the latest version of the
        API.

        `osm_type` is one of 'node', 'way' or 'relation' and `osm_id` the
        numeric id of the object. With `use_main_api` set, the object is
        fetched from the main OSM API, otherwise from Overpass. Ways and
        relations are downloaded together with the objects they refer to.
        `options` is the dictionary of osm2pgsql parameters; it is modified
        in place ('append' and 'import_data' are set).

        Returns 0, just like add_data_from_file(), so that the result can
        be used directly as exit status of the command.
    """
    if use_main_api:
        base_url = f'https://www.openstreetmap.org/api/0.6/{osm_type}/{osm_id}'
        if osm_type in ('way', 'relation'):
            # '/full' also delivers the members of the object.
            base_url += '/full'
    else:
        # use Overpass API
        if osm_type == 'node':
            data = f'node({osm_id});out meta;'
        elif osm_type == 'way':
            data = f'(way({osm_id});>;);out meta;'
        else:
            data = f'(rel(id:{osm_id});>;);out meta;'
        # urlencode() takes care of quoting the query language characters.
        base_url = 'https://overpass-api.de/api/interpreter?' \
                   + urllib.parse.urlencode({'data': data})

    options['append'] = True
    # The downloaded document is handed to osm2pgsql as XML on stdin,
    # which requires bytes.
    options['import_data'] = get_url(base_url).encode('utf-8')

    run_osm2pgsql(options)

    # Was missing: without it the function returned None while its sibling
    # returns 0 and the CLI passes the value on as exit status.
    return 0
'add_data_from_file') + assert self.call_nominatim('add-data', '--' + name, str(oid)) == 0 + + assert mock_run_legacy.called == 1 + + + @pytest.mark.parametrize("name,oid", [('node', 12), ('way', 8), ('relation', 32)]) + def test_add_data_object_command(self, mock_func_factory, name, oid): + mock_run_legacy = mock_func_factory(nominatim.tools.add_osm_data, 'add_osm_object') assert self.call_nominatim('add-data', '--' + name, str(oid)) == 0 assert mock_run_legacy.called == 1 - assert mock_run_legacy.last_args == ('update.php', '--import-' + name, oid) def test_serve_command(self, mock_func_factory):