git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tools/exec_utils.py
force a fixed pool size and make it configurable
[nominatim.git] / nominatim / tools / exec_utils.py
index 560bb78166c54ba7859c5ea275522f8c70804f40..566ac06edc57d4374b4979095866c64c1e959d60 100644 (file)
@@ -1,42 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
 """
 Helper functions for executing external programs.
 """
+from typing import Any, Union, Optional, Mapping, IO
+from pathlib import Path
 import logging
+import os
 import subprocess
 import urllib.request as urlrequest
 from urllib.parse import urlencode
 
+from nominatim.config import Configuration
+from nominatim.typing import StrPath
 from nominatim.version import NOMINATIM_VERSION
 from nominatim.db.connection import get_pg_env
 
 LOG = logging.getLogger()
 
-def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False):
+def run_legacy_script(script: StrPath, *args: Union[int, str],
+                      config: Configuration,
+                      throw_on_fail: bool = False) -> int:
     """ Run a Nominatim PHP script with the given arguments.
 
         Returns the exit code of the script. If `throw_on_fail` is True
         then throw a `CalledProcessError` on a non-zero exit.
     """
     cmd = ['/usr/bin/env', 'php', '-Cq',
-           str(nominatim_env.phplib_dir / 'admin' / script)]
+           str(config.lib_dir.php / 'admin' / script)]
     cmd.extend([str(a) for a in args])
 
-    env = nominatim_env.config.get_os_env()
-    env['NOMINATIM_DATADIR'] = str(nominatim_env.data_dir)
-    env['NOMINATIM_SQLDIR'] = str(nominatim_env.sqllib_dir)
-    env['NOMINATIM_CONFIGDIR'] = str(nominatim_env.config_dir)
-    env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str(nominatim_env.module_dir)
+    env = config.get_os_env()
+    env['NOMINATIM_DATADIR'] = str(config.lib_dir.data)
+    env['NOMINATIM_SQLDIR'] = str(config.lib_dir.sql)
+    env['NOMINATIM_CONFIGDIR'] = str(config.config_dir)
+    env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str(config.lib_dir.module)
     if not env['NOMINATIM_OSM2PGSQL_BINARY']:
-        env['NOMINATIM_OSM2PGSQL_BINARY'] = str(nominatim_env.osm2pgsql_path)
+        env['NOMINATIM_OSM2PGSQL_BINARY'] = str(config.lib_dir.osm2pgsql)
 
-    proc = subprocess.run(cmd, cwd=str(nominatim_env.project_dir), env=env,
+    proc = subprocess.run(cmd, cwd=str(config.project_dir), env=env,
                           check=throw_on_fail)
 
     return proc.returncode
 
-def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
-                   params=None):
-    """ Execute a Nominiatim API function.
+def run_api_script(endpoint: str, project_dir: Path,
+                   extra_env: Optional[Mapping[str, str]] = None,
+                   phpcgi_bin: Optional[Path] = None,
+                   params: Optional[Mapping[str, Any]] = None) -> int:
+    """ Execute a Nominatim API function.
 
         The function needs a project directory that contains the website
         directory with the scripts to be executed. The scripts will be run
@@ -49,10 +64,10 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
     query_string = urlencode(params or {})
 
     env = dict(QUERY_STRING=query_string,
-               SCRIPT_NAME='/{}.php'.format(endpoint),
-               REQUEST_URI='/{}.php?{}'.format(endpoint, query_string),
+               SCRIPT_NAME=f'/{endpoint}.php',
+               REQUEST_URI=f'/{endpoint}.php?{query_string}',
                CONTEXT_DOCUMENT_ROOT=webdir,
-               SCRIPT_FILENAME='{}/{}.php'.format(webdir, endpoint),
+               SCRIPT_FILENAME=f'{webdir}/{endpoint}.php',
                HTTP_HOST='localhost',
                HTTP_USER_AGENT='nominatim-tool',
                REMOTE_ADDR='0.0.0.0',
@@ -90,30 +105,33 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
     return 0
 
 
-def run_php_server(server_address, base_dir):
+def run_php_server(server_address: str, base_dir: StrPath) -> None:
     """ Run the built-in server from the given directory.
     """
     subprocess.run(['/usr/bin/env', 'php', '-S', server_address],
                    cwd=str(base_dir), check=True)
 
 
-def run_osm2pgsql(options):
+def run_osm2pgsql(options: Mapping[str, Any]) -> None:
     """ Run osm2pgsql with the given options.
     """
     env = get_pg_env(options['dsn'])
     cmd = [str(options['osm2pgsql']),
            '--hstore', '--latlon', '--slim',
-           '--with-forward-dependencies', 'false',
            '--log-progress', 'true',
-           '--number-processes', str(options['threads']),
+           '--number-processes', '1' if options['append'] else str(options['threads']),
            '--cache', str(options['osm2pgsql_cache']),
-           '--output', 'gazetteer',
            '--style', str(options['osm2pgsql_style'])
           ]
-    if options['append']:
-        cmd.append('--append')
+
+    if str(options['osm2pgsql_style']).endswith('.lua'):
+        env['LUA_PATH'] = ';'.join((str(options['osm2pgsql_style_path'] / '?.lua'),
+                                    os.environ.get('LUAPATH', ';')))
+        cmd.extend(('--output', 'flex'))
     else:
-        cmd.append('--create')
+        cmd.extend(('--output', 'gazetteer'))
+
+    cmd.append('--append' if options['append'] else '--create')
 
     if options['flatnode_file']:
         cmd.extend(('--flat-nodes', options['flatnode_file']))
@@ -128,19 +146,28 @@ def run_osm2pgsql(options):
     if options.get('disable_jit', False):
         env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0'
 
-    cmd.append(str(options['import_file']))
+    if 'import_data' in options:
+        cmd.extend(('-r', 'xml', '-'))
+    elif isinstance(options['import_file'], list):
+        for fname in options['import_file']:
+            cmd.append(str(fname))
+    else:
+        cmd.append(str(options['import_file']))
 
-    subprocess.run(cmd, cwd=options.get('cwd', '.'), env=env, check=True)
+    subprocess.run(cmd, cwd=options.get('cwd', '.'),
+                   input=options.get('import_data'),
+                   env=env, check=True)
 
 
-def get_url(url):
+def get_url(url: str) -> str:
     """ Get the contents from the given URL and return it as a UTF-8 string.
     """
-    headers = {"User-Agent" : "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)}
+    headers = {"User-Agent": f"Nominatim/{NOMINATIM_VERSION!s}"}
 
     try:
-        with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
+        request = urlrequest.Request(url, headers=headers)
+        with urlrequest.urlopen(request) as response: # type: IO[bytes]
             return response.read().decode('utf-8')
-    except:
+    except Exception:
         LOG.fatal('Failed to load URL: %s', url)
         raise