From a849f3c9ec6b5db193bdce5930ebec69b55545d4 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 17 Jul 2022 18:31:51 +0200 Subject: [PATCH] add type annotations for command line functions --- nominatim/cli.py | 75 ++++++------- nominatim/clicmd/__init__.py | 25 +++-- nominatim/clicmd/add_data.py | 55 +++++----- nominatim/clicmd/admin.py | 13 ++- nominatim/clicmd/api.py | 42 ++++---- nominatim/clicmd/args.py | 160 +++++++++++++++++++++++++++- nominatim/clicmd/freeze.py | 9 +- nominatim/clicmd/index.py | 10 +- nominatim/clicmd/refresh.py | 21 ++-- nominatim/clicmd/replication.py | 42 ++++---- nominatim/clicmd/setup.py | 62 ++++++----- nominatim/clicmd/special_phrases.py | 25 ++--- nominatim/tools/add_osm_data.py | 4 +- nominatim/tools/database_import.py | 2 +- nominatim/tools/exec_utils.py | 4 +- nominatim/tools/tiger_data.py | 6 +- 16 files changed, 368 insertions(+), 187 deletions(-) diff --git a/nominatim/cli.py b/nominatim/cli.py index f911023b..8c2136f4 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -8,6 +8,7 @@ Command-line interface to the Nominatim functions for import, update, database administration and querying. """ +from typing import Optional, Any, List, Union import logging import os import sys @@ -19,16 +20,15 @@ from nominatim.tools.exec_utils import run_legacy_script, run_php_server from nominatim.errors import UsageError from nominatim import clicmd from nominatim import version -from nominatim.clicmd.args import NominatimArgs +from nominatim.clicmd.args import NominatimArgs, Subcommand LOG = logging.getLogger() - class CommandlineParser: """ Wraps some of the common functions for parsing the command line and setting up subcommands. 
""" - def __init__(self, prog, description): + def __init__(self, prog: str, description: Optional[str]): self.parser = argparse.ArgumentParser( prog=prog, description=description, @@ -56,8 +56,8 @@ class CommandlineParser: group.add_argument('-j', '--threads', metavar='NUM', type=int, help='Number of parallel threads to use') - @staticmethod - def nominatim_version_text(): + + def nominatim_version_text(self) -> str: """ Program name and version number as string """ text = f'Nominatim version {version.version_str()}' @@ -65,11 +65,14 @@ class CommandlineParser: text += f' ({version.GIT_COMMIT_HASH})' return text - def add_subcommand(self, name, cmd): + + def add_subcommand(self, name: str, cmd: Subcommand) -> None: """ Add a subcommand to the parser. The subcommand must be a class with a function add_args() that adds the parameters for the subcommand and a run() function that executes the command. """ + assert cmd.__doc__ is not None + parser = self.subs.add_parser(name, parents=[self.default_args], help=cmd.__doc__.split('\n', 1)[0], description=cmd.__doc__, @@ -78,7 +81,8 @@ class CommandlineParser: parser.set_defaults(command=cmd) cmd.add_args(parser) - def run(self, **kwargs): + + def run(self, **kwargs: Any) -> int: """ Parse the command line arguments of the program and execute the appropriate subcommand. """ @@ -89,7 +93,7 @@ class CommandlineParser: return 1 if args.version: - print(CommandlineParser.nominatim_version_text()) + print(self.nominatim_version_text()) return 0 if args.subcommand is None: @@ -145,8 +149,7 @@ class QueryExport: Export addresses as CSV file from the database. 
""" - @staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('Output arguments') group.add_argument('--output-type', default='street', choices=('continent', 'country', 'state', 'county', @@ -175,11 +178,10 @@ class QueryExport: help='Export only children of this OSM relation') - @staticmethod - def run(args): - params = ['export.php', - '--output-type', args.output_type, - '--output-format', args.output_format] + def run(self, args: NominatimArgs) -> int: + params: List[Union[int, str]] = [ + '--output-type', args.output_type, + '--output-format', args.output_format] if args.output_all_postcodes: params.append('--output-all-postcodes') if args.language: @@ -193,7 +195,7 @@ class QueryExport: if args.restrict_to_osm_relation: params.extend(('--restrict-to-osm-relation', args.restrict_to_osm_relation)) - return run_legacy_script(*params, nominatim_env=args) + return run_legacy_script('export.php', *params, nominatim_env=args) class AdminServe: @@ -207,51 +209,52 @@ class AdminServe: By the default, the webserver can be accessed at: http://127.0.0.1:8088 """ - @staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('Server arguments') group.add_argument('--server', default='127.0.0.1:8088', help='The address the server will listen to.') - @staticmethod - def run(args): + + def run(self, args: NominatimArgs) -> int: run_php_server(args.server, args.project_dir / 'website') + return 0 + -def get_set_parser(**kwargs): +def get_set_parser(**kwargs: Any) -> CommandlineParser: """\ Initializes the parser and adds various subcommands for nominatim cli. 
""" parser = CommandlineParser('nominatim', nominatim.__doc__) - parser.add_subcommand('import', clicmd.SetupAll) - parser.add_subcommand('freeze', clicmd.SetupFreeze) - parser.add_subcommand('replication', clicmd.UpdateReplication) + parser.add_subcommand('import', clicmd.SetupAll()) + parser.add_subcommand('freeze', clicmd.SetupFreeze()) + parser.add_subcommand('replication', clicmd.UpdateReplication()) - parser.add_subcommand('special-phrases', clicmd.ImportSpecialPhrases) + parser.add_subcommand('special-phrases', clicmd.ImportSpecialPhrases()) - parser.add_subcommand('add-data', clicmd.UpdateAddData) - parser.add_subcommand('index', clicmd.UpdateIndex) + parser.add_subcommand('add-data', clicmd.UpdateAddData()) + parser.add_subcommand('index', clicmd.UpdateIndex()) parser.add_subcommand('refresh', clicmd.UpdateRefresh()) - parser.add_subcommand('admin', clicmd.AdminFuncs) + parser.add_subcommand('admin', clicmd.AdminFuncs()) - parser.add_subcommand('export', QueryExport) - parser.add_subcommand('serve', AdminServe) + parser.add_subcommand('export', QueryExport()) + parser.add_subcommand('serve', AdminServe()) if kwargs.get('phpcgi_path'): - parser.add_subcommand('search', clicmd.APISearch) - parser.add_subcommand('reverse', clicmd.APIReverse) - parser.add_subcommand('lookup', clicmd.APILookup) - parser.add_subcommand('details', clicmd.APIDetails) - parser.add_subcommand('status', clicmd.APIStatus) + parser.add_subcommand('search', clicmd.APISearch()) + parser.add_subcommand('reverse', clicmd.APIReverse()) + parser.add_subcommand('lookup', clicmd.APILookup()) + parser.add_subcommand('details', clicmd.APIDetails()) + parser.add_subcommand('status', clicmd.APIStatus()) else: parser.parser.epilog = 'php-cgi not found. Query commands not available.' return parser -def nominatim(**kwargs): +def nominatim(**kwargs: Any) -> int: """\ Command-line tools for importing, updating, administrating and querying the Nominatim database. 
diff --git a/nominatim/clicmd/__init__.py b/nominatim/clicmd/__init__.py index de541134..bdd9bafe 100644 --- a/nominatim/clicmd/__init__.py +++ b/nominatim/clicmd/__init__.py @@ -7,13 +7,20 @@ """ Subcommand definitions for the command-line tool. """ +# mypy and pylint disagree about the style of explicit exports, +# see https://github.com/PyCQA/pylint/issues/6006. +# pylint: disable=useless-import-alias -from nominatim.clicmd.setup import SetupAll -from nominatim.clicmd.replication import UpdateReplication -from nominatim.clicmd.api import APISearch, APIReverse, APILookup, APIDetails, APIStatus -from nominatim.clicmd.index import UpdateIndex -from nominatim.clicmd.refresh import UpdateRefresh -from nominatim.clicmd.add_data import UpdateAddData -from nominatim.clicmd.admin import AdminFuncs -from nominatim.clicmd.freeze import SetupFreeze -from nominatim.clicmd.special_phrases import ImportSpecialPhrases +from nominatim.clicmd.setup import SetupAll as SetupAll +from nominatim.clicmd.replication import UpdateReplication as UpdateReplication +from nominatim.clicmd.api import (APISearch as APISearch, + APIReverse as APIReverse, + APILookup as APILookup, + APIDetails as APIDetails, + APIStatus as APIStatus) +from nominatim.clicmd.index import UpdateIndex as UpdateIndex +from nominatim.clicmd.refresh import UpdateRefresh as UpdateRefresh +from nominatim.clicmd.add_data import UpdateAddData as UpdateAddData +from nominatim.clicmd.admin import AdminFuncs as AdminFuncs +from nominatim.clicmd.freeze import SetupFreeze as SetupFreeze +from nominatim.clicmd.special_phrases import ImportSpecialPhrases as ImportSpecialPhrases diff --git a/nominatim/clicmd/add_data.py b/nominatim/clicmd/add_data.py index 013d5310..8905bc21 100644 --- a/nominatim/clicmd/add_data.py +++ b/nominatim/clicmd/add_data.py @@ -7,10 +7,14 @@ """ Implementation of the 'add-data' subcommand. 
""" +from typing import cast +import argparse import logging import psutil +from nominatim.clicmd.args import NominatimArgs + # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 # Using non-top-level imports to avoid eventually unused imports. @@ -35,32 +39,31 @@ class UpdateAddData: for more information. """ - @staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: group_name = parser.add_argument_group('Source') - group = group_name.add_mutually_exclusive_group(required=True) - group.add_argument('--file', metavar='FILE', - help='Import data from an OSM file or diff file') - group.add_argument('--diff', metavar='FILE', - help='Import data from an OSM diff file (deprecated: use --file)') - group.add_argument('--node', metavar='ID', type=int, - help='Import a single node from the API') - group.add_argument('--way', metavar='ID', type=int, - help='Import a single way from the API') - group.add_argument('--relation', metavar='ID', type=int, - help='Import a single relation from the API') - group.add_argument('--tiger-data', metavar='DIR', - help='Add housenumbers from the US TIGER census database') - group = parser.add_argument_group('Extra arguments') - group.add_argument('--use-main-api', action='store_true', - help='Use OSM API instead of Overpass to download objects') - group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int, - help='Size of cache to be used by osm2pgsql (in MB)') - group.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60, - help='Set timeout for file downloads') - - @staticmethod - def run(args): + group1 = group_name.add_mutually_exclusive_group(required=True) + group1.add_argument('--file', metavar='FILE', + help='Import data from an OSM file or diff file') + group1.add_argument('--diff', metavar='FILE', + help='Import data from an OSM diff file (deprecated: use --file)') + group1.add_argument('--node', metavar='ID', type=int, + 
help='Import a single node from the API') + group1.add_argument('--way', metavar='ID', type=int, + help='Import a single way from the API') + group1.add_argument('--relation', metavar='ID', type=int, + help='Import a single relation from the API') + group1.add_argument('--tiger-data', metavar='DIR', + help='Add housenumbers from the US TIGER census database') + group2 = parser.add_argument_group('Extra arguments') + group2.add_argument('--use-main-api', action='store_true', + help='Use OSM API instead of Overpass to download objects') + group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int, + help='Size of cache to be used by osm2pgsql (in MB)') + group2.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60, + help='Set timeout for file downloads') + + + def run(self, args: NominatimArgs) -> int: from nominatim.tokenizer import factory as tokenizer_factory from nominatim.tools import tiger_data, add_osm_data @@ -73,7 +76,7 @@ class UpdateAddData: osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1) if args.file or args.diff: - return add_osm_data.add_data_from_file(args.file or args.diff, + return add_osm_data.add_data_from_file(cast(str, args.file or args.diff), osm2pgsql_params) if args.node: diff --git a/nominatim/clicmd/admin.py b/nominatim/clicmd/admin.py index 1ed0ac9b..ad900579 100644 --- a/nominatim/clicmd/admin.py +++ b/nominatim/clicmd/admin.py @@ -8,8 +8,10 @@ Implementation of the 'admin' subcommand. """ import logging +import argparse from nominatim.tools.exec_utils import run_legacy_script +from nominatim.clicmd.args import NominatimArgs # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 @@ -23,8 +25,7 @@ class AdminFuncs: Analyse and maintain the database. 
""" - @staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('Admin tasks') objs = group.add_mutually_exclusive_group(required=True) objs.add_argument('--warm', action='store_true', @@ -49,10 +50,9 @@ class AdminFuncs: mgroup.add_argument('--place-id', type=int, help='Analyse indexing of the given Nominatim object') - @staticmethod - def run(args): + def run(self, args: NominatimArgs) -> int: if args.warm: - return AdminFuncs._warm(args) + return self._warm(args) if args.check_database: LOG.warning('Checking database') @@ -73,8 +73,7 @@ class AdminFuncs: return 1 - @staticmethod - def _warm(args): + def _warm(self, args: NominatimArgs) -> int: LOG.warning('Warming database caches') params = ['warm.php'] if args.target == 'reverse': diff --git a/nominatim/clicmd/api.py b/nominatim/clicmd/api.py index ab7d1658..b899afad 100644 --- a/nominatim/clicmd/api.py +++ b/nominatim/clicmd/api.py @@ -7,10 +7,13 @@ """ Subcommand definitions for API calls from the command line. """ +from typing import Mapping, Dict +import argparse import logging from nominatim.tools.exec_utils import run_api_script from nominatim.errors import UsageError +from nominatim.clicmd.args import NominatimArgs # Do not repeat documentation of subcommand classes. 
# pylint: disable=C0111 @@ -42,7 +45,7 @@ DETAILS_SWITCHES = ( ('polygon_geojson', 'Include geometry of result') ) -def _add_api_output_arguments(parser): +def _add_api_output_arguments(parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('Output arguments') group.add_argument('--format', default='jsonv2', choices=['xml', 'json', 'jsonv2', 'geojson', 'geocodejson'], @@ -60,7 +63,7 @@ def _add_api_output_arguments(parser): "Parameter is difference tolerance in degrees.")) -def _run_api(endpoint, args, params): +def _run_api(endpoint: str, args: NominatimArgs, params: Mapping[str, object]) -> int: script_file = args.project_dir / 'website' / (endpoint + '.php') if not script_file.exists(): @@ -82,8 +85,7 @@ class APISearch: https://nominatim.org/release-docs/latest/api/Search/ """ - @staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('Query arguments') group.add_argument('--query', help='Free-form query string') @@ -109,8 +111,8 @@ class APISearch: help='Do not remove duplicates from the result list') - @staticmethod - def run(args): + def run(self, args: NominatimArgs) -> int: + params: Dict[str, object] if args.query: params = dict(q=args.query) else: @@ -145,8 +147,7 @@ class APIReverse: https://nominatim.org/release-docs/latest/api/Reverse/ """ - @staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('Query arguments') group.add_argument('--lat', type=float, required=True, help='Latitude of coordinate to look up (in WGS84)') @@ -158,8 +159,7 @@ class APIReverse: _add_api_output_arguments(parser) - @staticmethod - def run(args): + def run(self, args: NominatimArgs) -> int: params = dict(lat=args.lat, lon=args.lon, format=args.format) if args.zoom is not None: params['zoom'] = args.zoom @@ -187,8 +187,7 @@ class APILookup: https://nominatim.org/release-docs/latest/api/Lookup/ """ - 
@staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('Query arguments') group.add_argument('--id', metavar='OSMID', action='append', required=True, dest='ids', @@ -197,9 +196,8 @@ class APILookup: _add_api_output_arguments(parser) - @staticmethod - def run(args): - params = dict(osm_ids=','.join(args.ids), format=args.format) + def run(self, args: NominatimArgs) -> int: + params: Dict[str, object] = dict(osm_ids=','.join(args.ids), format=args.format) for param, _ in EXTRADATA_PARAMS: if getattr(args, param): @@ -224,8 +222,7 @@ class APIDetails: https://nominatim.org/release-docs/latest/api/Details/ """ - @staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('Query arguments') objs = group.add_mutually_exclusive_group(required=True) objs.add_argument('--node', '-n', type=int, @@ -246,8 +243,8 @@ class APIDetails: group.add_argument('--lang', '--accept-language', metavar='LANGS', help='Preferred language order for presenting search results') - @staticmethod - def run(args): + + def run(self, args: NominatimArgs) -> int: if args.node: params = dict(osmtype='N', osmid=args.node) elif args.way: @@ -276,12 +273,11 @@ class APIStatus: https://nominatim.org/release-docs/latest/api/Status/ """ - @staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('API parameters') group.add_argument('--format', default='text', choices=['text', 'json'], help='Format of result') - @staticmethod - def run(args): + + def run(self, args: NominatimArgs) -> int: return _run_api('status', args, dict(format=args.format)) diff --git a/nominatim/clicmd/args.py b/nominatim/clicmd/args.py index 6a3c8387..f5f1b98e 100644 --- a/nominatim/clicmd/args.py +++ b/nominatim/clicmd/args.py @@ -7,19 +7,175 @@ """ Provides custom functions over command-line arguments. 
""" +from typing import Optional, List, Dict, Any, Sequence, Tuple +import argparse import logging from pathlib import Path +from typing_extensions import Protocol + from nominatim.errors import UsageError +from nominatim.config import Configuration LOG = logging.getLogger() +class Subcommand(Protocol): + """ + Interface to be implemented by classes implementing a CLI subcommand. + """ + + def add_args(self, parser: argparse.ArgumentParser) -> None: + """ + Fill the given parser for the subcommand with the appropriate + parameters. + """ + + def run(self, args: 'NominatimArgs') -> int: + """ + Run the subcommand with the given parsed arguments. + """ + + class NominatimArgs: """ Customized namespace class for the nominatim command line tool to receive the command-line arguments. """ + # Basic environment set by root program. + config: Configuration + project_dir: Path + module_dir: Path + osm2pgsql_path: Path + phplib_dir: Path + sqllib_dir: Path + data_dir: Path + config_dir: Path + phpcgi_path: Path + + # Global switches + version: bool + subcommand: Optional[str] + command: Subcommand + + # Shared parameters + osm2pgsql_cache: Optional[int] + socket_timeout: int + + # Arguments added to all subcommands. 
+ verbose: int + threads: Optional[int] + + # Arguments to 'add-data' + file: Optional[str] + diff: Optional[str] + node: Optional[int] + way: Optional[int] + relation: Optional[int] + tiger_data: Optional[str] + use_main_api: bool + + # Arguments to 'admin' + warm: bool + check_database: bool + migrate: bool + analyse_indexing: bool + target: Optional[str] + osm_id: Optional[str] + place_id: Optional[int] + + # Arguments to 'import' + osm_file: List[str] + continue_at: Optional[str] + reverse_only: bool + no_partitions: bool + no_updates: bool + offline: bool + ignore_errors: bool + index_noanalyse: bool + + # Arguments to 'index' + boundaries_only: bool + no_boundaries: bool + minrank: int + maxrank: int + + # Arguments to 'export' + output_type: str + output_format: str + output_all_postcodes: bool + language: Optional[str] + restrict_to_country: Optional[str] + restrict_to_osm_node: Optional[int] + restrict_to_osm_way: Optional[int] + restrict_to_osm_relation: Optional[int] + + # Arguments to 'refresh' + postcodes: bool + word_tokens: bool + word_counts: bool + address_levels: bool + functions: bool + wiki_data: bool + importance: bool + website: bool + diffs: bool + enable_debug_statements: bool + data_object: Sequence[Tuple[str, int]] + data_area: Sequence[Tuple[str, int]] + + # Arguments to 'replication' + init: bool + update_functions: bool + check_for_updates: bool + once: bool + catch_up: bool + do_index: bool + + # Arguments to 'serve' + server: str + + # Arguments to 'special-phrases' + import_from_wiki: bool + import_from_csv: Optional[str] + no_replace: bool + + # Arguments to all query functions + format: str + addressdetails: bool + extratags: bool + namedetails: bool + lang: Optional[str] + polygon_output: Optional[str] + polygon_threshold: Optional[float] + + # Arguments to 'search' + query: Optional[str] + street: Optional[str] + city: Optional[str] + county: Optional[str] + state: Optional[str] + country: Optional[str] + postalcode: Optional[str] 
+ countrycodes: Optional[str] + exclude_place_ids: Optional[str] + limit: Optional[int] + viewbox: Optional[str] + bounded: bool + dedupe: bool + + # Arguments to 'reverse' + lat: float + lon: float + zoom: Optional[int] + + # Arguments to 'lookup' + ids: Sequence[str] + + # Arguments to 'details' + object_class: Optional[str] + - def osm2pgsql_options(self, default_cache, default_threads): + def osm2pgsql_options(self, default_cache: int, + default_threads: int) -> Dict[str, Any]: """ Return the standard osm2pgsql options that can be derived from the command line arguments. The resulting dict can be further customized and then used in `run_osm2pgsql()`. @@ -38,7 +194,7 @@ class NominatimArgs: ) - def get_osm_file_list(self): + def get_osm_file_list(self) -> Optional[List[Path]]: """ Return the --osm-file argument as a list of Paths or None if no argument was given. The function also checks if the files exist and raises a UsageError if one cannot be found. diff --git a/nominatim/clicmd/freeze.py b/nominatim/clicmd/freeze.py index b11880ca..5dfdd255 100644 --- a/nominatim/clicmd/freeze.py +++ b/nominatim/clicmd/freeze.py @@ -7,8 +7,10 @@ """ Implementation of the 'freeze' subcommand. """ +import argparse from nominatim.db.connection import connect +from nominatim.clicmd.args import NominatimArgs # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 @@ -27,12 +29,11 @@ class SetupFreeze: This command has the same effect as the `--no-updates` option for imports. 
""" - @staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: pass # No options - @staticmethod - def run(args): + + def run(self, args: NominatimArgs) -> int: from ..tools import freeze with connect(args.config.get_libpq_dsn()) as conn: diff --git a/nominatim/clicmd/index.py b/nominatim/clicmd/index.py index 73258be2..16b5311c 100644 --- a/nominatim/clicmd/index.py +++ b/nominatim/clicmd/index.py @@ -7,10 +7,13 @@ """ Implementation of the 'index' subcommand. """ +import argparse + import psutil from nominatim.db import status from nominatim.db.connection import connect +from nominatim.clicmd.args import NominatimArgs # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 @@ -28,8 +31,7 @@ class UpdateIndex: of indexing. For other cases, this function allows to run indexing manually. """ - @staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('Filter arguments') group.add_argument('--boundaries-only', action='store_true', help="""Index only administrative boundaries.""") @@ -40,8 +42,8 @@ class UpdateIndex: group.add_argument('--maxrank', '-R', type=int, metavar='RANK', default=30, help='Maximum/finishing rank') - @staticmethod - def run(args): + + def run(self, args: NominatimArgs) -> int: from ..indexer.indexer import Indexer from ..tokenizer import factory as tokenizer_factory diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index ecc7498e..dce28d98 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -7,11 +7,15 @@ """ Implementation of 'refresh' subcommand. 
""" -from argparse import ArgumentTypeError +from typing import Tuple, Optional +import argparse import logging from pathlib import Path +from nominatim.config import Configuration from nominatim.db.connection import connect +from nominatim.tokenizer.base import AbstractTokenizer +from nominatim.clicmd.args import NominatimArgs # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 @@ -20,12 +24,12 @@ from nominatim.db.connection import connect LOG = logging.getLogger() -def _parse_osm_object(obj): +def _parse_osm_object(obj: str) -> Tuple[str, int]: """ Parse the given argument into a tuple of OSM type and ID. Raises an ArgumentError if the format is not recognized. """ if len(obj) < 2 or obj[0].lower() not in 'nrw' or not obj[1:].isdigit(): - raise ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R].") + raise argparse.ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R].") return (obj[0].upper(), int(obj[1:])) @@ -42,11 +46,10 @@ class UpdateRefresh: Warning: the 'update' command must not be run in parallel with other update commands like 'replication' or 'add-data'. 
""" - def __init__(self): - self.tokenizer = None + def __init__(self) -> None: + self.tokenizer: Optional[AbstractTokenizer] = None - @staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('Data arguments') group.add_argument('--postcodes', action='store_true', help='Update postcode centroid table') @@ -80,7 +83,7 @@ class UpdateRefresh: help='Enable debug warning statements in functions') - def run(self, args): #pylint: disable=too-many-branches + def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches from ..tools import refresh, postcodes from ..indexer.indexer import Indexer @@ -155,7 +158,7 @@ class UpdateRefresh: return 0 - def _get_tokenizer(self, config): + def _get_tokenizer(self, config: Configuration) -> AbstractTokenizer: if self.tokenizer is None: from ..tokenizer import factory as tokenizer_factory diff --git a/nominatim/clicmd/replication.py b/nominatim/clicmd/replication.py index 9d946304..2d6396a1 100644 --- a/nominatim/clicmd/replication.py +++ b/nominatim/clicmd/replication.py @@ -7,6 +7,8 @@ """ Implementation of the 'replication' sub-command. """ +from typing import Optional +import argparse import datetime as dt import logging import socket @@ -15,6 +17,7 @@ import time from nominatim.db import status from nominatim.db.connection import connect from nominatim.errors import UsageError +from nominatim.clicmd.args import NominatimArgs LOG = logging.getLogger() @@ -41,8 +44,7 @@ class UpdateReplication: downloads and imports the next batch of updates. 
""" - @staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('Arguments for initialisation') group.add_argument('--init', action='store_true', help='Initialise the update process') @@ -68,8 +70,8 @@ class UpdateReplication: group.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60, help='Set timeout for file downloads') - @staticmethod - def _init_replication(args): + + def _init_replication(self, args: NominatimArgs) -> int: from ..tools import replication, refresh LOG.warning("Initialising replication updates") @@ -81,16 +83,17 @@ class UpdateReplication: return 0 - @staticmethod - def _check_for_updates(args): + def _check_for_updates(self, args: NominatimArgs) -> int: from ..tools import replication with connect(args.config.get_libpq_dsn()) as conn: return replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL) - @staticmethod - def _report_update(batchdate, start_import, start_index): - def round_time(delta): + + def _report_update(self, batchdate: dt.datetime, + start_import: dt.datetime, + start_index: Optional[dt.datetime]) -> None: + def round_time(delta: dt.timedelta) -> dt.timedelta: return dt.timedelta(seconds=int(delta.total_seconds())) end = dt.datetime.now(dt.timezone.utc) @@ -101,8 +104,7 @@ class UpdateReplication: round_time(end - batchdate)) - @staticmethod - def _compute_update_interval(args): + def _compute_update_interval(self, args: NominatimArgs) -> int: if args.catch_up: return 0 @@ -119,13 +121,13 @@ class UpdateReplication: return update_interval - @staticmethod - def _update(args): + def _update(self, args: NominatimArgs) -> None: + # pylint: disable=too-many-locals from ..tools import replication from ..indexer.indexer import Indexer from ..tokenizer import factory as tokenizer_factory - update_interval = UpdateReplication._compute_update_interval(args) + update_interval = self._compute_update_interval(args) params 
= args.osm2pgsql_options(default_cache=2000, default_threads=1) params.update(base_url=args.config.REPLICATION_URL, @@ -169,7 +171,8 @@ class UpdateReplication: indexer.index_full(analyse=False) if LOG.isEnabledFor(logging.WARNING): - UpdateReplication._report_update(batchdate, start, index_start) + assert batchdate is not None + self._report_update(batchdate, start, index_start) if args.once or (args.catch_up and state is replication.UpdateState.NO_CHANGES): break @@ -179,15 +182,14 @@ class UpdateReplication: time.sleep(recheck_interval) - @staticmethod - def run(args): + def run(self, args: NominatimArgs) -> int: socket.setdefaulttimeout(args.socket_timeout) if args.init: - return UpdateReplication._init_replication(args) + return self._init_replication(args) if args.check_for_updates: - return UpdateReplication._check_for_updates(args) + return self._check_for_updates(args) - UpdateReplication._update(args) + self._update(args) return 0 diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index 73095468..6ffa7afb 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -7,14 +7,20 @@ """ Implementation of the 'import' subcommand. """ +from typing import Optional +import argparse import logging from pathlib import Path import psutil -from nominatim.db.connection import connect +from nominatim.config import Configuration +from nominatim.db.connection import connect, Connection from nominatim.db import status, properties +from nominatim.tokenizer.base import AbstractTokenizer from nominatim.version import version_str +from nominatim.clicmd.args import NominatimArgs +from nominatim.errors import UsageError # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 @@ -32,38 +38,36 @@ class SetupAll: needs superuser rights on the database. 
""" - @staticmethod - def add_args(parser): + def add_args(self, parser: argparse.ArgumentParser) -> None: group_name = parser.add_argument_group('Required arguments') - group = group_name.add_mutually_exclusive_group(required=True) - group.add_argument('--osm-file', metavar='FILE', action='append', + group1 = group_name.add_mutually_exclusive_group(required=True) + group1.add_argument('--osm-file', metavar='FILE', action='append', help='OSM file to be imported' ' (repeat for importing multiple files)') - group.add_argument('--continue', dest='continue_at', + group1.add_argument('--continue', dest='continue_at', choices=['load-data', 'indexing', 'db-postprocess'], help='Continue an import that was interrupted') - group = parser.add_argument_group('Optional arguments') - group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int, + group2 = parser.add_argument_group('Optional arguments') + group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int, help='Size of cache to be used by osm2pgsql (in MB)') - group.add_argument('--reverse-only', action='store_true', + group2.add_argument('--reverse-only', action='store_true', help='Do not create tables and indexes for searching') - group.add_argument('--no-partitions', action='store_true', + group2.add_argument('--no-partitions', action='store_true', help=("Do not partition search indices " "(speeds up import of single country extracts)")) - group.add_argument('--no-updates', action='store_true', + group2.add_argument('--no-updates', action='store_true', help="Do not keep tables that are only needed for " "updating the database later") - group.add_argument('--offline', action='store_true', + group2.add_argument('--offline', action='store_true', help="Do not attempt to load any additional data from the internet") - group = parser.add_argument_group('Expert options') - group.add_argument('--ignore-errors', action='store_true', + group3 = parser.add_argument_group('Expert options') + 
group3.add_argument('--ignore-errors', action='store_true', help='Continue import even when errors in SQL are present') - group.add_argument('--index-noanalyse', action='store_true', + group3.add_argument('--index-noanalyse', action='store_true', help='Do not perform analyse operations during index (expert only)') - @staticmethod - def run(args): # pylint: disable=too-many-statements + def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements from ..data import country_info from ..tools import database_import, refresh, postcodes, freeze from ..indexer.indexer import Indexer @@ -72,6 +76,8 @@ class SetupAll: if args.continue_at is None: files = args.get_osm_file_list() + if not files: + raise UsageError("No input files (use --osm-file).") LOG.warning('Creating database') database_import.setup_database_skeleton(args.config.get_libpq_dsn(), @@ -88,7 +94,7 @@ class SetupAll: drop=args.no_updates, ignore_errors=args.ignore_errors) - SetupAll._setup_tables(args.config, args.reverse_only) + self._setup_tables(args.config, args.reverse_only) LOG.warning('Importing wikipedia importance data') data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir) @@ -107,7 +113,7 @@ class SetupAll: args.threads or psutil.cpu_count() or 1) LOG.warning("Setting up tokenizer") - tokenizer = SetupAll._get_tokenizer(args.continue_at, args.config) + tokenizer = self._get_tokenizer(args.continue_at, args.config) if args.continue_at is None or args.continue_at == 'load-data': LOG.warning('Calculate postcodes') @@ -117,7 +123,7 @@ class SetupAll: if args.continue_at is None or args.continue_at in ('load-data', 'indexing'): if args.continue_at is not None and args.continue_at != 'load-data': with connect(args.config.get_libpq_dsn()) as conn: - SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX) + self._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX) LOG.warning('Indexing places') indexer = 
Indexer(args.config.get_libpq_dsn(), tokenizer, args.threads or psutil.cpu_count() or 1) @@ -142,13 +148,12 @@ class SetupAll: with connect(args.config.get_libpq_dsn()) as conn: refresh.setup_website(webdir, args.config, conn) - SetupAll._finalize_database(args.config.get_libpq_dsn(), args.offline) + self._finalize_database(args.config.get_libpq_dsn(), args.offline) return 0 - @staticmethod - def _setup_tables(config, reverse_only): + def _setup_tables(self, config: Configuration, reverse_only: bool) -> None: """ Set up the basic database layout: tables, indexes and functions. """ from ..tools import database_import, refresh @@ -169,8 +174,8 @@ class SetupAll: refresh.create_functions(conn, config, False, False) - @staticmethod - def _get_tokenizer(continue_at, config): + def _get_tokenizer(self, continue_at: Optional[str], + config: Configuration) -> AbstractTokenizer: """ Set up a new tokenizer or load an already initialised one. """ from ..tokenizer import factory as tokenizer_factory @@ -182,8 +187,8 @@ class SetupAll: # just load the tokenizer return tokenizer_factory.get_tokenizer_for_db(config) - @staticmethod - def _create_pending_index(conn, tablespace): + + def _create_pending_index(self, conn: Connection, tablespace: str) -> None: """ Add a supporting index for finding places still to be indexed. This index is normally created at the end of the import process @@ -204,8 +209,7 @@ class SetupAll: conn.commit() - @staticmethod - def _finalize_database(dsn, offline): + def _finalize_database(self, dsn: str, offline: bool) -> None: """ Determine the database date and set the status accordingly. """ with connect(dsn) as conn: diff --git a/nominatim/clicmd/special_phrases.py b/nominatim/clicmd/special_phrases.py index a2c346de..beac0c84 100644 --- a/nominatim/clicmd/special_phrases.py +++ b/nominatim/clicmd/special_phrases.py @@ -7,13 +7,16 @@ """ Implementation of the 'special-phrases' command. 
""" +import argparse import logging from pathlib import Path + from nominatim.errors import UsageError from nominatim.db.connection import connect -from nominatim.tools.special_phrases.sp_importer import SPImporter +from nominatim.tools.special_phrases.sp_importer import SPImporter, SpecialPhraseLoader from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader +from nominatim.clicmd.args import NominatimArgs LOG = logging.getLogger() @@ -49,8 +52,8 @@ class ImportSpecialPhrases: with custom rules into the project directory or by using the `--config` option to point to another configuration file. """ - @staticmethod - def add_args(parser): + + def add_args(self, parser: argparse.ArgumentParser) -> None: group = parser.add_argument_group('Input arguments') group.add_argument('--import-from-wiki', action='store_true', help='Import special phrases from the OSM wiki to the database') @@ -58,26 +61,24 @@ class ImportSpecialPhrases: help='Import special phrases from a CSV file') group.add_argument('--no-replace', action='store_true', help='Keep the old phrases and only add the new ones') - group.add_argument('--config', action='store', - help='Configuration file for black/white listing ' - '(default: phrase-settings.json)') - @staticmethod - def run(args): + + def run(self, args: NominatimArgs) -> int: + if args.import_from_wiki: - ImportSpecialPhrases.start_import(args, SPWikiLoader(args.config)) + self.start_import(args, SPWikiLoader(args.config)) if args.import_from_csv: if not Path(args.import_from_csv).is_file(): LOG.fatal("CSV file '%s' does not exist.", args.import_from_csv) raise UsageError('Cannot access file.') - ImportSpecialPhrases.start_import(args, SPCsvLoader(args.import_from_csv)) + self.start_import(args, SPCsvLoader(args.import_from_csv)) return 0 - @staticmethod - def start_import(args, loader): + + def start_import(self, args: NominatimArgs, loader: SpecialPhraseLoader) 
-> None: """ Create the SPImporter object containing the right sp loader and then start the import of special phrases. diff --git a/nominatim/tools/add_osm_data.py b/nominatim/tools/add_osm_data.py index d5d01754..fc016fec 100644 --- a/nominatim/tools/add_osm_data.py +++ b/nominatim/tools/add_osm_data.py @@ -29,7 +29,7 @@ def add_data_from_file(fname: str, options: MutableMapping[str, Any]) -> int: def add_osm_object(osm_type: str, osm_id: int, use_main_api: bool, - options: MutableMapping[str, Any]) -> None: + options: MutableMapping[str, Any]) -> int: """ Add or update a single OSM object from the latest version of the API. """ @@ -52,3 +52,5 @@ def add_osm_object(osm_type: str, osm_id: int, use_main_api: bool, options['import_data'] = get_url(base_url).encode('utf-8') run_osm2pgsql(options) + + return 0 diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index eda01013..fa60abf2 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -82,7 +82,7 @@ def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None: POSTGIS_REQUIRED_VERSION) -def import_osm_data(osm_files: Union[str, Sequence[str]], +def import_osm_data(osm_files: Union[Path, Sequence[Path]], options: MutableMapping[str, Any], drop: bool = False, ignore_errors: bool = False) -> None: """ Import the given OSM files. 'options' contains the list of diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index 4e6afdc2..610e2182 100644 --- a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -47,8 +47,8 @@ def run_legacy_script(script: StrPath, *args: Union[int, str], def run_api_script(endpoint: str, project_dir: Path, extra_env: Optional[Mapping[str, str]] = None, - phpcgi_bin: Optional[str] = None, - params: Optional[Mapping[str, str]] = None) -> int: + phpcgi_bin: Optional[Path] = None, + params: Optional[Mapping[str, Any]] = None) -> int: """ Execute a Nominatim API function. 
The function needs a project directory that contains the website diff --git a/nominatim/tools/tiger_data.py b/nominatim/tools/tiger_data.py index 4988e33c..4a32bb1e 100644 --- a/nominatim/tools/tiger_data.py +++ b/nominatim/tools/tiger_data.py @@ -108,14 +108,14 @@ def handle_threaded_sql_statements(pool: WorkerPool, fd: TextIO, def add_tiger_data(data_dir: str, config: Configuration, threads: int, - tokenizer: AbstractTokenizer) -> None: + tokenizer: AbstractTokenizer) -> int: """ Import tiger data from directory or tar file `data dir`. """ dsn = config.get_libpq_dsn() with TigerInput(data_dir) as tar: if not tar: - return + return 1 with connect(dsn) as conn: sql = SQLPreprocessor(conn, config) @@ -137,3 +137,5 @@ def add_tiger_data(data_dir: str, config: Configuration, threads: int, with connect(dsn) as conn: sql = SQLPreprocessor(conn, config) sql.run_sql_file(conn, 'tiger_import_finish.sql') + + return 0 -- 2.45.2