# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'export' subcommand.
"""
 
import argparse
import asyncio
import csv
import logging
import sys
from typing import Optional, List, cast

import sqlalchemy as sa

from nominatim.clicmd.args import NominatimArgs
import nominatim.api as napi
from nominatim.api.results import create_from_placex_row, ReverseResult, add_result_details
from nominatim.api.types import LookupDetails
from nominatim.errors import UsageError
 
  25 # Do not repeat documentation of subcommand classes.
 
  26 # pylint: disable=C0111
 
  27 # Using non-top-level imports to avoid eventually unused imports.
 
  28 # pylint: disable=E0012,C0415
 
  29 # Needed for SQLAlchemy
 
  30 # pylint: disable=singleton-comparison
 
  32 LOG = logging.getLogger()
 
  44 RANK_TO_OUTPUT_MAP = {
 
  46     5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state',
 
  47     10: 'county', 11: 'county', 12: 'county',
 
  48     13: 'city', 14: 'city', 15: 'city', 16: 'city',
 
  49     17: 'suburb', 18: 'suburb', 19: 'suburb', 20: 'suburb', 21: 'suburb',
 
  50     26: 'street', 27: 'path'}
 
  54     Export places as CSV file from the database.
 
  59     def add_args(self, parser: argparse.ArgumentParser) -> None:
 
  60         group = parser.add_argument_group('Output arguments')
 
  61         group.add_argument('--output-type', default='street',
 
  62                            choices=('country', 'state', 'county',
 
  63                                     'city', 'suburb', 'street', 'path'),
 
  64                            help='Type of places to output (default: street)')
 
  65         group.add_argument('--output-format',
 
  66                            default='street;suburb;city;county;state;country',
 
  67                            help=("Semicolon-separated list of address types "
 
  68                                  "(see --output-type). Additionally accepts:"
 
  70         group.add_argument('--language',
 
  71                            help=("Preferred language for output "
 
  72                                  "(use local name, if omitted)"))
 
  73         group = parser.add_argument_group('Filter arguments')
 
  74         group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
 
  75                            help='Export only objects within country')
 
  76         group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
 
  78                            help='Export only children of this OSM node')
 
  79         group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
 
  81                            help='Export only children of this OSM way')
 
  82         group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
 
  84                            help='Export only children of this OSM relation')
 
  87     def run(self, args: NominatimArgs) -> int:
 
  88         return asyncio.run(export(args))
 
  91 async def export(args: NominatimArgs) -> int:
 
  92     """ The actual export as a asynchronous function.
 
  95     api = napi.NominatimAPIAsync(args.project_dir)
 
  98         output_range = RANK_RANGE_MAP[args.output_type]
 
 100         writer = init_csv_writer(args.output_format)
 
 102         async with api.begin() as conn, api.begin() as detail_conn:
 
 105             sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
 
 106                         t.c.class_, t.c.type, t.c.admin_level,
 
 107                         t.c.address, t.c.extratags,
 
 108                         t.c.housenumber, t.c.postcode, t.c.country_code,
 
 109                         t.c.importance, t.c.wikipedia, t.c.indexed_date,
 
 110                         t.c.rank_address, t.c.rank_search,
 
 112                      .where(t.c.linked_place_id == None)\
 
 113                      .where(t.c.rank_address.between(*output_range))
 
 115             parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
 
 117                 taddr = conn.t.addressline
 
 119                 sql = sql.join(taddr, taddr.c.place_id == t.c.place_id)\
 
 120                          .where(taddr.c.address_place_id == parent_place_id)\
 
 121                          .where(taddr.c.isaddress)
 
 123             if args.restrict_to_country:
 
 124                 sql = sql.where(t.c.country_code == args.restrict_to_country.lower())
 
 127             for row in await conn.execute(sql):
 
 128                 result = create_from_placex_row(row, ReverseResult)
 
 129                 if result is not None:
 
 130                     results.append(result)
 
 132                 if len(results) == 1000:
 
 133                     await dump_results(detail_conn, results, writer, args.language)
 
 137                 await dump_results(detail_conn, results, writer, args.language)
 
 144 def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
 
 145     fields = output_format.split(';')
 
 146     writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
 
 152 async def dump_results(conn: napi.SearchConnection,
 
 153                        results: List[ReverseResult],
 
 154                        writer: 'csv.DictWriter[str]',
 
 155                        lang: Optional[str]) -> None:
 
 156     await add_result_details(conn, results,
 
 157                              LookupDetails(address_details=True))
 
 160     locale = napi.Locales([lang] if lang else None)
 
 162     for result in results:
 
 163         data = {'placeid': result.place_id,
 
 164                 'postcode': result.postcode}
 
 166         result.localize(locale)
 
 167         for line in (result.address_rows or []):
 
 168             if line.isaddress and line.local_name:
 
 169                 if line.category[1] == 'postcode':
 
 170                     data['postcode'] = line.local_name
 
 171                 elif line.rank_address in RANK_TO_OUTPUT_MAP:
 
 172                     data[RANK_TO_OUTPUT_MAP[line.rank_address]] = line.local_name
 
 174         writer.writerow(data)
 
 177 async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
 
 178                         way_id: Optional[int],
 
 179                         relation_id: Optional[int]) -> Optional[int]:
 
 180     """ Get the place ID for the given OSM object.
 
 182     if node_id is not None:
 
 183         osm_type, osm_id = 'N', node_id
 
 184     elif way_id is not None:
 
 185         osm_type, osm_id = 'W', way_id
 
 186     elif relation_id is not None:
 
 187         osm_type, osm_id = 'R', relation_id
 
 192     sql = sa.select(t.c.place_id).limit(1)\
 
 193             .where(t.c.osm_type == osm_type)\
 
 194             .where(t.c.osm_id == osm_id)\
 
 195             .where(t.c.rank_address > 0)\
 
 196             .order_by(t.c.rank_address)
 
 198     for result in await conn.execute(sql):
 
 199         return cast(int, result[0])
 
 201     raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')