1 # SPDX-License-Identifier: GPL-3.0-or-later
 
   3 # This file is part of Nominatim. (https://nominatim.org)
 
   5 # Copyright (C) 2024 by the Nominatim developer community.
 
   6 # For a full list of authors see the git log.
 
   8 Implementation of the 'export' subcommand.
 
  10 from typing import Optional, List, cast
 
  17 import nominatim_api as napi
 
  18 from nominatim_api.results import create_from_placex_row, ReverseResult, add_result_details
 
  19 from nominatim_api.types import LookupDetails
 
  21 import sqlalchemy as sa
 
  23 from ..errors import UsageError
 
  24 from .args import NominatimArgs
 
  27 LOG = logging.getLogger()
 
  41 RANK_TO_OUTPUT_MAP = {
 
  43     5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state',
 
  44     10: 'county', 11: 'county', 12: 'county',
 
  45     13: 'city', 14: 'city', 15: 'city', 16: 'city',
 
  46     17: 'suburb', 18: 'suburb', 19: 'suburb', 20: 'suburb', 21: 'suburb',
 
  47     26: 'street', 27: 'path'}
 
  52     Export places as CSV file from the database.
 
  57     def add_args(self, parser: argparse.ArgumentParser) -> None:
 
  58         group = parser.add_argument_group('Output arguments')
 
  59         group.add_argument('--output-type', default='street',
 
  60                            choices=('country', 'state', 'county',
 
  61                                     'city', 'suburb', 'street', 'path'),
 
  62                            help='Type of places to output (default: street)')
 
  63         group.add_argument('--output-format',
 
  64                            default='street;suburb;city;county;state;country',
 
  65                            help=("Semicolon-separated list of address types "
 
  66                                  "(see --output-type). Additionally accepts:"
 
  68         group.add_argument('--language',
 
  69                            help=("Preferred language for output "
 
  70                                  "(use local name, if omitted)"))
 
  71         group = parser.add_argument_group('Filter arguments')
 
  72         group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
 
  73                            help='Export only objects within country')
 
  74         group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
 
  76                            help='Export only children of this OSM node')
 
  77         group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
 
  79                            help='Export only children of this OSM way')
 
  80         group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
 
  82                            help='Export only children of this OSM relation')
 
  84     def run(self, args: NominatimArgs) -> int:
 
  85         return asyncio.run(export(args))
 
  88 async def export(args: NominatimArgs) -> int:
 
  89     """ The actual export as an asynchronous function.
 
  92     api = napi.NominatimAPIAsync(args.project_dir)
 
  95         output_range = RANK_RANGE_MAP[args.output_type]
 
  97         writer = init_csv_writer(args.output_format)
 
  99         async with api.begin() as conn, api.begin() as detail_conn:
 
 102             sql = sa.select(t.c.place_id, t.c.parent_place_id,
 
 103                             t.c.osm_type, t.c.osm_id, t.c.name,
 
 104                             t.c.class_, t.c.type, t.c.admin_level,
 
 105                             t.c.address, t.c.extratags,
 
 106                             t.c.housenumber, t.c.postcode, t.c.country_code,
 
 107                             t.c.importance, t.c.wikipedia, t.c.indexed_date,
 
 108                             t.c.rank_address, t.c.rank_search,
 
 110                     .where(t.c.linked_place_id == None)\
 
 111                     .where(t.c.rank_address.between(*output_range))
 
 113             parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
 
 115                 taddr = conn.t.addressline
 
 117                 sql = sql.join(taddr, taddr.c.place_id == t.c.place_id)\
 
 118                          .where(taddr.c.address_place_id == parent_place_id)\
 
 119                          .where(taddr.c.isaddress)
 
 121             if args.restrict_to_country:
 
 122                 sql = sql.where(t.c.country_code == args.restrict_to_country.lower())
 
 125             for row in await conn.execute(sql):
 
 126                 result = create_from_placex_row(row, ReverseResult)
 
 127                 if result is not None:
 
 128                     results.append(result)
 
 130                 if len(results) == 1000:
 
 131                     await dump_results(detail_conn, results, writer, args.language)
 
 135                 await dump_results(detail_conn, results, writer, args.language)
 
 142 def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
 
 143     fields = output_format.split(';')
 
 144     writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
 
 150 async def dump_results(conn: napi.SearchConnection,
 
 151                        results: List[ReverseResult],
 
 152                        writer: 'csv.DictWriter[str]',
 
 153                        lang: Optional[str]) -> None:
 
 154     await add_result_details(conn, results,
 
 155                              LookupDetails(address_details=True))
 
 157     locale = napi.Locales([lang] if lang else None)
 
 158     locale.localize_results(results)
 
 160     for result in results:
 
 161         data = {'placeid': result.place_id,
 
 162                 'postcode': result.postcode}
 
 164         for line in (result.address_rows or []):
 
 165             if line.isaddress and line.local_name:
 
 166                 if line.category[1] == 'postcode':
 
 167                     data['postcode'] = line.local_name
 
 168                 elif line.rank_address in RANK_TO_OUTPUT_MAP:
 
 169                     data[RANK_TO_OUTPUT_MAP[line.rank_address]] = line.local_name
 
 171         writer.writerow(data)
 
 174 async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
 
 175                         way_id: Optional[int],
 
 176                         relation_id: Optional[int]) -> Optional[int]:
 
 177     """ Get the place ID for the given OSM object.
 
 179     if node_id is not None:
 
 180         osm_type, osm_id = 'N', node_id
 
 181     elif way_id is not None:
 
 182         osm_type, osm_id = 'W', way_id
 
 183     elif relation_id is not None:
 
 184         osm_type, osm_id = 'R', relation_id
 
 189     sql = sa.select(t.c.place_id).limit(1)\
 
 190             .where(t.c.osm_type == osm_type)\
 
 191             .where(t.c.osm_id == osm_id)\
 
 192             .where(t.c.rank_address > 0)\
 
 193             .order_by(t.c.rank_address)
 
 195     for result in await conn.execute(sql):
 
 196         return cast(int, result[0])
 
 198     raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')