2 Implementation of 'refresh' subcommand.
 
   5 from pathlib import Path
 
   7 from nominatim.db.connection import connect
 
   9 # Do not repeat documentation of subcommand classes.
 
  10 # pylint: disable=C0111
 
  11 # Using non-top-level imports to avoid potentially unused imports.
 
  12 # pylint: disable=E0012,C0415
 
  14 LOG = logging.getLogger()
 
  18     Recompute auxiliary data used by the indexing process.
 
  20     This sub-command updates various static data and functions in the database.
 
  21     It usually needs to be run after changing various aspects of the
 
  22     configuration. The configuration documentation will mention the exact
 
  23     command to use in such case.
 
  25     Warning: the 'refresh' command must not be run in parallel with other update
 
  26              commands like 'replication' or 'add-data'.
 
  33         group = parser.add_argument_group('Data arguments')
 
  34         group.add_argument('--postcodes', action='store_true',
 
  35                            help='Update postcode centroid table')
 
  36         group.add_argument('--word-counts', action='store_true',
 
  37                            help='Compute frequency of full-word search terms')
 
  38         group.add_argument('--address-levels', action='store_true',
 
  39                            help='Reimport address level configuration')
 
  40         group.add_argument('--functions', action='store_true',
 
  41                            help='Update the PL/pgSQL functions in the database')
 
  42         group.add_argument('--wiki-data', action='store_true',
 
  43                            help='Update Wikipedia/data importance numbers')
 
  44         group.add_argument('--importance', action='store_true',
 
  45                            help='Recompute place importances (expensive!)')
 
  46         group.add_argument('--website', action='store_true',
 
  47                            help='Refresh the directory that serves the scripts for the web API')
 
  48         group = parser.add_argument_group('Arguments for function refresh')
 
  49         group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
 
  50                            help='Do not enable code for propagating updates')
 
  51         group.add_argument('--enable-debug-statements', action='store_true',
 
  52                            help='Enable debug warning statements in functions')
 
  56         from ..tools import refresh, postcodes
 
  57         from ..indexer.indexer import Indexer
 
  61             if postcodes.can_compute(args.config.get_libpq_dsn()):
 
  62                 LOG.warning("Update postcodes centroid")
 
  63                 tokenizer = self._get_tokenizer(args.config)
 
  64                 postcodes.update_postcodes(args.config.get_libpq_dsn(),
 
  65                                            args.project_dir, tokenizer)
 
  66                 indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
 
  68                 indexer.index_postcodes()
 
  70                 LOG.error("The place table doesn't exist. "
 
  71                           "Postcode updates on a frozen database is not possible.")
 
  74             LOG.warning('Recompute word statistics')
 
  75             self._get_tokenizer(args.config).update_statistics()
 
  77         if args.address_levels:
 
  78             LOG.warning('Updating address levels')
 
  79             with connect(args.config.get_libpq_dsn()) as conn:
 
  80                 refresh.load_address_levels_from_config(conn, args.config)
 
  83             LOG.warning('Create functions')
 
  84             with connect(args.config.get_libpq_dsn()) as conn:
 
  85                 refresh.create_functions(conn, args.config,
 
  86                                          args.diffs, args.enable_debug_statements)
 
  87                 self._get_tokenizer(args.config).update_sql_functions(args.config)
 
  90             data_path = Path(args.config.WIKIPEDIA_DATA_PATH
 
  92             LOG.warning('Import wikipdia article importance from %s', data_path)
 
  93             if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
 
  95                 LOG.fatal('FATAL: Wikipedia importance dump file not found')
 
  98         # Attention: importance MUST come after wiki data import.
 
 100             LOG.warning('Update importance values for database')
 
 101             with connect(args.config.get_libpq_dsn()) as conn:
 
 102                 refresh.recompute_importance(conn)
 
 105             webdir = args.project_dir / 'website'
 
 106             LOG.warning('Setting up website directory at %s', webdir)
 
 107             with connect(args.config.get_libpq_dsn()) as conn:
 
 108                 refresh.setup_website(webdir, args.config, conn)
 
 113     def _get_tokenizer(self, config):
 
 114         if self.tokenizer is None:
 
 115             from ..tokenizer import factory as tokenizer_factory
 
 117             self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
 
 119         return self.tokenizer