git.openstreetmap.org Git - nominatim.git/commitdiff
implementation of 'nominatim index'
author Sarah Hoffmann <lonvia@denofr.de>
Sun, 17 Jan 2021 19:05:41 +0000 (20:05 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Mon, 18 Jan 2021 14:43:27 +0000 (15:43 +0100)
nominatim/cli.py
nominatim/db/async_connection.py
nominatim/indexer/indexer.py

index 8d4071db973a117f5ba5c501b70dafed6bdd033a..acb6839fa303937847bc54616f226713149ad836 100644 (file)
@@ -11,6 +11,17 @@ from pathlib import Path
 from .config import Configuration
 from .admin.exec_utils import run_legacy_script
 
+from .indexer.indexer import Indexer
+
+def _num_system_cpus():
+    try:
+        cpus = len(os.sched_getaffinity(0))
+    except NotImplementedError:
+        cpus = None
+
+    return cpus or os.cpu_count()
+
+
 class CommandlineParser:
     """ Wraps some of the common functions for parsing the command line
         and setting up subcommands.
@@ -297,11 +308,27 @@ class UpdateIndex:
 
     @staticmethod
     def add_args(parser):
-        pass
+        group = parser.add_argument_group('Filter arguments')
+        group.add_argument('--boundaries-only', action='store_true',
+                           help="""Index only administrative boundaries.""")
+        group.add_argument('--no-boundaries', action='store_true',
+                           help="""Index everything except administrative boundaries.""")
+        group.add_argument('--minrank', '-r', type=int, metavar='RANK', default=0,
+                           help='Minimum/starting rank')
+        group.add_argument('--maxrank', '-R', type=int, metavar='RANK', default=30,
+                           help='Maximum/finishing rank')
 
     @staticmethod
     def run(args):
-        return run_legacy_script('update.php', '--index', nominatim_env=args)
+        indexer = Indexer(args.config.get_libpq_dsn(),
+                          args.threads or _num_system_cpus() or 1)
+
+        if not args.no_boundaries:
+            indexer.index_boundaries(args.minrank, args.maxrank)
+        if not args.boundaries_only:
+            indexer.index_by_rank(args.minrank, args.maxrank)
+
+        return 0
 
 
 class UpdateRefresh:
index 85b844312ba271a638be8ef084dbe34a9274889d..45e83664663ba835419db49304209b9da6491d35 100644 (file)
@@ -11,26 +11,14 @@ from psycopg2.extras import wait_select
 
 LOG = logging.getLogger()
 
-def make_connection(options, asynchronous=False):
-    """ Create a psycopg2 connection from the given options.
-    """
-    params = {'dbname' : options.dbname,
-              'user' : options.user,
-              'password' : options.password,
-              'host' : options.host,
-              'port' : options.port,
-              'async' : asynchronous}
-
-    return psycopg2.connect(**params)
-
 class DBConnection:
     """ A single non-blocking database connection.
     """
 
-    def __init__(self, options):
+    def __init__(self, dsn):
         self.current_query = None
         self.current_params = None
-        self.options = options
+        self.dsn = dsn
 
         self.conn = None
         self.cursor = None
@@ -46,7 +34,9 @@ class DBConnection:
             self.cursor.close()
             self.conn.close()
 
-        self.conn = make_connection(self.options, asynchronous=True)
+        # Use a dict to hand in the parameters because async is a reserved
+        # word in Python3.
+        self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
         self.wait()
 
         self.cursor = self.conn.cursor()
index 520464565bebc96f04769b336c04604a531bab8f..d86303c4d018a3a91667414e0c36c1b7db40a93f 100644 (file)
@@ -5,8 +5,10 @@ Main work horse for indexing (computing addresses) the database.
 import logging
 import select
 
+import psycopg2
+
 from .progress import ProgressLogger
-from db.async_connection import DBConnection, make_connection
+from ..db.async_connection import DBConnection
 
 LOG = logging.getLogger()
 
@@ -94,34 +96,33 @@ class Indexer:
     """ Main indexing routine.
     """
 
-    def __init__(self, opts):
-        self.minrank = max(1, opts.minrank)
-        self.maxrank = min(30, opts.maxrank)
-        self.conn = make_connection(opts)
-        self.threads = [DBConnection(opts) for _ in range(opts.threads)]
+    def __init__(self, dsn, num_threads):
+        self.conn = psycopg2.connect(dsn)
+        self.threads = [DBConnection(dsn) for _ in range(num_threads)]
 
-    def index_boundaries(self):
+    def index_boundaries(self, minrank, maxrank):
         LOG.warning("Starting indexing boundaries using %s threads",
                     len(self.threads))
 
-        for rank in range(max(self.minrank, 5), min(self.maxrank, 26)):
+        for rank in range(max(minrank, 5), min(maxrank, 26)):
             self.index(BoundaryRunner(rank))
 
-    def index_by_rank(self):
+    def index_by_rank(self, minrank, maxrank):
         """ Run classic indexing by rank.
         """
+        maxrank = min(maxrank, 30)
         LOG.warning("Starting indexing rank (%i to %i) using %i threads",
-                    self.minrank, self.maxrank, len(self.threads))
+                    minrank, maxrank, len(self.threads))
 
-        for rank in range(max(1, self.minrank), self.maxrank):
+        for rank in range(max(1, minrank), maxrank):
             self.index(RankRunner(rank))
 
-        if self.maxrank == 30:
+        if maxrank == 30:
             self.index(RankRunner(0))
             self.index(InterpolationRunner(), 20)
-            self.index(RankRunner(self.maxrank), 20)
+            self.index(RankRunner(30), 20)
         else:
-            self.index(RankRunner(self.maxrank))
+            self.index(RankRunner(maxrank))
 
     def index(self, obj, batch=1):
         """ Index a single rank or table. `obj` describes the SQL to use