git.openstreetmap.org Git - nominatim.git/commitdiff
run final index creation in parallel
author Sarah Hoffmann <lonvia@denofr.de>
Wed, 14 Sep 2022 13:37:39 +0000 (15:37 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Mon, 19 Sep 2022 09:55:25 +0000 (11:55 +0200)
lib-sql/indices.sql
nominatim/clicmd/setup.py
nominatim/tools/database_import.py

index 9bbc7527fdf65b89f667a228c9b950002970c922..4de0137f0b5dbaa998dad06530d5bab03d7ec0ff 100644 (file)
 
 CREATE INDEX IF NOT EXISTS idx_place_addressline_address_place_id
   ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}};
-
+---
 CREATE INDEX IF NOT EXISTS idx_placex_rank_search
   ON placex USING BTREE (rank_search) {{db.tablespace.search_index}};
-
+---
 CREATE INDEX IF NOT EXISTS idx_placex_rank_address
   ON placex USING BTREE (rank_address) {{db.tablespace.search_index}};
-
+---
 CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id
   ON placex USING BTREE (parent_place_id) {{db.tablespace.search_index}}
   WHERE parent_place_id IS NOT NULL;
-
+---
 CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon
   ON placex USING gist (geometry) {{db.tablespace.search_index}}
   WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon')
     AND rank_address between 4 and 25 AND type != 'postcode'
     AND name is not null AND indexed_status = 0 AND linked_place_id is null;
-
+---
 CREATE INDEX IF NOT EXISTS idx_osmline_parent_place_id
   ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}}
   WHERE parent_place_id is not null;
-
+---
 CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id
   ON location_property_osmline USING BTREE (osm_id) {{db.tablespace.search_index}};
-
+---
 CREATE INDEX IF NOT EXISTS idx_postcode_postcode
   ON location_postcode USING BTREE (postcode) {{db.tablespace.search_index}};
-
 -- Indices only needed for updating.
 
 {% if not drop %}
+---
   CREATE INDEX IF NOT EXISTS idx_placex_pendingsector
     ON placex USING BTREE (rank_address,geometry_sector) {{db.tablespace.address_index}}
     WHERE indexed_status > 0;
-
+---
   CREATE INDEX IF NOT EXISTS idx_location_area_country_place_id
     ON location_area_country USING BTREE (place_id) {{db.tablespace.address_index}};
-
+---
   CREATE UNIQUE INDEX IF NOT EXISTS idx_place_osm_unique
     ON place USING btree(osm_id, osm_type, class, type) {{db.tablespace.address_index}};
 {% endif %}
 
 -- Indices only needed for search.
-
 {% if 'search_name' in db.tables %}
+---
   CREATE INDEX IF NOT EXISTS idx_search_name_nameaddress_vector
     ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {{db.tablespace.search_index}};
+---
   CREATE INDEX IF NOT EXISTS idx_search_name_name_vector
     ON search_name USING GIN (name_vector) WITH (fastupdate = off) {{db.tablespace.search_index}};
+---
   CREATE INDEX IF NOT EXISTS idx_search_name_centroid
     ON search_name USING GIST (centroid) {{db.tablespace.search_index}};
 
   {% if postgres.has_index_non_key_column %}
+---
     CREATE INDEX IF NOT EXISTS idx_placex_housenumber
       ON placex USING btree (parent_place_id)
       INCLUDE (housenumber) {{db.tablespace.search_index}}
       WHERE housenumber is not null;
+---
     CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr
       ON location_property_osmline USING btree(parent_place_id)
       INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}}
index 6ffa7afb8babb91c58d7f40c0af16cfbb32f110d..b4dde6fe862939c804fae590c62486a0b85cc362 100644 (file)
@@ -72,6 +72,8 @@ class SetupAll:
         from ..tools import database_import, refresh, postcodes, freeze
         from ..indexer.indexer import Indexer
 
+        num_threads = args.threads or psutil.cpu_count() or 1
+
         country_info.setup_country_config(args.config)
 
         if args.continue_at is None:
@@ -109,8 +111,7 @@ class SetupAll:
                 database_import.truncate_data_tables(conn)
 
             LOG.warning('Load data into placex table')
-            database_import.load_data(args.config.get_libpq_dsn(),
-                                      args.threads or psutil.cpu_count() or 1)
+            database_import.load_data(args.config.get_libpq_dsn(), num_threads)
 
         LOG.warning("Setting up tokenizer")
         tokenizer = self._get_tokenizer(args.continue_at, args.config)
@@ -125,14 +126,14 @@ class SetupAll:
                 with connect(args.config.get_libpq_dsn()) as conn:
                     self._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
             LOG.warning('Indexing places')
-            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
-                              args.threads or psutil.cpu_count() or 1)
+            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
             indexer.index_full(analyse=not args.index_noanalyse)
 
         LOG.warning('Post-process tables')
         with connect(args.config.get_libpq_dsn()) as conn:
             database_import.create_search_indices(conn, args.config,
-                                                  drop=args.no_updates)
+                                                  drop=args.no_updates,
+                                                  threads=num_threads)
             LOG.warning('Create search index for default country names.')
             country_info.create_country_names(conn, tokenizer,
                                               args.config.get_str_list('LANGUAGES'))
index 447e90f1d5c7d547ced545a1fcad6a4958ceb317..f6ebe90d0983588ccc101988f5085d6692f405a0 100644 (file)
@@ -225,7 +225,8 @@ def load_data(dsn: str, threads: int) -> None:
             cur.execute('ANALYSE')
 
 
-def create_search_indices(conn: Connection, config: Configuration, drop: bool = False) -> None:
+def create_search_indices(conn: Connection, config: Configuration,
+                          drop: bool = False, threads: int = 1) -> None:
     """ Create tables that have explicit partitioning.
     """
 
@@ -243,4 +244,5 @@ def create_search_indices(conn: Connection, config: Configuration, drop: bool =
 
     sql = SQLPreprocessor(conn, config)
 
-    sql.run_sql_file(conn, 'indices.sql', drop=drop)
+    sql.run_parallel_sql_file(config.get_libpq_dsn(),
+                              'indices.sql', min(8, threads), drop=drop)