]> git.openstreetmap.org Git - nominatim.git/commitdiff
port index creation to python
authorSarah Hoffmann <lonvia@denofr.de>
Thu, 4 Mar 2021 09:55:24 +0000 (10:55 +0100)
committerSarah Hoffmann <lonvia@denofr.de>
Thu, 4 Mar 2021 10:11:47 +0000 (11:11 +0100)
Also switches to jinja-based preprocessing, which allows to
simplify the SQL files. Use 'if not exists' where possible
so that the step can be rerun to fix missing indexes.

12 files changed:
lib-php/admin/setup.php
lib-php/setup/SetupClass.php
lib-sql/indices.sql [new file with mode: 0644]
lib-sql/indices.src.sql [deleted file]
lib-sql/indices_search.src.sql [deleted file]
lib-sql/indices_updates.src.sql [deleted file]
lib-sql/tables.sql
nominatim/clicmd/setup.py
nominatim/clicmd/transition.py
nominatim/db/sql_preprocessor.py
nominatim/tools/database_import.py
test/python/test_cli.py

index 0888fe64791edbf2971461e95e708afc9912af3a..f81c0ca89e915925b1b1e0da30c9647771fec5e8 100644 (file)
@@ -123,7 +123,7 @@ if ($aCMDResult['import-data'] || $aCMDResult['all']) {
 
 if ($aCMDResult['create-functions'] || $aCMDResult['all']) {
     $bDidSomething = true;
-    $oSetup->createFunctions();
+    $oSetup->createSqlFunctions();
 }
 
 if ($aCMDResult['create-tables'] || $aCMDResult['all']) {
@@ -144,7 +144,7 @@ if ($aCMDResult['create-partition-tables'] || $aCMDResult['all']) {
 
 if ($aCMDResult['create-partition-functions'] || $aCMDResult['all']) {
     $bDidSomething = true;
-    $oSetup->createFunctions(); // also create partition functions
+    $oSetup->createSqlFunctions(); // also create partition functions
 }
 
 if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) {
@@ -186,7 +186,14 @@ if ($aCMDResult['drop']) {
 
 if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) {
     $bDidSomething = true;
-    $oSetup->createSearchIndices();
+
+    $oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-search-indices');
+
+    if ($aCMDResult['drop'] ?? false) {
+        $oCmd->addParams('--drop');
+    }
+
+    run($oCmd);
 }
 
 if ($aCMDResult['create-country-names'] || $aCMDResult['all']) {
index b4875ebfa23d9d7112783d6982aecd7f41226fa2..4b6439a95c9142b0ce0724627cfb243cc3e8e210 100755 (executable)
@@ -67,16 +67,6 @@ class SetupFunctions
         }
     }
 
-    public function createFunctions()
-    {
-        info('Create Functions');
-
-        // Try accessing the C module, so we know early if something is wrong
-        $this->checkModulePresence(); // raises exception on failure
-
-        $this->createSqlFunctions();
-    }
-
     public function importTigerData($sTigerPath)
     {
         info('Import Tiger data');
@@ -210,31 +200,6 @@ class SetupFunctions
         $this->db()->exec($sSQL);
     }
 
-    public function createSearchIndices()
-    {
-        info('Create Search indices');
-
-        $sSQL = 'SELECT relname FROM pg_class, pg_index ';
-        $sSQL .= 'WHERE pg_index.indisvalid = false AND pg_index.indexrelid = pg_class.oid';
-        $aInvalidIndices = $this->db()->getCol($sSQL);
-
-        foreach ($aInvalidIndices as $sIndexName) {
-            info("Cleaning up invalid index $sIndexName");
-            $this->db()->exec("DROP INDEX $sIndexName;");
-        }
-
-        $sTemplate = file_get_contents(CONST_SqlDir.'/indices.src.sql');
-        if (!$this->bDrop) {
-            $sTemplate .= file_get_contents(CONST_SqlDir.'/indices_updates.src.sql');
-        }
-        if (!$this->dbReverseOnly()) {
-            $sTemplate .= file_get_contents(CONST_SqlDir.'/indices_search.src.sql');
-        }
-        $sTemplate = $this->replaceSqlPatterns($sTemplate);
-
-        $this->pgsqlRunScript($sTemplate);
-    }
-
     public function createCountryNames()
     {
         info('Create search index for default country names');
@@ -290,7 +255,7 @@ class SetupFunctions
         );
     }
 
-    private function createSqlFunctions()
+    public function createSqlFunctions()
     {
         $oCmd = (clone($this->oNominatimCmd))
                 ->addParams('refresh', '--functions');
@@ -390,44 +355,4 @@ class SetupFunctions
 
         return $sSql;
     }
-
-    /**
-     * Drop table with the given name if it exists.
-     *
-     * @param string $sName Name of table to remove.
-     *
-     * @return null
-     */
-    private function dropTable($sName)
-    {
-        if ($this->bVerbose) echo "Dropping table $sName\n";
-        $this->db()->deleteTable($sName);
-    }
-
-    /**
-     * Check if the database is in reverse-only mode.
-     *
-     * @return True if there is no search_name table and infrastructure.
-     */
-    private function dbReverseOnly()
-    {
-        return !($this->db()->tableExists('search_name'));
-    }
-
-    /**
-     * Try accessing the C module, so we know early if something is wrong.
-     *
-     * Raises Nominatim\DatabaseError on failure
-     */
-    private function checkModulePresence()
-    {
-        $sModulePath = getSetting('DATABASE_MODULE_PATH', CONST_InstallDir.'/module');
-        $sSQL = "CREATE FUNCTION nominatim_test_import_func(text) RETURNS text AS '";
-        $sSQL .= $sModulePath . "/nominatim.so', 'transliteration' LANGUAGE c IMMUTABLE STRICT";
-        $sSQL .= ';DROP FUNCTION nominatim_test_import_func(text);';
-
-        $oDB = new \Nominatim\DB();
-        $oDB->connect();
-        $oDB->exec($sSQL, null, 'Database server failed to load '.$sModulePath.'/nominatim.so module');
-    }
 }
diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql
new file mode 100644 (file)
index 0000000..cb77e02
--- /dev/null
@@ -0,0 +1,67 @@
+-- Indices used only during search and update.
+-- These indices are created only after the indexing process is done.
+
+CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id
+  ON word USING BTREE (word_id) {{db.tablespace.search_index}};
+
+CREATE INDEX {{sql.if_index_not_exists}} idx_place_addressline_address_place_id
+  ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}};
+
+CREATE INDEX {{sql.if_index_not_exists}} idx_placex_rank_search
+  ON placex USING BTREE (rank_search) {{db.tablespace.search_index}};
+
+CREATE INDEX {{sql.if_index_not_exists}} idx_placex_rank_address
+  ON placex USING BTREE (rank_address) {{db.tablespace.search_index}};
+
+CREATE INDEX {{sql.if_index_not_exists}} idx_placex_parent_place_id
+  ON placex USING BTREE (parent_place_id) {{db.tablespace.search_index}}
+  WHERE parent_place_id IS NOT NULL;
+
+CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_lookupPolygon
+  ON placex USING gist (geometry) {{db.tablespace.search_index}}
+  WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon')
+    AND rank_address between 4 and 25 AND type != 'postcode'
+    AND name is not null AND indexed_status = 0 AND linked_place_id is null;
+
+CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_placeNode
+  ON placex USING gist (geometry) {{db.tablespace.search_index}}
+  WHERE osm_type = 'N' AND rank_search between 5 and 25
+    AND class = 'place' AND type != 'postcode'
+    AND name is not null AND indexed_status = 0 AND linked_place_id is null;
+
+CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_place_id
+  ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}};
+
+CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_osm_id
+  ON location_property_osmline USING BTREE (osm_id) {{db.tablespace.search_index}};
+
+CREATE UNIQUE INDEX {{sql.if_index_not_exists}} idx_postcode_id
+  ON location_postcode USING BTREE (place_id) {{db.tablespace.search_index}};
+
+CREATE INDEX {{sql.if_index_not_exists}} idx_postcode_postcode
+  ON location_postcode USING BTREE (postcode) {{db.tablespace.search_index}};
+
+-- Indices only needed for updating.
+
+{% if not drop %}
+  CREATE INDEX {{sql.if_index_not_exists}} idx_placex_pendingsector
+    ON placex USING BTREE (rank_address,geometry_sector) {{db.tablespace.address_index}}
+    WHERE indexed_status > 0;
+
+  CREATE INDEX {{sql.if_index_not_exists}} idx_location_area_country_place_id
+    ON location_area_country USING BTREE (place_id) {{db.tablespace.address_index}};
+
+  CREATE UNIQUE INDEX {{sql.if_index_not_exists}} idx_place_osm_unique
+    ON place USING btree(osm_id, osm_type, class, type) {{db.tablespace.address_index}};
+{% endif %}
+
+-- Indices only needed for search.
+
+{% if 'search_name' in db.tables %}
+  CREATE INDEX {{sql.if_index_not_exists}} idx_search_name_nameaddress_vector
+    ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {{db.tablespace.search_index}};
+  CREATE INDEX {{sql.if_index_not_exists}} idx_search_name_name_vector
+    ON search_name USING GIN (name_vector) WITH (fastupdate = off) {{db.tablespace.search_index}};
+  CREATE INDEX {{sql.if_index_not_exists}} idx_search_name_centroid
+    ON search_name USING GIST (centroid) {{db.tablespace.search_index}};
+{% endif %}
diff --git a/lib-sql/indices.src.sql b/lib-sql/indices.src.sql
deleted file mode 100644 (file)
index 7fcd965..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
--- Indices used only during search and update.
--- These indices are created only after the indexing process is done.
-
-CREATE INDEX idx_word_word_id on word USING BTREE (word_id) {ts:search-index};
-
-CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) {ts:search-index};
-
-DROP INDEX IF EXISTS idx_placex_rank_search;
-CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) {ts:search-index};
-CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) {ts:search-index};
-CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) {ts:search-index} where parent_place_id IS NOT NULL;
-
-CREATE INDEX idx_placex_geometry_reverse_lookupPolygon
-  ON placex USING gist (geometry) {ts:search-index}
-  WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon')
-    AND rank_address between 4 and 25 AND type != 'postcode'
-    AND name is not null AND indexed_status = 0 AND linked_place_id is null;
-CREATE INDEX idx_placex_geometry_reverse_placeNode
-  ON placex USING gist (geometry) {ts:search-index}
-  WHERE osm_type = 'N' AND rank_search between 5 and 25
-    AND class = 'place' AND type != 'postcode'
-    AND name is not null AND indexed_status = 0 AND linked_place_id is null;
-
-GRANT SELECT ON table country_osm_grid to "{www-user}";
-
-CREATE INDEX idx_osmline_parent_place_id ON location_property_osmline USING BTREE (parent_place_id) {ts:search-index};
-CREATE INDEX idx_osmline_parent_osm_id ON location_property_osmline USING BTREE (osm_id) {ts:search-index};
-
-CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id) {ts:search-index};
-CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode) {ts:search-index};
diff --git a/lib-sql/indices_search.src.sql b/lib-sql/indices_search.src.sql
deleted file mode 100644 (file)
index d1363fc..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
--- Indices used for /search API.
--- These indices are created only after the indexing process is done.
-
-CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {ts:search-index};
-CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) {ts:search-index};
-CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) {ts:search-index};
diff --git a/lib-sql/indices_updates.src.sql b/lib-sql/indices_updates.src.sql
deleted file mode 100644 (file)
index 6d4c968..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
--- Indices used only during search and update.
--- These indices are created only after the indexing process is done.
-
-CREATE INDEX CONCURRENTLY idx_placex_pendingsector ON placex USING BTREE (rank_address,geometry_sector) {ts:address-index} where indexed_status > 0;
-
-CREATE INDEX CONCURRENTLY idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) {ts:address-index};
-
-DROP INDEX CONCURRENTLY IF EXISTS place_id_idx;
-CREATE UNIQUE INDEX CONCURRENTLY idx_place_osm_unique on place using btree(osm_id,osm_type,class,type) {ts:address-index};
index 53362ce468f39dd9090bc532f8e0a5ee93958fea..0895c6dd3b83b812fb0575384c2ea85ff5f47f4e 100644 (file)
@@ -270,3 +270,5 @@ ALTER TABLE ONLY wikipedia_redirect ADD CONSTRAINT wikipedia_redirect_pkey PRIMA
 -- osm2pgsql does not create indexes on the middle tables for Nominatim
 -- Add one for lookup of associated street relations.
 CREATE INDEX planet_osm_rels_parts_associated_idx ON planet_osm_rels USING gin(parts) WHERE tags @> ARRAY['associatedStreet'];
+
+GRANT SELECT ON table country_osm_grid to "{{config.DATABASE_WEBUSER}}";
index 662bc8ceb3f462627be88759a884f3ebc41919d3..71980739756c2e0f3a885689e3ad547e6ae1e6e7 100644 (file)
@@ -126,10 +126,12 @@ class SetupAll:
             indexer.index_full(analyse=not args.index_noanalyse)
 
         LOG.warning('Post-process tables')
-        params = ['setup.php', '--create-search-indices', '--create-country-names']
-        if args.no_updates:
-            params.append('--drop')
-        run_legacy_script(*params, nominatim_env=args, throw_on_fail=not args.ignore_errors)
+        with connect(args.config.get_libpq_dsn()) as conn:
+            database_import.create_search_indices(conn, args.config,
+                                                  args.sqllib_dir,
+                                                  drop=args.no_updates)
+        run_legacy_script('setup.php', '--create-country-names',
+                          nominatim_env=args, throw_on_fail=not args.ignore_errors)
 
         webdir = args.project_dir / 'website'
         LOG.warning('Setup website at %s', webdir)
index e8e40646b8a87688935b36aae1b3f077c3b42933..b8db1a38ee745fd2062f2dd63aa49d21707e3710 100644 (file)
@@ -41,6 +41,8 @@ class AdminTransition:
                            help='Create required partition tables')
         group.add_argument('--index', action='store_true',
                            help='Index the data')
+        group.add_argument('--create-search-indices', action='store_true',
+                           help='Create additional indices required for search and update')
         group = parser.add_argument_group('Options')
         group.add_argument('--no-partitions', action='store_true',
                            help='Do not partition search indices')
@@ -120,3 +122,8 @@ class AdminTransition:
             from ..indexer.indexer import Indexer
             indexer = Indexer(args.config.get_libpq_dsn(), args.threads or 1)
             indexer.index_full()
+
+        if args.create_search_indices:
+            LOG.warning('Create Search indices')
+            with connect(args.config.get_libpq_dsn()) as conn:
+                database_import.create_search_indices(conn, args.config, args.sqllib_dir, args.drop)
index 6f83ff3262ae41ae2fddda93dc01b722cd92721d..852447525ab26fcb0dc80edf69bfc5096b9c2368 100644 (file)
@@ -25,6 +25,36 @@ def _get_tables(conn):
 
         return set((row[0] for row in list(cur)))
 
+
+def _setup_tablespace_sql(config):
+    """ Returns a dict with tablespace expressions for the different tablespace
+        kinds depending on whether a tablespace is configured or not.
+    """
+    out = {}
+    for subset in ('ADDRESS', 'SEARCH', 'AUX'):
+        for kind in ('DATA', 'INDEX'):
+            tspace = getattr(config, 'TABLESPACE_{}_{}'.format(subset, kind))
+            if tspace:
+                tspace = 'TABLESPACE "{}"'.format(tspace)
+            out['{}_{}'.format(subset.lower, kind.lower())] = tspace
+
+    return out
+
+
+def _setup_postgres_sql(conn):
+    """ Set up a dictionary with various Postgresql/Postgis SQL terms which
+        are dependent on the database version in use.
+    """
+    out = {}
+    pg_version = conn.server_version_tuple()
+    # CREATE INDEX IF NOT EXISTS was introduced in PG9.5.
+    # Note that you need to ignore failures on older versions when
+    # unsing this construct.
+    out['if_index_not_exists'] = ' IF NOT EXISTS ' if pg_version >= (9, 5, 0) else ''
+
+    return out
+
+
 class SQLPreprocessor: # pylint: disable=too-few-public-methods
     """ A environment for preprocessing SQL files from the
         lib-sql directory.
@@ -44,17 +74,11 @@ class SQLPreprocessor: # pylint: disable=too-few-public-methods
         db_info['partitions'] = _get_partitions(conn)
         db_info['tables'] = _get_tables(conn)
         db_info['reverse_only'] = 'search_name' not in db_info['tables']
-
-        db_info['tablespace'] = {}
-        for subset in ('ADDRESS', 'SEARCH', 'AUX'):
-            for kind in ('DATA', 'INDEX'):
-                tspace = getattr(config, 'TABLESPACE_{}_{}'.format(subset, kind))
-                if tspace:
-                    tspace = 'TABLESPACE "{}"'.format(tspace)
-                db_info['tablespace']['{}_{}'.format(subset.lower, kind.lower())] = tspace
+        db_info['tablespace'] = _setup_tablespace_sql(config)
 
         self.env.globals['config'] = config
         self.env.globals['db'] = db_info
+        self.env.globals['sql'] = _setup_postgres_sql(conn)
         self.env.globals['modulepath'] = config.DATABASE_MODULE_PATH or \
                                          str((config.project_dir / 'module').resolve())
 
index 069157000568fea2ecc59452f86c605ab9459041..017c74b6186781e6ba49970d3bcfd9ffd65e30ae 100644 (file)
@@ -199,7 +199,7 @@ def create_table_triggers(conn, config, sqllib_dir):
 
 
 def create_partition_tables(conn, config, sqllib_dir):
-    """ Create tables that have explicit partioning.
+    """ Create tables that have explicit partitioning.
     """
     sql = SQLPreprocessor(conn, config, sqllib_dir)
     sql.run_sql_file(conn, 'partition-tables.src.sql')
@@ -285,3 +285,24 @@ def load_data(dsn, data_dir, threads):
     with connect(dsn) as conn:
         with conn.cursor() as cur:
             cur.execute('ANALYSE')
+
+
+def create_search_indices(conn, config, sqllib_dir, drop=False):
+    """ Create tables that have explicit partitioning.
+    """
+
+    # If index creation failed and left an index invalid, they need to be
+    # cleaned out first, so that the script recreates them.
+    with conn.cursor() as cur:
+        cur.execute("""SELECT relname FROM pg_class, pg_index
+                       WHERE pg_index.indisvalid = false
+                             AND pg_index.indexrelid = pg_class.oid""")
+        bad_indices = [row[0] for row in list(cur)]
+        for idx in bad_indices:
+            LOG.info("Drop invalid index %s.", idx)
+            cur.execute('DROP INDEX "{}"'.format(idx))
+    conn.commit()
+
+    sql = SQLPreprocessor(conn, config, sqllib_dir)
+
+    sql.run_sql_file(conn, 'indices.sql', drop=drop)
index e94e114fef7a927358346ed2b7c75e650bbface8..418f4bcfdeb7b1f4ded6a1b8c474e6c59c45d4d5 100644 (file)
@@ -95,6 +95,7 @@ def test_import_full(temp_db, mock_func_factory):
         mock_func_factory(nominatim.tools.database_import, 'create_tables'),
         mock_func_factory(nominatim.tools.database_import, 'create_table_triggers'),
         mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'),
+        mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
         mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'),
         mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
         mock_func_factory(nominatim.tools.refresh, 'setup_website'),