From: Sarah Hoffmann Date: Thu, 4 Mar 2021 09:55:24 +0000 (+0100) Subject: port index creation to python X-Git-Tag: v3.7.0~22^2 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/09f4d767e4498368f798d8c1b579b1bf730f4baf port index creation to python Also switches to jinja-based preprocessing, which allows to simplify the SQL files. Use 'if not exists' where possible so that the step can be rerun to fix missing indexes. --- diff --git a/lib-php/admin/setup.php b/lib-php/admin/setup.php index 0888fe64..f81c0ca8 100644 --- a/lib-php/admin/setup.php +++ b/lib-php/admin/setup.php @@ -123,7 +123,7 @@ if ($aCMDResult['import-data'] || $aCMDResult['all']) { if ($aCMDResult['create-functions'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->createFunctions(); + $oSetup->createSqlFunctions(); } if ($aCMDResult['create-tables'] || $aCMDResult['all']) { @@ -144,7 +144,7 @@ if ($aCMDResult['create-partition-tables'] || $aCMDResult['all']) { if ($aCMDResult['create-partition-functions'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->createFunctions(); // also create partition functions + $oSetup->createSqlFunctions(); // also create partition functions } if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) { @@ -186,7 +186,14 @@ if ($aCMDResult['drop']) { if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->createSearchIndices(); + + $oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-search-indices'); + + if ($aCMDResult['drop'] ?? false) { + $oCmd->addParams('--drop'); + } + + run($oCmd); } if ($aCMDResult['create-country-names'] || $aCMDResult['all']) { diff --git a/lib-php/setup/SetupClass.php b/lib-php/setup/SetupClass.php index b4875ebf..4b6439a9 100755 --- a/lib-php/setup/SetupClass.php +++ b/lib-php/setup/SetupClass.php @@ -67,16 +67,6 @@ class SetupFunctions } } - public function createFunctions() - { - info('Create Functions'); - - // Try accessing the C module, so we know early if something is wrong - $this->checkModulePresence(); // raises exception on failure - - $this->createSqlFunctions(); - } - public function importTigerData($sTigerPath) { info('Import Tiger data'); @@ -210,31 +200,6 @@ class SetupFunctions $this->db()->exec($sSQL); } - public function createSearchIndices() - { - info('Create Search indices'); - - $sSQL = 'SELECT relname FROM pg_class, pg_index '; - $sSQL .= 'WHERE pg_index.indisvalid = false AND pg_index.indexrelid = pg_class.oid'; - $aInvalidIndices = $this->db()->getCol($sSQL); - - foreach ($aInvalidIndices as $sIndexName) { - info("Cleaning up invalid index $sIndexName"); - $this->db()->exec("DROP INDEX $sIndexName;"); - } - - $sTemplate = file_get_contents(CONST_SqlDir.'/indices.src.sql'); - if (!$this->bDrop) { - $sTemplate .= file_get_contents(CONST_SqlDir.'/indices_updates.src.sql'); - } - if (!$this->dbReverseOnly()) { - $sTemplate .= file_get_contents(CONST_SqlDir.'/indices_search.src.sql'); - } - $sTemplate = $this->replaceSqlPatterns($sTemplate); - - $this->pgsqlRunScript($sTemplate); - } - public function createCountryNames() { info('Create search index for default country names'); @@ -290,7 +255,7 @@ class SetupFunctions ); } - private function createSqlFunctions() + public function createSqlFunctions() { $oCmd = (clone($this->oNominatimCmd)) ->addParams('refresh', '--functions'); @@ -390,44 +355,4 @@ class SetupFunctions return $sSql; } - - /** - * Drop table with the given name if it exists. - * - * @param string $sName Name of table to remove. - * - * @return null - */ - private function dropTable($sName) - { - if ($this->bVerbose) echo "Dropping table $sName\n"; - $this->db()->deleteTable($sName); - } - - /** - * Check if the database is in reverse-only mode. - * - * @return True if there is no search_name table and infrastructure. - */ - private function dbReverseOnly() - { - return !($this->db()->tableExists('search_name')); - } - - /** - * Try accessing the C module, so we know early if something is wrong. - * - * Raises Nominatim\DatabaseError on failure - */ - private function checkModulePresence() - { - $sModulePath = getSetting('DATABASE_MODULE_PATH', CONST_InstallDir.'/module'); - $sSQL = "CREATE FUNCTION nominatim_test_import_func(text) RETURNS text AS '"; - $sSQL .= $sModulePath . "/nominatim.so', 'transliteration' LANGUAGE c IMMUTABLE STRICT"; - $sSQL .= ';DROP FUNCTION nominatim_test_import_func(text);'; - - $oDB = new \Nominatim\DB(); - $oDB->connect(); - $oDB->exec($sSQL, null, 'Database server failed to load '.$sModulePath.'/nominatim.so module'); - } } diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql new file mode 100644 index 00000000..cb77e02b --- /dev/null +++ b/lib-sql/indices.sql @@ -0,0 +1,67 @@ +-- Indices used only during search and update. +-- These indices are created only after the indexing process is done. + +CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id + ON word USING BTREE (word_id) {{db.tablespace.search_index}}; + +CREATE INDEX {{sql.if_index_not_exists}} idx_place_addressline_address_place_id + ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}}; + +CREATE INDEX {{sql.if_index_not_exists}} idx_placex_rank_search + ON placex USING BTREE (rank_search) {{db.tablespace.search_index}}; + +CREATE INDEX {{sql.if_index_not_exists}} idx_placex_rank_address + ON placex USING BTREE (rank_address) {{db.tablespace.search_index}}; + +CREATE INDEX {{sql.if_index_not_exists}} idx_placex_parent_place_id + ON placex USING BTREE (parent_place_id) {{db.tablespace.search_index}} + WHERE parent_place_id IS NOT NULL; + +CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_lookupPolygon + ON placex USING gist (geometry) {{db.tablespace.search_index}} + WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon') + AND rank_address between 4 and 25 AND type != 'postcode' + AND name is not null AND indexed_status = 0 AND linked_place_id is null; + +CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_placeNode + ON placex USING gist (geometry) {{db.tablespace.search_index}} + WHERE osm_type = 'N' AND rank_search between 5 and 25 + AND class = 'place' AND type != 'postcode' + AND name is not null AND indexed_status = 0 AND linked_place_id is null; + +CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_place_id + ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}}; + +CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_osm_id + ON location_property_osmline USING BTREE (osm_id) {{db.tablespace.search_index}}; + +CREATE UNIQUE INDEX {{sql.if_index_not_exists}} idx_postcode_id + ON location_postcode USING BTREE (place_id) {{db.tablespace.search_index}}; + +CREATE INDEX {{sql.if_index_not_exists}} idx_postcode_postcode + ON location_postcode USING BTREE (postcode) {{db.tablespace.search_index}}; + +-- Indices only needed for updating. + +{% if not drop %} + CREATE INDEX {{sql.if_index_not_exists}} idx_placex_pendingsector + ON placex USING BTREE (rank_address,geometry_sector) {{db.tablespace.address_index}} + WHERE indexed_status > 0; + + CREATE INDEX {{sql.if_index_not_exists}} idx_location_area_country_place_id + ON location_area_country USING BTREE (place_id) {{db.tablespace.address_index}}; + + CREATE UNIQUE INDEX {{sql.if_index_not_exists}} idx_place_osm_unique + ON place USING btree(osm_id, osm_type, class, type) {{db.tablespace.address_index}}; +{% endif %} + +-- Indices only needed for search. + +{% if 'search_name' in db.tables %} + CREATE INDEX {{sql.if_index_not_exists}} idx_search_name_nameaddress_vector + ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {{db.tablespace.search_index}}; + CREATE INDEX {{sql.if_index_not_exists}} idx_search_name_name_vector + ON search_name USING GIN (name_vector) WITH (fastupdate = off) {{db.tablespace.search_index}}; + CREATE INDEX {{sql.if_index_not_exists}} idx_search_name_centroid + ON search_name USING GIST (centroid) {{db.tablespace.search_index}}; +{% endif %} diff --git a/lib-sql/indices.src.sql b/lib-sql/indices.src.sql deleted file mode 100644 index 7fcd965f..00000000 --- a/lib-sql/indices.src.sql +++ /dev/null @@ -1,30 +0,0 @@ --- Indices used only during search and update. --- These indices are created only after the indexing process is done. - -CREATE INDEX idx_word_word_id on word USING BTREE (word_id) {ts:search-index}; - -CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) {ts:search-index}; - -DROP INDEX IF EXISTS idx_placex_rank_search; -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) {ts:search-index}; -CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) {ts:search-index}; -CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) {ts:search-index} where parent_place_id IS NOT NULL; - -CREATE INDEX idx_placex_geometry_reverse_lookupPolygon - ON placex USING gist (geometry) {ts:search-index} - WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon') - AND rank_address between 4 and 25 AND type != 'postcode' - AND name is not null AND indexed_status = 0 AND linked_place_id is null; -CREATE INDEX idx_placex_geometry_reverse_placeNode - ON placex USING gist (geometry) {ts:search-index} - WHERE osm_type = 'N' AND rank_search between 5 and 25 - AND class = 'place' AND type != 'postcode' - AND name is not null AND indexed_status = 0 AND linked_place_id is null; - -GRANT SELECT ON table country_osm_grid to "{www-user}"; - -CREATE INDEX idx_osmline_parent_place_id ON location_property_osmline USING BTREE (parent_place_id) {ts:search-index}; -CREATE INDEX idx_osmline_parent_osm_id ON location_property_osmline USING BTREE (osm_id) {ts:search-index}; - -CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id) {ts:search-index}; -CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode) {ts:search-index}; diff --git a/lib-sql/indices_search.src.sql b/lib-sql/indices_search.src.sql deleted file mode 100644 index d1363fc6..00000000 --- a/lib-sql/indices_search.src.sql +++ /dev/null @@ -1,6 +0,0 @@ --- Indices used for /search API. --- These indices are created only after the indexing process is done. - -CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {ts:search-index}; -CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) {ts:search-index}; -CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) {ts:search-index}; diff --git a/lib-sql/indices_updates.src.sql b/lib-sql/indices_updates.src.sql deleted file mode 100644 index 6d4c968e..00000000 --- a/lib-sql/indices_updates.src.sql +++ /dev/null @@ -1,9 +0,0 @@ --- Indices used only during search and update. --- These indices are created only after the indexing process is done. - -CREATE INDEX CONCURRENTLY idx_placex_pendingsector ON placex USING BTREE (rank_address,geometry_sector) {ts:address-index} where indexed_status > 0; - -CREATE INDEX CONCURRENTLY idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) {ts:address-index}; - -DROP INDEX CONCURRENTLY IF EXISTS place_id_idx; -CREATE UNIQUE INDEX CONCURRENTLY idx_place_osm_unique on place using btree(osm_id,osm_type,class,type) {ts:address-index}; diff --git a/lib-sql/tables.sql b/lib-sql/tables.sql index 53362ce4..0895c6dd 100644 --- a/lib-sql/tables.sql +++ b/lib-sql/tables.sql @@ -270,3 +270,5 @@ ALTER TABLE ONLY wikipedia_redirect ADD CONSTRAINT wikipedia_redirect_pkey PRIMA -- osm2pgsql does not create indexes on the middle tables for Nominatim -- Add one for lookup of associated street relations. CREATE INDEX planet_osm_rels_parts_associated_idx ON planet_osm_rels USING gin(parts) WHERE tags @> ARRAY['associatedStreet']; + +GRANT SELECT ON table country_osm_grid to "{{config.DATABASE_WEBUSER}}"; diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index 662bc8ce..71980739 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -126,10 +126,12 @@ class SetupAll: indexer.index_full(analyse=not args.index_noanalyse) LOG.warning('Post-process tables') - params = ['setup.php', '--create-search-indices', '--create-country-names'] - if args.no_updates: - params.append('--drop') - run_legacy_script(*params, nominatim_env=args, throw_on_fail=not args.ignore_errors) + with connect(args.config.get_libpq_dsn()) as conn: + database_import.create_search_indices(conn, args.config, + args.sqllib_dir, + drop=args.no_updates) + run_legacy_script('setup.php', '--create-country-names', + nominatim_env=args, throw_on_fail=not args.ignore_errors) webdir = args.project_dir / 'website' LOG.warning('Setup website at %s', webdir) diff --git a/nominatim/clicmd/transition.py b/nominatim/clicmd/transition.py index e8e40646..b8db1a38 100644 --- a/nominatim/clicmd/transition.py +++ b/nominatim/clicmd/transition.py @@ -41,6 +41,8 @@ class AdminTransition: help='Create required partition tables') group.add_argument('--index', action='store_true', help='Index the data') + group.add_argument('--create-search-indices', action='store_true', + help='Create additional indices required for search and update') group = parser.add_argument_group('Options') group.add_argument('--no-partitions', action='store_true', help='Do not partition search indices') @@ -120,3 +122,8 @@ class AdminTransition: from ..indexer.indexer import Indexer indexer = Indexer(args.config.get_libpq_dsn(), args.threads or 1) indexer.index_full() + + if args.create_search_indices: + LOG.warning('Create Search indices') + with connect(args.config.get_libpq_dsn()) as conn: + database_import.create_search_indices(conn, args.config, args.sqllib_dir, args.drop) diff --git a/nominatim/db/sql_preprocessor.py b/nominatim/db/sql_preprocessor.py index 6f83ff32..85244752 100644 --- a/nominatim/db/sql_preprocessor.py +++ b/nominatim/db/sql_preprocessor.py @@ -25,6 +25,36 @@ def _get_tables(conn): return set((row[0] for row in list(cur))) + +def _setup_tablespace_sql(config): + """ Returns a dict with tablespace expressions for the different tablespace + kinds depending on whether a tablespace is configured or not. + """ + out = {} + for subset in ('ADDRESS', 'SEARCH', 'AUX'): + for kind in ('DATA', 'INDEX'): + tspace = getattr(config, 'TABLESPACE_{}_{}'.format(subset, kind)) + if tspace: + tspace = 'TABLESPACE "{}"'.format(tspace) + out['{}_{}'.format(subset.lower, kind.lower())] = tspace + + return out + + +def _setup_postgres_sql(conn): + """ Set up a dictionary with various Postgresql/Postgis SQL terms which + are dependent on the database version in use. + """ + out = {} + pg_version = conn.server_version_tuple() + # CREATE INDEX IF NOT EXISTS was introduced in PG9.5. + # Note that you need to ignore failures on older versions when + # unsing this construct. + out['if_index_not_exists'] = ' IF NOT EXISTS ' if pg_version >= (9, 5, 0) else '' + + return out + + class SQLPreprocessor: # pylint: disable=too-few-public-methods """ A environment for preprocessing SQL files from the lib-sql directory. @@ -44,17 +74,11 @@ class SQLPreprocessor: # pylint: disable=too-few-public-methods db_info['partitions'] = _get_partitions(conn) db_info['tables'] = _get_tables(conn) db_info['reverse_only'] = 'search_name' not in db_info['tables'] - - db_info['tablespace'] = {} - for subset in ('ADDRESS', 'SEARCH', 'AUX'): - for kind in ('DATA', 'INDEX'): - tspace = getattr(config, 'TABLESPACE_{}_{}'.format(subset, kind)) - if tspace: - tspace = 'TABLESPACE "{}"'.format(tspace) - db_info['tablespace']['{}_{}'.format(subset.lower, kind.lower())] = tspace + db_info['tablespace'] = _setup_tablespace_sql(config) self.env.globals['config'] = config self.env.globals['db'] = db_info + self.env.globals['sql'] = _setup_postgres_sql(conn) self.env.globals['modulepath'] = config.DATABASE_MODULE_PATH or \ str((config.project_dir / 'module').resolve()) diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 06915700..017c74b6 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -199,7 +199,7 @@ def create_table_triggers(conn, config, sqllib_dir): def create_partition_tables(conn, config, sqllib_dir): - """ Create tables that have explicit partioning. + """ Create tables that have explicit partitioning. """ sql = SQLPreprocessor(conn, config, sqllib_dir) sql.run_sql_file(conn, 'partition-tables.src.sql') @@ -285,3 +285,24 @@ def load_data(dsn, data_dir, threads): with connect(dsn) as conn: with conn.cursor() as cur: cur.execute('ANALYSE') + + +def create_search_indices(conn, config, sqllib_dir, drop=False): + """ Create tables that have explicit partitioning. + """ + + # If index creation failed and left an index invalid, they need to be + # cleaned out first, so that the script recreates them. + with conn.cursor() as cur: + cur.execute("""SELECT relname FROM pg_class, pg_index + WHERE pg_index.indisvalid = false + AND pg_index.indexrelid = pg_class.oid""") + bad_indices = [row[0] for row in list(cur)] + for idx in bad_indices: + LOG.info("Drop invalid index %s.", idx) + cur.execute('DROP INDEX "{}"'.format(idx)) + conn.commit() + + sql = SQLPreprocessor(conn, config, sqllib_dir) + + sql.run_sql_file(conn, 'indices.sql', drop=drop) diff --git a/test/python/test_cli.py b/test/python/test_cli.py index e94e114f..418f4bcf 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -95,6 +95,7 @@ def test_import_full(temp_db, mock_func_factory): mock_func_factory(nominatim.tools.database_import, 'create_tables'), mock_func_factory(nominatim.tools.database_import, 'create_table_triggers'), mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'), + mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'), mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), mock_func_factory(nominatim.tools.refresh, 'setup_website'),