From f4d3ae6f70e5f0e86475d75c79dd3cbe1c07e103 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 21 Sep 2022 10:38:58 +0200 Subject: [PATCH] consolidate indexes over geometry_sectors The index over geometry_sectors are mainly used for ordering the places which need indexing. That means they function effectively as a TODO list. Consolodate them so that they always only contain the places which are still to do. Also add the appropriate index for the boundary indexing phase. --- lib-sql/indices.sql | 11 +++++++---- lib-sql/tables.sql | 13 ++++++++++++- nominatim/clicmd/setup.py | 26 +------------------------- nominatim/tools/check_database.py | 9 +++++---- test/python/cli/test_cmd_import.py | 3 --- 5 files changed, 25 insertions(+), 37 deletions(-) diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql index 4de0137f..916d0117 100644 --- a/lib-sql/indices.sql +++ b/lib-sql/indices.sql @@ -5,6 +5,13 @@ -- Copyright (C) 2022 by the Nominatim developer community. -- For a full list of authors see the git log. +-- The following indicies are only useful during imoprt when all of placex is processed. + +{% if drop %} + DROP INDEX IF EXISTS idx_placex_rank_address_sector; + DROP INDEX IF EXISTS idx_placex_rank_boundaries_sector; +{% endif %} + -- Indices used only during search and update. -- These indices are created only after the indexing process is done. @@ -39,10 +46,6 @@ CREATE INDEX IF NOT EXISTS idx_postcode_postcode -- Indices only needed for updating. {% if not drop %} ---- - CREATE INDEX IF NOT EXISTS idx_placex_pendingsector - ON placex USING BTREE (rank_address,geometry_sector) {{db.tablespace.address_index}} - WHERE indexed_status > 0; --- CREATE INDEX IF NOT EXISTS idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) {{db.tablespace.address_index}}; diff --git a/lib-sql/tables.sql b/lib-sql/tables.sql index 59b1ad5d..0958938a 100644 --- a/lib-sql/tables.sql +++ b/lib-sql/tables.sql @@ -162,7 +162,6 @@ CREATE INDEX idx_placex_node_osmid ON placex USING BTREE (osm_id) {{db.tablespac CREATE INDEX idx_placex_way_osmid ON placex USING BTREE (osm_id) {{db.tablespace.search_index}} WHERE osm_type = 'W'; CREATE INDEX idx_placex_relation_osmid ON placex USING BTREE (osm_id) {{db.tablespace.search_index}} WHERE osm_type = 'R'; CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL; -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}; CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}}; CREATE INDEX idx_placex_geometry_address_area_candidates ON placex USING gist (geometry) {{db.tablespace.address_index}} @@ -178,6 +177,18 @@ CREATE INDEX idx_placex_geometry_placenode ON placex and class = 'place' and type != 'postcode' and linked_place_id is null; CREATE INDEX idx_placex_wikidata on placex USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}} WHERE extratags ? 'wikidata' and class = 'place' and osm_type = 'N' and rank_search < 26; +-- The following two indexes function as a todo list for indexing. + +CREATE INDEX idx_placex_rank_address_sector ON placex + USING BTREE (rank_address, geometry_sector) {{db.tablespace.address_index}} + WHERE indexed_status > 0; + +CREATE INDEX idx_placex_rank_boundaries_sector ON placex + USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}} + WHERE class = 'boundary' and type = 'administrative' + and indexed_status > 0; + + DROP SEQUENCE IF EXISTS seq_place; CREATE SEQUENCE seq_place start 1; GRANT SELECT on placex to "{{config.DATABASE_WEBUSER}}" ; diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index b4dde6fe..29724433 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -15,7 +15,7 @@ from pathlib import Path import psutil from nominatim.config import Configuration -from nominatim.db.connection import connect, Connection +from nominatim.db.connection import connect from nominatim.db import status, properties from nominatim.tokenizer.base import AbstractTokenizer from nominatim.version import version_str @@ -122,9 +122,6 @@ class SetupAll: args.project_dir, tokenizer) if args.continue_at is None or args.continue_at in ('load-data', 'indexing'): - if args.continue_at is not None and args.continue_at != 'load-data': - with connect(args.config.get_libpq_dsn()) as conn: - self._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX) LOG.warning('Indexing places') indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads) indexer.index_full(analyse=not args.index_noanalyse) @@ -189,27 +186,6 @@ class SetupAll: return tokenizer_factory.get_tokenizer_for_db(config) - def _create_pending_index(self, conn: Connection, tablespace: str) -> None: - """ Add a supporting index for finding places still to be indexed. - - This index is normally created at the end of the import process - for later updates. When indexing was partially done, then this - index can greatly improve speed going through already indexed data. - """ - if conn.index_exists('idx_placex_pendingsector'): - return - - with conn.cursor() as cur: - LOG.warning('Creating support index') - if tablespace: - tablespace = 'TABLESPACE ' + tablespace - cur.execute(f"""CREATE INDEX idx_placex_pendingsector - ON placex USING BTREE (rank_address,geometry_sector) - {tablespace} WHERE indexed_status > 0 - """) - conn.commit() - - def _finalize_database(self, dsn: str, offline: bool) -> None: """ Determine the database date and set the status accordingly. """ diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py index 80358f20..437775db 100644 --- a/nominatim/tools/check_database.py +++ b/nominatim/tools/check_database.py @@ -114,9 +114,10 @@ def _get_indexes(conn: Connection) -> List[str]: indexes.extend(('idx_placex_housenumber', 'idx_osmline_parent_osm_id_with_hnr')) if conn.table_exists('place'): - indexes.extend(('idx_placex_pendingsector', - 'idx_location_area_country_place_id', - 'idx_place_osm_unique')) + indexes.extend(('idx_location_area_country_place_id', + 'idx_place_osm_unique', + 'idx_placex_rank_address_sector', + 'idx_placex_rank_boundaries_sector')) return indexes @@ -199,7 +200,7 @@ def check_tokenizer(_: Connection, config: Configuration) -> CheckResult: def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult: """ Checking for wikipedia/wikidata data """ - if not conn.table_exists('search_name'): + if not conn.table_exists('search_name') or not conn.table_exists('place'): return CheckState.NOT_APPLICABLE with conn.cursor() as cur: diff --git a/test/python/cli/test_cmd_import.py b/test/python/cli/test_cmd_import.py index d545c760..737c4e5c 100644 --- a/test/python/cli/test_cmd_import.py +++ b/test/python/cli/test_cmd_import.py @@ -105,11 +105,8 @@ class TestCliImportWithDb: for mock in mocks: assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) - assert temp_db_conn.index_exists('idx_placex_pendingsector') - # Calling it again still works for the index assert self.call_nominatim('import', '--continue', 'indexing') == 0 - assert temp_db_conn.index_exists('idx_placex_pendingsector') def test_import_continue_postprocess(self, mock_func_factory): -- 2.45.1