From d81e152804b81f3d61ee743e436dd14fe924ee58 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 8 Feb 2021 22:21:57 +0100 Subject: [PATCH] integrate analyse of indexing into nominatim tool --- nominatim/clicmd/admin.py | 15 ++++ nominatim/tools/admin.py | 49 +++++++++++++ test/python/conftest.py | 36 ++++++++++ test/python/test_cli.py | 7 ++ test/python/test_tools_admin.py | 42 +++++++++++ utils/analyse_indexing.py | 119 -------------------------------- 6 files changed, 149 insertions(+), 119 deletions(-) create mode 100644 nominatim/tools/admin.py create mode 100644 test/python/test_tools_admin.py delete mode 100755 utils/analyse_indexing.py diff --git a/nominatim/clicmd/admin.py b/nominatim/clicmd/admin.py index 040b9232..8d34f386 100644 --- a/nominatim/clicmd/admin.py +++ b/nominatim/clicmd/admin.py @@ -2,6 +2,7 @@ Implementation of the 'admin' subcommand. """ from ..tools.exec_utils import run_legacy_script +from ..db.connection import connect # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 @@ -20,6 +21,8 @@ class AdminFuncs: help='Warm database caches for search and reverse queries.') group.add_argument('--check-database', action='store_true', help='Check that the database is complete and operational.') + group.add_argument('--analyse-indexing', action='store_true', + help='Print performance analysis of the indexing process.') group = parser.add_argument_group('Arguments for cache warming') group.add_argument('--search-only', action='store_const', dest='target', const='search', @@ -27,15 +30,27 @@ class AdminFuncs: group.add_argument('--reverse-only', action='store_const', dest='target', const='reverse', help="Only pre-warm tables for reverse queries") + group = parser.add_argument_group('Arguments for index anaysis') + mgroup = group.add_mutually_exclusive_group() + mgroup.add_argument('--osm-id', type=str, + help='Analyse indexing of the given OSM object') + mgroup.add_argument('--place-id', type=int, + help='Analyse indexing of the given Nominatim object') @staticmethod def run(args): + from ..tools import admin if args.warm: AdminFuncs._warm(args) if args.check_database: run_legacy_script('check_import_finished.php', nominatim_env=args) + if args.analyse_indexing: + conn = connect(args.config.get_libpq_dsn()) + admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id) + conn.close() + return 0 diff --git a/nominatim/tools/admin.py b/nominatim/tools/admin.py new file mode 100644 index 00000000..119adf37 --- /dev/null +++ b/nominatim/tools/admin.py @@ -0,0 +1,49 @@ +""" +Functions for database analysis and maintenance. +""" +import logging + +from ..errors import UsageError + +LOG = logging.getLogger() + +def analyse_indexing(conn, osm_id=None, place_id=None): + """ Analyse indexing of a single Nominatim object. + """ + with conn.cursor() as cur: + if osm_id: + osm_type = osm_id[0].upper() + if osm_type not in 'NWR' or not osm_id[1:].isdigit(): + LOG.fatal('OSM ID must be of form . Got: %s', osm_id) + raise UsageError("OSM ID parameter badly formatted") + cur.execute('SELECT place_id FROM placex WHERE osm_type = %s AND osm_id = %s', + (osm_type, osm_id[1:])) + + if cur.rowcount < 1: + LOG.fatal("OSM object %s not found in database.", osm_id) + raise UsageError("OSM object not found") + + place_id = cur.fetchone()[0] + + if place_id is None: + LOG.fatal("No OSM object given to index.") + raise UsageError("OSM object not found") + + cur.execute("update placex set indexed_status = 2 where place_id = %s", + (place_id, )) + + cur.execute("""SET auto_explain.log_min_duration = '0'; + SET auto_explain.log_analyze = 'true'; + SET auto_explain.log_nested_statements = 'true'; + LOAD 'auto_explain'; + SET client_min_messages = LOG; + SET log_min_messages = FATAL""") + + cur.execute("update placex set indexed_status = 0 where place_id = %s", + (place_id, )) + + # we do not want to keep the results + conn.rollback() + + for msg in conn.notices: + print(msg) diff --git a/test/python/conftest.py b/test/python/conftest.py index 8b0ba145..ecd40d7c 100644 --- a/test/python/conftest.py +++ b/test/python/conftest.py @@ -153,3 +153,39 @@ def place_row(place_table, temp_db_cursor): geom or 'SRID=4326;POINT(0 0 )')) return _insert + +@pytest.fixture +def placex_table(temp_db_with_extensions, temp_db_conn): + """ Create an empty version of the place table. + """ + with temp_db_conn.cursor() as cur: + cur.execute("""CREATE TABLE placex ( + place_id BIGINT NOT NULL, + parent_place_id BIGINT, + linked_place_id BIGINT, + importance FLOAT, + indexed_date TIMESTAMP, + geometry_sector INTEGER, + rank_address SMALLINT, + rank_search SMALLINT, + partition SMALLINT, + indexed_status SMALLINT, + osm_id int8, + osm_type char(1), + class text, + type text, + name hstore, + admin_level smallint, + address hstore, + extratags hstore, + geometry Geometry(Geometry,4326), + wikipedia TEXT, + country_code varchar(2), + housenumber TEXT, + postcode TEXT, + centroid GEOMETRY(Geometry, 4326)) + """) + temp_db_conn.commit() + + + diff --git a/test/python/test_cli.py b/test/python/test_cli.py index 9b39f580..f62bccf7 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -85,6 +85,13 @@ def test_admin_command_legacy(monkeypatch, params): assert mock_run_legacy.called == 1 +@pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))]) +def test_admin_command_tool(monkeypatch, func, params): + mock = MockParamCapture() + monkeypatch.setattr(nominatim.tools.admin, func, mock) + + assert 0 == call_nominatim('admin', *params) + assert mock.called == 1 @pytest.mark.parametrize("name,oid", [('file', 'foo.osm'), ('diff', 'foo.osc'), ('node', 12), ('way', 8), ('relation', 32)]) diff --git a/test/python/test_tools_admin.py b/test/python/test_tools_admin.py new file mode 100644 index 00000000..a40a17db --- /dev/null +++ b/test/python/test_tools_admin.py @@ -0,0 +1,42 @@ +""" +Tests for maintenance and analysis functions. +""" +import pytest + +from nominatim.db.connection import connect +from nominatim.errors import UsageError +from nominatim.tools import admin + +@pytest.fixture +def db(temp_db, placex_table): + conn = connect('dbname=' + temp_db) + yield conn + conn.close() + +def test_analyse_indexing_no_objects(db): + with pytest.raises(UsageError): + admin.analyse_indexing(db) + + +@pytest.mark.parametrize("oid", ['1234', 'N123a', 'X123']) +def test_analyse_indexing_bad_osmid(db, oid): + with pytest.raises(UsageError): + admin.analyse_indexing(db, osm_id=oid) + + +def test_analyse_indexing_unknown_osmid(db): + with pytest.raises(UsageError): + admin.analyse_indexing(db, osm_id='W12345674') + + +def test_analyse_indexing_with_place_id(db, temp_db_cursor): + temp_db_cursor.execute("INSERT INTO placex (place_id) VALUES(12345)") + + admin.analyse_indexing(db, place_id=12345) + + +def test_analyse_indexing_with_osm_id(db, temp_db_cursor): + temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_type, osm_id) + VALUES(9988, 'N', 10000)""") + + admin.analyse_indexing(db, osm_id='N10000') diff --git a/utils/analyse_indexing.py b/utils/analyse_indexing.py deleted file mode 100755 index 97cb6843..00000000 --- a/utils/analyse_indexing.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0-only -# -# This file is part of Nominatim. -# Copyright (C) 2020 Sarah Hoffmann - -""" -Script for analysing the indexing process. - -The script enables detailed logging for nested statements and then -runs the indexing process for teh given object. Detailed 'EXPLAIN ANALYSE' -information is printed for each executed query in the trigger. The -transaction is then rolled back, so that no actual changes to the database -happen. It also disables logging into the system log, so that the -log files are not cluttered. -""" - -from argparse import ArgumentParser, RawDescriptionHelpFormatter, ArgumentTypeError -import psycopg2 -import getpass -import re - -class Analyser(object): - - def __init__(self, options): - password = None - if options.password_prompt: - password = getpass.getpass("Database password: ") - - self.options = options - self.conn = psycopg2.connect(dbname=options.dbname, - user=options.user, - password=password, - host=options.host, - port=options.port) - - - - def run(self): - c = self.conn.cursor() - - if self.options.placeid: - place_id = self.options.placeid - else: - if self.options.rank: - c.execute(f"""select place_id from placex - where rank_address = {self.options.rank} - and linked_place_id is null - limit 1""") - objinfo = f"rank {self.options.rank}" - - if self.options.osmid: - osm_type = self.options.osmid[0].upper() - if osm_type not in ('N', 'W', 'R'): - raise RuntimeError("OSM ID must be of form ") - try: - osm_id = int(self.options.osmid[1:]) - except ValueError: - raise RuntimeError("OSM ID must be of form ") - - c.execute(f"""SELECT place_id FROM placex - WHERE osm_type = '{osm_type}' AND osm_id = {osm_id}""") - objinfo = f"OSM object {self.options.osmid}" - - - if c.rowcount < 1: - raise RuntimeError(f"Cannot find a place for {objinfo}.") - place_id = c.fetchone()[0] - - c.execute(f"""update placex set indexed_status = 2 where - place_id = {place_id}""") - - c.execute("""SET auto_explain.log_min_duration = '0'; - SET auto_explain.log_analyze = 'true'; - SET auto_explain.log_nested_statements = 'true'; - LOAD 'auto_explain'; - SET client_min_messages = LOG; - SET log_min_messages = FATAL"""); - - c.execute(f"""update placex set indexed_status = 0 where - place_id = {place_id}""") - - c.close() # automatic rollback - - for l in self.conn.notices: - print(l) - - -if __name__ == '__main__': - def h(s): - return re.sub("\s\s+" , " ", s) - - p = ArgumentParser(description=__doc__, - formatter_class=RawDescriptionHelpFormatter) - - group = p.add_mutually_exclusive_group(required=True) - group.add_argument('--rank', dest='rank', type=int, - help='Analyse indexing of the given address rank') - group.add_argument('--osm-id', dest='osmid', type=str, - help='Analyse indexing of the given OSM object') - group.add_argument('--place-id', dest='placeid', type=int, - help='Analyse indexing of the given Nominatim object') - p.add_argument('-d', '--database', - dest='dbname', action='store', default='nominatim', - help='Name of the PostgreSQL database to connect to.') - p.add_argument('-U', '--username', - dest='user', action='store', - help='PostgreSQL user name.') - p.add_argument('-W', '--password', - dest='password_prompt', action='store_true', - help='Force password prompt.') - p.add_argument('-H', '--host', - dest='host', action='store', - help='PostgreSQL server hostname or socket location.') - p.add_argument('-P', '--port', - dest='port', action='store', - help='PostgreSQL server port') - - Analyser(p.parse_args()).run() -- 2.45.2