git.openstreetmap.org Git - nominatim.git/commitdiff
convert admin --analyse-indexing to new indexing method
author: Sarah Hoffmann <lonvia@denofr.de>
Thu, 7 Jul 2022 09:23:14 +0000 (11:23 +0200)
committer: Sarah Hoffmann <lonvia@denofr.de>
Thu, 7 Jul 2022 14:20:08 +0000 (16:20 +0200)
A proper run of indexing requires the place information from the
analyzer. Add the pre-processing of place data, so the right
information is handed into the update function.

nominatim/clicmd/admin.py
nominatim/tools/admin.py
test/python/mocks.py
test/python/tools/test_admin.py

index c020e224c1c851e8f21146b7b7b57a358304da39..1ed0ac9b6eccba56a975daaac57eb519251e8c80 100644 (file)
@@ -10,7 +10,6 @@ Implementation of the 'admin' subcommand.
 import logging
 
 from nominatim.tools.exec_utils import run_legacy_script
-from nominatim.db.connection import connect
 
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
@@ -63,8 +62,7 @@ class AdminFuncs:
         if args.analyse_indexing:
             LOG.warning('Analysing performance of indexing function')
             from ..tools import admin
-            with connect(args.config.get_libpq_dsn()) as conn:
-                admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id)
+            admin.analyse_indexing(args.config, osm_id=args.osm_id, place_id=args.place_id)
             return 0
 
         if args.migrate:
index 1886bf246a45fb451082f3bf8c512a47cba5e50b..1bf217e2d81e77fd2526f3f3b3b98ec03a236a05 100644 (file)
@@ -9,47 +9,75 @@ Functions for database analysis and maintenance.
 """
 import logging
 
+from psycopg2.extras import Json, register_hstore
+
+from nominatim.db.connection import connect
+from nominatim.tokenizer import factory as tokenizer_factory
 from nominatim.errors import UsageError
+from nominatim.data.place_info import PlaceInfo
 
 LOG = logging.getLogger()
 
-def analyse_indexing(conn, osm_id=None, place_id=None):
+def _get_place_info(cursor, osm_id, place_id):
+    sql = """SELECT place_id, extra.*
+             FROM placex, LATERAL placex_indexing_prepare(placex) as extra
+          """
+
+    if osm_id:
+        osm_type = osm_id[0].upper()
+        if osm_type not in 'NWR' or not osm_id[1:].isdigit():
+            LOG.fatal('OSM ID must be of form <N|W|R><id>. Got: %s', osm_id)
+            raise UsageError("OSM ID parameter badly formatted")
+
+        sql += ' WHERE placex.osm_type = %s AND placex.osm_id = %s'
+        values = (osm_type, int(osm_id[1:]))
+    elif place_id is not None:
+        sql += ' WHERE placex.place_id = %s'
+        values = (place_id, )
+    else:
+        LOG.fatal("No OSM object given to index.")
+        raise UsageError("OSM object not found")
+
+    cursor.execute(sql + ' LIMIT 1', values)
+
+    if cursor.rowcount < 1:
+        LOG.fatal("OSM object %s not found in database.", osm_id)
+        raise UsageError("OSM object not found")
+
+    return cursor.fetchone()
+
+
+def analyse_indexing(config, osm_id=None, place_id=None):
     """ Analyse indexing of a single Nominatim object.
     """
-    with conn.cursor() as cur:
-        if osm_id:
-            osm_type = osm_id[0].upper()
-            if osm_type not in 'NWR' or not osm_id[1:].isdigit():
-                LOG.fatal('OSM ID must be of form <N|W|R><id>. Got: %s', osm_id)
-                raise UsageError("OSM ID parameter badly formatted")
-            cur.execute('SELECT place_id FROM placex WHERE osm_type = %s AND osm_id = %s',
-                        (osm_type, osm_id[1:]))
-
-            if cur.rowcount < 1:
-                LOG.fatal("OSM object %s not found in database.", osm_id)
-                raise UsageError("OSM object not found")
-
-            place_id = cur.fetchone()[0]
-
-        if place_id is None:
-            LOG.fatal("No OSM object given to index.")
-            raise UsageError("OSM object not found")
-
-        cur.execute("update placex set indexed_status = 2 where place_id = %s",
-                    (place_id, ))
-
-        cur.execute("""SET auto_explain.log_min_duration = '0';
-                       SET auto_explain.log_analyze = 'true';
-                       SET auto_explain.log_nested_statements = 'true';
-                       LOAD 'auto_explain';
-                       SET client_min_messages = LOG;
-                       SET log_min_messages = FATAL""")
-
-        cur.execute("update placex set indexed_status = 0 where place_id = %s",
-                    (place_id, ))
-
-    # we do not want to keep the results
-    conn.rollback()
-
-    for msg in conn.notices:
-        print(msg)
+    with connect(config.get_libpq_dsn()) as conn:
+        register_hstore(conn)
+        with conn.cursor() as cur:
+            place = _get_place_info(cur, osm_id, place_id)
+
+            cur.execute("update placex set indexed_status = 2 where place_id = %s",
+                        (place['place_id'], ))
+
+            cur.execute("""SET auto_explain.log_min_duration = '0';
+                           SET auto_explain.log_analyze = 'true';
+                           SET auto_explain.log_nested_statements = 'true';
+                           LOAD 'auto_explain';
+                           SET client_min_messages = LOG;
+                           SET log_min_messages = FATAL""")
+
+            tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+
+            with tokenizer.name_analyzer() as analyzer:
+                cur.execute("""UPDATE placex
+                               SET indexed_status = 0, address = %s, token_info = %s,
+                               name = %s, linked_place_id = %s
+                               WHERE place_id = %s""",
+                            (place['address'],
+                             Json(analyzer.process_place(PlaceInfo(place))),
+                             place['name'], place['linked_place_id'], place['place_id']))
+
+        # we do not want to keep the results
+        conn.rollback()
+
+        for msg in conn.notices:
+            print(msg)
index 9c6ef53215dd91c24368a6153385da9011f52445..a2fff67794b482decc1a6883e9858b57425e8a80 100644 (file)
@@ -42,6 +42,7 @@ class MockPlacexTable:
                                admin_level smallint,
                                address hstore,
                                extratags hstore,
+                               token_info jsonb,
                                geometry Geometry(Geometry,4326),
                                wikipedia TEXT,
                                country_code varchar(2),
index e53d35c478469f7dcebba1658aed50c0a37dc6a9..9c010b9d4b77e00bc784b305eb2c1cfceab3b2be 100644 (file)
@@ -11,37 +11,62 @@ import pytest
 
 from nominatim.errors import UsageError
 from nominatim.tools import admin
+from nominatim.tokenizer import factory
 
 @pytest.fixture(autouse=True)
-def create_placex_table(placex_table):
+def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table):
     """ All tests in this module require the placex table to be set up.
     """
+    temp_db_cursor.execute("DROP TYPE IF EXISTS prepare_update_info CASCADE")
+    temp_db_cursor.execute("""CREATE TYPE prepare_update_info AS (
+                             name HSTORE,
+                             address HSTORE,
+                             rank_address SMALLINT,
+                             country_code TEXT,
+                             class TEXT,
+                             type TEXT,
+                             linked_place_id BIGINT
+                           )""")
+    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex,
+                                                     OUT result prepare_update_info)
+                           AS $$
+                           BEGIN
+                             result.address := p.address;
+                             result.name := p.name;
+                             result.class := p.class;
+                             result.type := p.type;
+                             result.country_code := p.country_code;
+                             result.rank_address := p.rank_address;
+                           END;
+                           $$ LANGUAGE plpgsql STABLE;
+                        """)
+    factory.create_tokenizer(project_env)
 
 
-def test_analyse_indexing_no_objects(temp_db_conn):
+def test_analyse_indexing_no_objects(project_env):
     with pytest.raises(UsageError):
-        admin.analyse_indexing(temp_db_conn)
+        admin.analyse_indexing(project_env)
 
 
 @pytest.mark.parametrize("oid", ['1234', 'N123a', 'X123'])
-def test_analyse_indexing_bad_osmid(temp_db_conn, oid):
+def test_analyse_indexing_bad_osmid(project_env, oid):
     with pytest.raises(UsageError):
-        admin.analyse_indexing(temp_db_conn, osm_id=oid)
+        admin.analyse_indexing(project_env, osm_id=oid)
 
 
-def test_analyse_indexing_unknown_osmid(temp_db_conn):
+def test_analyse_indexing_unknown_osmid(project_env):
     with pytest.raises(UsageError):
-        admin.analyse_indexing(temp_db_conn, osm_id='W12345674')
+        admin.analyse_indexing(project_env, osm_id='W12345674')
 
 
-def test_analyse_indexing_with_place_id(temp_db_conn, temp_db_cursor):
+def test_analyse_indexing_with_place_id(project_env, temp_db_cursor):
     temp_db_cursor.execute("INSERT INTO placex (place_id) VALUES(12345)")
 
-    admin.analyse_indexing(temp_db_conn, place_id=12345)
+    admin.analyse_indexing(project_env, place_id=12345)
 
 
-def test_analyse_indexing_with_osm_id(temp_db_conn, temp_db_cursor):
+def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
     temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_type, osm_id)
                               VALUES(9988, 'N', 10000)""")
 
-    admin.analyse_indexing(temp_db_conn, osm_id='N10000')
+    admin.analyse_indexing(project_env, osm_id='N10000')