]> git.openstreetmap.org Git - nominatim.git/commitdiff
Integrated OSM views into importance computation
authorTareq Al-Ahdal <tareqoalahdal@gmail.com>
Thu, 25 Aug 2022 07:45:18 +0000 (09:45 +0200)
committerSarah Hoffmann <lonvia@denofr.de>
Sat, 1 Oct 2022 09:01:49 +0000 (11:01 +0200)
lib-sql/functions/importance.sql
lib-sql/functions/placex_triggers.sql
nominatim/clicmd/refresh.py
nominatim/clicmd/setup.py
nominatim/tools/refresh.py
test/python/cli/test_cmd_import.py
test/python/cli/test_cmd_refresh.py
test/python/tools/test_refresh.py

index ac3aa7f856cbae355f7111fdd12490a003e99a57..30e778d15f6dc37f3019a687f66f0d51e136a6fa 100644 (file)
@@ -98,19 +98,74 @@ $$
 LANGUAGE plpgsql STABLE;
 
 
+CREATE OR REPLACE FUNCTION get_osm_views(centroid GEOMETRY)
+  RETURNS BIGINT
+  AS $$
+DECLARE
+  result BIGINT;
+BEGIN
+  SELECT ST_Value(osm_views.rast, centroid)
+  FROM osm_views
+  WHERE ST_Intersects(ST_ConvexHull(osm_views.rast), centroid) LIMIT 1 INTO result;
+
+  return COALESCE(result, 0);
+END;
+$$
+LANGUAGE plpgsql STABLE;
+
+
+CREATE OR REPLACE FUNCTION normalize_osm_views(views BIGINT)
+  RETURNS FLOAT
+  AS $$
+  DECLARE
+    normalized_osm_views FLOAT;
+    max_views BIGINT;
+  BEGIN
+    IF views > 0 THEN
+      -- Get the highest view count to use it in normalizing the data
+      SELECT max_views_count FROM osm_views_stat INTO max_views;
+      normalized_osm_views := (LOG(views))/(LOG(max_views));
+    ELSE
+      normalized_osm_views := 0.0;
+    END IF;
+
+    RETURN normalized_osm_views;
+  END;
+$$
+LANGUAGE plpgsql;
+
+
 CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
                                               country_code varchar(2),
-                                              osm_type varchar(1), osm_id BIGINT)
+                                              osm_type varchar(1), osm_id BIGINT,
+                                              centroid GEOMETRY)
   RETURNS place_importance
   AS $$
 DECLARE
   match RECORD;
   result place_importance;
+  osm_views_exists BIGINT;
+  views BIGINT;
 BEGIN
+ -- check if osm_views table exists
+ SELECT COUNT(table_name)
+   INTO osm_views_exists
+ FROM information_schema.tables
+ WHERE table_schema LIKE 'public' AND 
+       table_type LIKE 'BASE TABLE' AND
+       table_name = 'osm_views';
+
+  -- add importance by OSM views if osm_views table exists
+  IF osm_views_exists THEN
+    views := get_osm_views(centroid);
+    result.importance := normalize_osm_views(views) * 0.35;
+  END IF;
+
+  -- add importance by wiki data if the place has one
   FOR match IN SELECT * FROM get_wikipedia_match(extratags, country_code)
                WHERE language is not NULL
   LOOP
-    result.importance := match.importance;
+    result.importance := COALESCE(result.importance, 0) + match.importance * 0.65;
     result.wikipedia := match.language || ':' || match.title;
     RETURN result;
   END LOOP;
@@ -119,13 +174,13 @@ BEGIN
     FOR match IN SELECT * FROM wikipedia_article
                   WHERE wd_page_title = extratags->'wikidata'
                   ORDER BY language = 'en' DESC, langcount DESC LIMIT 1 LOOP
-      result.importance := match.importance;
+      result.importance := COALESCE(result.importance, 0) + match.importance * 0.65;
       result.wikipedia := match.language || ':' || match.title;
       RETURN result;
     END LOOP;
   END IF;
 
-  RETURN null;
+  RETURN result;
 END;
 $$
 LANGUAGE plpgsql;
index a2276f074587658b4f0b2b781f559dcce0d49cde..4432fa2458d4447b1f59a0731ab89fda3c318fc8 100644 (file)
@@ -965,7 +965,7 @@ BEGIN
 
   NEW.importance := null;
   SELECT wikipedia, importance
-    FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id)
+    FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id, NEW.centroid)
     INTO NEW.wikipedia,NEW.importance;
 
 {% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %}
@@ -1088,7 +1088,7 @@ BEGIN
 
     SELECT wikipedia, importance
       FROM compute_importance(location.extratags, NEW.country_code,
-                              'N', location.osm_id)
+                              'N', location.osm_id, NEW.centroid)
       INTO linked_wikipedia,linked_importance;
 
     -- Use the maximum importance if one could be computed from the linked object.
index 8838a740aced4a7e2f059501bf58fba68c012858..d03d682f89ba7fb6b850877b445c55b83e2300ed 100644 (file)
@@ -136,13 +136,13 @@ class UpdateRefresh:
         if args.osm_views:
             data_path = Path(args.project_dir)
             LOG.warning('Import OSM views GeoTIFF data from %s', data_path)
-            with connect(args.config.get_libpq_dsn()) as conn:
-                if refresh.import_osm_views_geotiff(conn, data_path) == 1:
-                    LOG.fatal('FATAL: OSM views GeoTIFF file not found')
-                    return 1
-                if refresh.import_osm_views_geotiff(conn, data_path) == 2:
-                    LOG.fatal('FATAL: PostGIS version number is less than 3')
-                    return 1
+            num = refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), data_path)
+            if num == 1:
+                LOG.fatal('FATAL: OSM views GeoTIFF file not found')
+                return 1
+            if num == 2:
+                LOG.fatal('FATAL: PostGIS version number is less than 3')
+                return 1
 
         # Attention: importance MUST come after wiki data import.
         if args.importance:
index c1cbab21c377601c8ebafdc3a22fd2136aeb3ee3..c870fb17132bf44511828a11e9eb7c93d7af5507 100644 (file)
@@ -105,18 +105,18 @@ class SetupAll:
             if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                  data_path) > 0:
                 LOG.error('Wikipedia importance dump file not found. '
-                          'Calculating importance values of locations will not \
-                            use Wikipedia importance data.')
+                          'Calculating importance values of locations will not '
+                          'use Wikipedia importance data.')
 
             LOG.warning('Importing OSM views GeoTIFF data')
             data_path = Path(args.project_dir)
-            with connect(args.config.get_libpq_dsn()) as conn:
-                if refresh.import_osm_views_geotiff(conn, data_path) == 1:
-                    LOG.error('OSM views GeoTIFF file not found. '
-                          'Calculating importance values of locations will not use OSM views data.')
-                elif refresh.import_osm_views_geotiff(conn, data_path) == 2:
-                    LOG.error('PostGIS version number is less than 3. '
-                          'Calculating importance values of locations will not use OSM views data.')
+            num = refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), data_path)
+            if num == 1:
+                LOG.error('OSM views GeoTIFF file not found. '
+                        'Calculating importance values of locations will not use OSM views data.')
+            elif num == 2:
+                LOG.error('PostGIS version number is less than 3. '
+                        'Calculating importance values of locations will not use OSM views data.')
 
         if args.continue_at is None or args.continue_at == 'load-data':
             LOG.warning('Initialise tables')
index 1bb801f569aa9db4b3c105323db2cc6ca79987f7..531de14db8d4911270cf603a4bc6fc0c27f29337 100644 (file)
@@ -16,7 +16,7 @@ from pathlib import Path
 from psycopg2 import sql as pysql
 
 from nominatim.config import Configuration
-from nominatim.db.connection import Connection
+from nominatim.db.connection import Connection, connect
 from nominatim.db.utils import execute_file
 from nominatim.db.sql_preprocessor import SQLPreprocessor
 from nominatim.version import version_str
@@ -147,28 +147,48 @@ def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = F
 
     return 0
 
-def import_osm_views_geotiff(conn: Connection, data_path: Path) -> int:
+def import_osm_views_geotiff(dsn: str, data_path: Path) -> int:
     """ Replaces the OSM views table with new data.
 
         Returns 0 if all was well and 1 if the OSM views GeoTIFF file could not
         be found. Throws an exception if there was an error reading the file.
     """
     datafile = data_path / 'osmviews.tiff'
-
     if not datafile.exists():
         return 1
-
-    postgis_version = conn.postgis_version_tuple()
-    if postgis_version[0] < 3:
-        return 2
-
-    with conn.cursor() as cur:
-        cur.execute('DROP TABLE IF EXISTS "osm_views"')
-        conn.commit()
-
-        cmd = f"raster2pgsql -s 4326 -I -C -t 100x100 {datafile} \
-            public.osm_views | psql nominatim > /dev/null"
-        subprocess.run(["/bin/bash", "-c" , cmd], check=True)
+    with connect(dsn) as conn:
+
+        postgis_version = conn.postgis_version_tuple()
+        if postgis_version[0] < 3:
+            return 2
+
+        with conn.cursor() as cur:
+            cur.drop_table("osm_views")
+            cur.drop_table("osm_views_stat")
+
+            # -ovr: 6 -> zoom 12, 5 -> zoom 13, 4 -> zoom 14, 3 -> zoom 15
+            reproject_geotiff = f"gdalwarp -q -multi -ovr 3 -overwrite \
+                -co COMPRESS=LZW -tr 0.01 0.01 -t_srs EPSG:4326 {datafile} raster2import.tiff"
+            subprocess.run(["/bin/bash", "-c" , reproject_geotiff], check=True)
+
+            tile_size = 256
+            import_geotiff = f"raster2pgsql -I -C -Y -t {tile_size}x{tile_size} raster2import.tiff \
+                public.osm_views | psql {dsn} > /dev/null"
+            subprocess.run(["/bin/bash", "-c" , import_geotiff], check=True)
+
+            cleanup = "rm raster2import.tiff"
+            subprocess.run(["/bin/bash", "-c" , cleanup], check=True)
+
+            # To normalize osm views data, the max view value is needed
+            cur.execute(f"""
+            CREATE TABLE osm_views_stat AS (
+                SELECT MAX(ST_Value(osm_views.rast, 1, x, y)) AS max_views_count
+                FROM osm_views CROSS JOIN
+                generate_series(1, {tile_size}) As x
+                CROSS JOIN generate_series(1, {tile_size}) As y
+                WHERE x <= ST_Width(rast) AND y <= ST_Height(rast));
+            """)
+            conn.commit()
 
     return 0
 
@@ -182,7 +202,7 @@ def recompute_importance(conn: Connection) -> None:
         cur.execute("""
             UPDATE placex SET (wikipedia, importance) =
                (SELECT wikipedia, importance
-                FROM compute_importance(extratags, country_code, osm_type, osm_id))
+                FROM compute_importance(extratags, country_code, osm_type, osm_id, centroid))
             """)
         cur.execute("""
             UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
index 17c6697dbeaa3ef28772d022ffa20403e5729f00..b6a8721fb61188de9c5a8c0a65913bfdb90db379 100644 (file)
@@ -69,7 +69,7 @@ class TestCliImportWithDb:
         assert cf_mock.called > 1
 
         for mock in mocks:
-            assert mock.called > 0, "Mock '{}' not called".format(mock.func_name)
+            assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
 
 
     def test_import_continue_load_data(self, mock_func_factory):
index af06d1611982f8928c949af3fd21533278301c9f..211c1d64fe4d9441d93f36074e1e3e7ceb82d331 100644 (file)
@@ -32,7 +32,7 @@ class TestRefresh:
         func_mock = mock_func_factory(nominatim.tools.refresh, func)
 
         assert self.call_nominatim('refresh', '--' + command) == 0
-        assert func_mock.called > 0
+        assert func_mock.called == 1
 
 
     def test_refresh_word_count(self):
index 311c84683fb2446e1542678561f5379f60baa6d2..fa6e216efb07c55f69c8b941f36ab249a811148a 100644 (file)
@@ -38,6 +38,7 @@ def test_recompute_importance(placex_table, table_factory, temp_db_conn, temp_db
     temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
                                               country_code varchar(2),
                                               osm_type varchar(1), osm_id BIGINT,
+                                              centroid GEOMETRY,
                                               OUT importance FLOAT,
                                               OUT wikipedia TEXT)
                                AS $$ SELECT 0.1::float, 'foo'::text $$ LANGUAGE SQL""")