]> git.openstreetmap.org Git - nominatim.git/commitdiff
Enhanced the implementation of OSM views GeoTIFF import functionality
authorTareq Al-Ahdal <tareqoalahdal@gmail.com>
Sun, 24 Jul 2022 11:04:23 +0000 (19:04 +0800)
committerSarah Hoffmann <lonvia@denofr.de>
Sat, 1 Oct 2022 09:01:49 +0000 (11:01 +0200)
docs/admin/Import.md
nominatim/clicmd/args.py
nominatim/clicmd/refresh.py
nominatim/clicmd/setup.py
nominatim/tools/database_import.py
nominatim/tools/refresh.py
settings/env.defaults
test/python/cli/test_cmd_import.py
test/python/cli/test_cmd_refresh.py
test/python/tools/test_freeze.py
test/python/tools/test_refresh.py

index 90294959c334297c805f71510c7224544f99584e..91b927287d8fdc7b4c961b833ff33178015863d3 100644 (file)
@@ -78,11 +78,28 @@ This data is available as a binary download. Put it into your project directory:
 
 The file is about 400MB and adds around 4GB to the Nominatim database.
 
+### OSM views 
+OSM publishes aggregate map access numbers that are generated based on the users’ 
+behavior when viewing locations on the map. This data is also optional and 
+it complements wikipedia/wikidata rankings to further enhance the search results
+if added.
+OSM views data is avalaible as a GeoTIFF file. Put it into your project directory:
+
+    cd $PROJECT_DIR
+    wget https://qrank.wmcloud.org/download/osmviews.tiff
+
+The file is about 380MB and adds around 4GB to the Nominatim database. Importing
+OSM views into Nominatim takes a little over 3 hours.
+
+!!! warning
+    Importing OSM views is currently an experimental feature. OSM views data are
+    not yet included in the importance values calculations.
+
 !!! tip
-    If you forgot to download the wikipedia rankings, you can also add
-    importances after the import. Download the files, then run
-    `nominatim refresh --wiki-data --importance`. Updating importances for
-    a planet can take a couple of hours.
+    If you forgot to download the wikipedia rankings or OSM views, then you can 
+    also add importances after the import. To add both, download their files, then 
+    run `nominatim refresh --wiki-data --osm-views --importance`. Updating 
+    importances for a planet will take a couple of hours.
 
 ### External postcodes
 
@@ -139,7 +156,7 @@ import. So this option is particularly interesting if you plan to transfer the
 database or reuse the space later.
 
 !!! warning
-    The datastructure for updates are also required when adding additional data
+    The data structure for updates are also required when adding additional data
     after the import, for example [TIGER housenumber data](../customize/Tiger.md).
     If you plan to use those, you must not use the `--no-updates` parameter.
     Do a normal import, add the external data and once you are done with
index 4457db5fcb457d800fd40265f8a009a7704f9ae3..6edfda7b80ae1ee6bbf41d93985c41625c1da88d 100644 (file)
@@ -115,6 +115,7 @@ class NominatimArgs:
     address_levels: bool
     functions: bool
     wiki_data: bool
+    osm_views: bool
     importance: bool
     website: bool
     diffs: bool
index 6f307a65ae159cc2ac841588c2cf81b89435ac83..8838a740aced4a7e2f059501bf58fba68c012858 100644 (file)
@@ -85,7 +85,7 @@ class UpdateRefresh:
                            help='Enable debug warning statements in functions')
 
 
-    def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches
+    def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, too-many-statements
         from ..tools import refresh, postcodes
         from ..indexer.indexer import Indexer
 
@@ -132,15 +132,17 @@ class UpdateRefresh:
                                                  data_path) > 0:
                 LOG.fatal('FATAL: Wikipedia importance dump file not found')
                 return 1
-        
+
         if args.osm_views:
-            data_path = Path(args.config.OSM_VIEWS_DATA_PATH
-                             or args.project_dir)
+            data_path = Path(args.project_dir)
             LOG.warning('Import OSM views GeoTIFF data from %s', data_path)
-            if refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(),
-                                                 data_path) > 0:
-                LOG.fatal('FATAL: OSM views GeoTIFF file not found')
-                return 1
+            with connect(args.config.get_libpq_dsn()) as conn:
+                if refresh.import_osm_views_geotiff(conn, data_path) == 1:
+                    LOG.fatal('FATAL: OSM views GeoTIFF file not found')
+                    return 1
+                if refresh.import_osm_views_geotiff(conn, data_path) == 2:
+                    LOG.fatal('FATAL: PostGIS version number is less than 3')
+                    return 1
 
         # Attention: importance MUST come after wiki data import.
         if args.importance:
index c7366c3a9e7edd7b7e4d8f493017e429f7bfe245..c1cbab21c377601c8ebafdc3a22fd2136aeb3ee3 100644 (file)
@@ -105,14 +105,17 @@ class SetupAll:
             if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                  data_path) > 0:
                 LOG.error('Wikipedia importance dump file not found. '
-                          'Calculating importance values of locations will not use Wikipedia importance data.')
-            
+                          'Calculating importance values of locations will not \
+                            use Wikipedia importance data.')
+
             LOG.warning('Importing OSM views GeoTIFF data')
-            database_import.import_osm_views_geotiff()
-            data_path = Path(args.config.OSM_VIEWS_DATA_PATH or args.project_dir)
-            if refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(),
-                                                 data_path) > 0:
-                LOG.error('OSM views GeoTIFF file not found. '
+            data_path = Path(args.project_dir)
+            with connect(args.config.get_libpq_dsn()) as conn:
+                if refresh.import_osm_views_geotiff(conn, data_path) == 1:
+                    LOG.error('OSM views GeoTIFF file not found. '
+                          'Calculating importance values of locations will not use OSM views data.')
+                elif refresh.import_osm_views_geotiff(conn, data_path) == 2:
+                    LOG.error('PostGIS version number is less than 3. '
                           'Calculating importance values of locations will not use OSM views data.')
 
         if args.continue_at is None or args.continue_at == 'load-data':
index 20883b96ce0289bb2c2c28b6210695ee51759057..cb620d41fb8f31126fe69a622bf14130e38494d1 100644 (file)
@@ -75,7 +75,11 @@ def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None:
         with conn.cursor() as cur:
             cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
             cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')
-            cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')
+
+            postgis_version = conn.postgis_version_tuple()
+            if postgis_version[0] >= 3:
+                cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')
+
         conn.commit()
 
         _require_version('PostGIS',
@@ -247,8 +251,3 @@ def create_search_indices(conn: Connection, config: Configuration,
 
     sql.run_parallel_sql_file(config.get_libpq_dsn(),
                               'indices.sql', min(8, threads), drop=drop)
-
-
-def import_osm_views_geotiff():
-    """Import OSM views GeoTIFF file"""
-    subprocess.run("raster2pgsql -s 4326 -I -C -t 100x100 -e osmviews.tiff public.osmviews | psql nominatim", shell=True, check=True)
index a3b6c4f086397cb4467e45c6406f346f7e11def8..1bb801f569aa9db4b3c105323db2cc6ca79987f7 100644 (file)
@@ -9,6 +9,7 @@ Functions for bringing auxiliary data in the database up-to-date.
 """
 from typing import MutableSequence, Tuple, Any, Type, Mapping, Sequence, List, cast
 import logging
+import subprocess
 from textwrap import dedent
 from pathlib import Path
 
@@ -146,10 +147,10 @@ def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = F
 
     return 0
 
-def import_osm_views_geotiff(dsn, data_path, ignore_errors=False):
+def import_osm_views_geotiff(conn: Connection, data_path: Path) -> int:
     """ Replaces the OSM views table with new data.
-        
-        Returns 0 if all was well and 1 if the GeoTIFF file could not
+
+        Returns 0 if all was well and 1 if the OSM views GeoTIFF file could not
         be found. Throws an exception if there was an error reading the file.
     """
     datafile = data_path / 'osmviews.tiff'
@@ -157,12 +158,17 @@ def import_osm_views_geotiff(dsn, data_path, ignore_errors=False):
     if not datafile.exists():
         return 1
 
-    pre_code = """BEGIN;
-                  DROP TABLE IF EXISTS "osmviews";
-               """
-    post_code = "COMMIT"
-    execute_file(dsn, datafile, ignore_errors=ignore_errors,
-                 pre_code=pre_code, post_code=post_code)
+    postgis_version = conn.postgis_version_tuple()
+    if postgis_version[0] < 3:
+        return 2
+
+    with conn.cursor() as cur:
+        cur.execute('DROP TABLE IF EXISTS "osm_views"')
+        conn.commit()
+
+        cmd = f"raster2pgsql -s 4326 -I -C -t 100x100 {datafile} \
+            public.osm_views | psql nominatim > /dev/null"
+        subprocess.run(["/bin/bash", "-c" , cmd], check=True)
 
     return 0
 
index c975926254fcfc9919425d740abd6303c7876abc..3115f4382aacf582c5a1054e78c03130bde9f00f 100644 (file)
@@ -86,10 +86,6 @@ NOMINATIM_TIGER_DATA_PATH=
 # When unset, the data is expected to be located in the project directory.
 NOMINATIM_WIKIPEDIA_DATA_PATH=
 
-# Directory where to find OSM views GeoTIFF file.
-# When unset, the data is expected to be located in the project directory.
-NOMINATIM_OSM_VIEWS_DATA_PATH=
-
 # Configuration file for special phrase import.
 # OBSOLETE: use `nominatim special-phrases --config <file>` or simply put
 #           a custom phrase-settings.json into your project directory.
index b6a8721fb61188de9c5a8c0a65913bfdb90db379..17c6697dbeaa3ef28772d022ffa20403e5729f00 100644 (file)
@@ -69,7 +69,7 @@ class TestCliImportWithDb:
         assert cf_mock.called > 1
 
         for mock in mocks:
-            assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
+            assert mock.called > 0, "Mock '{}' not called".format(mock.func_name)
 
 
     def test_import_continue_load_data(self, mock_func_factory):
index ed3a68babce0a21256c860f94345cedcf5928ec2..af06d1611982f8928c949af3fd21533278301c9f 100644 (file)
@@ -24,7 +24,7 @@ class TestRefresh:
     @pytest.mark.parametrize("command,func", [
                              ('address-levels', 'load_address_levels_from_config'),
                              ('wiki-data', 'import_wikipedia_articles'),
-                             ('osm-views', 'import_osm_views_geotiff')
+                             ('osm-views', 'import_osm_views_geotiff'),
                              ('importance', 'recompute_importance'),
                              ('website', 'setup_website'),
                              ])
@@ -32,7 +32,7 @@ class TestRefresh:
         func_mock = mock_func_factory(nominatim.tools.refresh, func)
 
         assert self.call_nominatim('refresh', '--' + command) == 0
-        assert func_mock.called == 1
+        assert func_mock.called > 0
 
 
     def test_refresh_word_count(self):
@@ -72,21 +72,17 @@ class TestRefresh:
 
         assert self.call_nominatim('refresh', '--wiki-data') == 1
 
-    def test_refresh_osm_views_geotiff_file_not_found(self, monkeypatch):
-        monkeypatch.setenv('NOMINATIM_OSM_VIEWS_DATA_PATH', 'gjoiergjeroi345Q')
-
+    def test_refresh_osm_views_geotiff_file_not_found(self):
         assert self.call_nominatim('refresh', '--osm-views') == 1
 
-    def test_refresh_importance_computed_after_wiki_and_osm_views_import(self, monkeypatch):
+    def test_refresh_importance_computed_after_wiki_import(self, monkeypatch):
         calls = []
         monkeypatch.setattr(nominatim.tools.refresh, 'import_wikipedia_articles',
                             lambda *args, **kwargs: calls.append('import') or 0)
-        monkeypatch.setattr(nominatim.tools.refresh, 'import_osm_views_geotiff',
-                            lambda *args, **kwargs: calls.append('import') or 0)
         monkeypatch.setattr(nominatim.tools.refresh, 'recompute_importance',
                             lambda *args, **kwargs: calls.append('update'))
 
-        assert self.call_nominatim('refresh', '--importance', '--wiki-data', '--osm-views') == 0
+        assert self.call_nominatim('refresh', '--importance', '--wiki-data') == 0
 
         assert calls == ['import', 'update']
 
index 6e8525500bf56934355bc29ed2ef43d7d93a0d96..3ebb1730e46bd788e4c4cf24ada301bd94c0d6af 100644 (file)
@@ -21,7 +21,6 @@ NOMINATIM_DROP_TABLES = [
     'address_levels',
     'location_area', 'location_area_country', 'location_area_large_100',
     'location_road_1',
-    'osmviews'
     'place', 'planet_osm_nodes', 'planet_osm_rels', 'planet_osm_ways',
     'search_name_111',
     'wikipedia_article', 'wikipedia_redirect'
index c8ebdab8734eb94d0900811bd20d52ee28d1b3cb..311c84683fb2446e1542678561f5379f60baa6d2 100644 (file)
@@ -34,17 +34,6 @@ def test_refresh_import_wikipedia(dsn, src_dir, table_factory, temp_db_cursor, r
     assert temp_db_cursor.table_rows('wikipedia_redirect') > 0
 
 
-@pytest.mark.parametrize("replace", (True, False))
-def test_refresh_import_osm_views_geotiff(dsn, src_dir, table_factory, temp_db_cursor, replace):
-    if replace:
-        table_factory('osmviews')
-
-    # use the small osm views GeoTIFF file for the API testdb
-    assert refresh.import_osm_views_geotiff(dsn, src_dir / 'test' / 'testdb') == 0
-
-    assert temp_db_cursor.table_rows('osmviews') > 0
-
-
 def test_recompute_importance(placex_table, table_factory, temp_db_conn, temp_db_cursor):
     temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
                                               country_code varchar(2),