From d53af96aa49dfe74d4c54d375fbc6dce111faae1 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 1 Nov 2019 10:07:04 +0100 Subject: [PATCH] update documentation for new wikipedia data --- docs/admin/Import-and-Update.md | 13 ++++++------- docs/admin/Migration.md | 15 +++++++++++++++ lib/setup/SetupClass.php | 13 +++---------- utils/update.php | 2 +- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/docs/admin/Import-and-Update.md b/docs/admin/Import-and-Update.md index 757dab69..a38d8eea 100644 --- a/docs/admin/Import-and-Update.md +++ b/docs/admin/Import-and-Update.md @@ -33,7 +33,7 @@ the directory exists. There should be at least 40GB of free space. ## Downloading additional data -### Wikipedia rankings +### Wikipedia/Wikidata rankings Wikipedia can be used as an optional auxiliary data source to help indicate the importance of OSM features. Nominatim will work without this information @@ -41,15 +41,14 @@ but it will improve the quality of the results if this is installed. This data is available as a binary download: cd $NOMINATIM_SOURCE_DIR/data - wget https://www.nominatim.org/data/wikipedia_article.sql.bin - wget https://www.nominatim.org/data/wikipedia_redirect.sql.bin + wget https://www.nominatim.org/data/wikimedia_importance.sql.gz -Combined the 2 files are around 1.5GB and add around 30GB to the install +The file is about 1GB and it adds around 10GB to the install size of Nominatim. They also increase the install time by an hour or so. -*NOTE:* you'll need to download the Wikipedia rankings before performing -the initial import of the data if you want the rankings applied to the -loaded data. +*NOTE:* if you forgot to download the Wikipedia rankings, you can also add +them after the import by running `./utils/setup.php --import-wikipedia-articles` +and then `./utils/update.php --recompute-importance`. 
### Great Britain, USA postcodes diff --git a/docs/admin/Migration.md b/docs/admin/Migration.md index f3668357..e6b6d102 100644 --- a/docs/admin/Migration.md +++ b/docs/admin/Migration.md @@ -6,6 +6,21 @@ to newer versions of Nominatim. SQL statements should be executed from the PostgreSQL commandline. Execute `psql nominatim` to enter command line mode. +## 3.4.0 -> master + +### New Wikipedia/Wikidata importance tables + +The `wikipedia_*` tables have a new format that also includes references to +Wikidata. You need to update the computation functions and the tables as +follows: + + * download the new Wikipedia tables as described in the import section + * reimport the tables: `./utils/setup.php --import-wikipedia-articles` + * update the functions: `./utils/setup.php --create-functions --enable-diff-updates` + * compute importance: `./utils/update.php --recompute-importance` + +The last step takes about 10 hours on the full planet. + ## 3.3.0 -> 3.4.0 ### Reorganisation of location_area_country table diff --git a/lib/setup/SetupClass.php b/lib/setup/SetupClass.php index 818aeeb7..c1c15d9a 100755 --- a/lib/setup/SetupClass.php +++ b/lib/setup/SetupClass.php @@ -323,19 +323,12 @@ class SetupFunctions public function importWikipediaArticles() { - $sWikiArticlesFile = CONST_Wikipedia_Data_Path.'/wikipedia_article.sql.bin'; - $sWikiRedirectsFile = CONST_Wikipedia_Data_Path.'/wikipedia_redirect.sql.bin'; + $sWikiArticlesFile = CONST_Wikipedia_Data_Path.'/wikimedia_importance.sql.gz'; if (file_exists($sWikiArticlesFile)) { - info('Importing wikipedia articles'); + info('Importing wikipedia articles and redirects'); $this->pgsqlRunDropAndRestore($sWikiArticlesFile); } else { - warn('wikipedia article dump file not found - places will have default importance'); - } - if (file_exists($sWikiRedirectsFile)) { - info('Importing wikipedia redirects'); - $this->pgsqlRunDropAndRestore($sWikiRedirectsFile); - } else { - warn('wikipedia redirect dump file not found - some place 
importance values may be missing'); + warn('wikipedia importance dump file not found - places will have default importance'); } } diff --git a/utils/update.php b/utils/update.php index 31c911cc..735f4638 100644 --- a/utils/update.php +++ b/utils/update.php @@ -322,7 +322,7 @@ if ($aResult['update-address-levels']) { } if ($aResult['recompute-importance']) { - echo 'Updating importance values for database.\n'; + echo "Updating importance values for database.\n"; $oDB = new Nominatim\DB(); $oDB->connect(); -- 2.45.2