From: Sarah Hoffmann Date: Mon, 19 Apr 2021 06:42:59 +0000 (+0200) Subject: Merge pull request #2281 from changpingc/changping/fix-tiger-index X-Git-Tag: v4.0.0~110 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/830e3be1e61b8bac9452b65bcaa0d9feca166e03?hp=29a314a092972e82cb0014499f60959d3f2909a4 Merge pull request #2281 from changpingc/changping/fix-tiger-index fix index on location_property_tiger (parent_place_id) --- diff --git a/.github/actions/setup-postgresql/action.yml b/.github/actions/setup-postgresql/action.yml index 0742e663..060a6789 100644 --- a/.github/actions/setup-postgresql/action.yml +++ b/.github/actions/setup-postgresql/action.yml @@ -14,8 +14,10 @@ runs: steps: - name: Remove existing PostgreSQL run: | - sudo apt-get update -qq sudo apt-get purge -yq postgresql* + sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' + sudo apt-get update -qq + shell: bash - name: Install PostgreSQL diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 2f920a66..a1a4344a 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -25,7 +25,7 @@ jobs: uses: shivammathur/setup-php@v2 with: php-version: '7.4' - tools: phpunit, phpcs + tools: phpunit, phpcs, composer - name: Get Date id: get-date @@ -46,7 +46,7 @@ jobs: - uses: ./Nominatim/.github/actions/build-nominatim - name: Install test prerequsites - run: sudo apt-get install -y -qq php-codesniffer pylint python3-pytest python3-behave + run: sudo apt-get install -y -qq php-codesniffer pylint python3-pytest python3-behave python3-pytest-cov php-codecoverage php-xdebug - name: PHP linting run: phpcs --report-width=120 . @@ -57,17 +57,30 @@ jobs: working-directory: Nominatim - name: PHP unit tests - run: phpunit ./ + run: phpunit --coverage-clover ../../coverage-php.xml ./ working-directory: Nominatim/test/php - name: Python unit tests - run: py.test-3 test/python + run: py.test-3 --cov=nominatim --cov-report=xml test/python working-directory: Nominatim - name: BDD tests - run: behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3 + run: | + behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3 -DPHPCOV=./cov + composer require phpunit/phpcov:7.0.2 + vendor/bin/phpcov merge --clover ../../coverage-bdd.xml ./cov working-directory: Nominatim/test/bdd + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v1 + with: + files: ./Nominatim/coverage*.xml + directory: ./ + name: codecov-umbrella + fail_ci_if_error: true + path_to_write_report: ./coverage/codecov_report.txt + verbose: true + import: runs-on: ubuntu-20.04 diff --git a/CMakeLists.txt b/CMakeLists.txt index f3247900..fa92dd02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -109,21 +109,6 @@ if (BUILD_IMPORTER) " wget -O ${PROJECT_SOURCE_DIR}/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz") endif() - set(CUSTOMSCRIPTS - check_import_finished.php - country_languages.php - export.php - query.php - setup.php - update.php - warm.php - ) - - foreach (script_source ${CUSTOMSCRIPTS}) - configure_file(${PROJECT_SOURCE_DIR}/cmake/script.tmpl - ${PROJECT_BINARY_DIR}/utils/${script_source}) - endforeach() - configure_file(${PROJECT_SOURCE_DIR}/cmake/tool.tmpl ${PROJECT_BINARY_DIR}/nominatim) endif() diff --git a/README.md b/README.md index 6fd0cd45..b643088b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ [![Build 
Status](https://github.com/osm-search/Nominatim/workflows/CI%20Tests/badge.svg)](https://github.com/osm-search/Nominatim/actions?query=workflow%3A%22CI+Tests%22) +[![codecov](https://codecov.io/gh/osm-search/Nominatim/branch/master/graph/badge.svg?token=8P1LXrhCMy)](https://codecov.io/gh/osm-search/Nominatim) Nominatim ========= diff --git a/cmake/script.tmpl b/cmake/script.tmpl deleted file mode 100755 index 3fbe535e..00000000 --- a/cmake/script.tmpl +++ /dev/null @@ -1,14 +0,0 @@ -#!@PHP_BIN@ -Cq - 3.7.0 +### New format and name of configuration file + +The configuration for an import is now saved in a `.env` file in the project +directory. This file follows the dotenv format. For more information, see +the [installation chapter](Import.md#configuration-setup-in-env). + +To migrate to the new system, create a new project directory, add the `.env` +file and port your custom configuration from `settings/local.php`. Most +settings are named similar and only have received a `NOMINATIM_` prefix. +Use the default settings in `settings/env.defaults` as a reference. + ### New location for data files External data files for Wikipedia importance, postcodes etc. are no longer diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index f638af9a..ec6876fa 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -18,7 +18,6 @@ class Geocode protected $aLangPrefOrder = array(); protected $aExcludePlaceIDs = array(); - protected $bReverseInPlan = true; protected $iLimit = 20; protected $iFinalLimit = 10; @@ -61,11 +60,6 @@ class Geocode return $this->oNormalizer->transliterate($sTerm); } - public function setReverseInPlan($bReverse) - { - $this->bReverseInPlan = $bReverse; - } - public function setLanguagePreference($aLangPref) { $this->aLangPrefOrder = $aLangPref; @@ -262,7 +256,6 @@ class Geocode $oParams->getString('country'), $oParams->getString('postalcode') ); - $this->setReverseInPlan(false); } else { $this->setQuery($sQuery); } @@ -330,7 +323,7 @@ class Geocode return false; } - public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bIsStructured) + public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens) { /* Calculate all searches using oValidTokens i.e. @@ -345,7 +338,7 @@ class Geocode */ foreach ($aPhrases as $iPhrase => $oPhrase) { $aNewPhraseSearches = array(); - $sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : ''; + $sPhraseType = $oPhrase->getPhraseType(); foreach ($oPhrase->getWordSets() as $aWordset) { $aWordsetSearches = $aSearches; @@ -388,7 +381,7 @@ class Geocode $aNewSearches = $oCurrentSearch->extendWithPartialTerm( $sToken, $oSearchTerm, - $bIsStructured, + (bool) $sPhraseType, $iPhrase, $oValidTokens->get(' '.$sToken) ); @@ -607,10 +600,8 @@ class Geocode // Commas are used to reduce the search space by indicating where phrases split if ($this->aStructuredQuery) { $aInPhrases = $this->aStructuredQuery; - $bStructuredPhrases = true; } else { $aInPhrases = explode(',', $sQuery); - $bStructuredPhrases = false; } Debug::printDebugArray('Search context', $oCtx); @@ -684,9 +675,9 @@ class Geocode Debug::newSection('Search candidates'); - $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases); + $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens); - if ($this->bReverseInPlan) { + if (!$this->aStructuredQuery) { // Reverse phrase array and also reverse the order of the wordsets in // the first and final phrase. 
Don't bother about phrases in the middle // because order in the address doesn't matter. @@ -695,7 +686,7 @@ class Geocode if (count($aPhrases) > 1) { $aPhrases[count($aPhrases)-1]->invertWordSets(); } - $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, false); + $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens); foreach ($aGroupedSearches as $aSearches) { foreach ($aSearches as $aSearch) { @@ -999,7 +990,6 @@ class Geocode 'Structured query' => $this->aStructuredQuery, 'Name keys' => Debug::fmtArrayVals($this->aLangPrefOrder), 'Excluded place IDs' => Debug::fmtArrayVals($this->aExcludePlaceIDs), - 'Try reversed query'=> $this->bReverseInPlan, 'Limit (for searches)' => $this->iLimit, 'Limit (for results)'=> $this->iFinalLimit, 'Country codes' => Debug::fmtArrayVals($this->aCountryCodes), diff --git a/lib-php/admin/check_import_finished.php b/lib-php/admin/check_import_finished.php deleted file mode 100644 index d5d011c4..00000000 --- a/lib-php/admin/check_import_finished.php +++ /dev/null @@ -1,10 +0,0 @@ -addParams('admin', '--check-database') - ->run(); diff --git a/lib-php/admin/query.php b/lib-php/admin/query.php index beb2f9ef..35fd1184 100644 --- a/lib-php/admin/query.php +++ b/lib-php/admin/query.php @@ -79,7 +79,6 @@ if (!$oParams->hasSetAny($aSearchParams)) { $oGeocode = new Nominatim\Geocode($oDB); $oGeocode->setLanguagePreference($oParams->getPreferredLanguages(false)); -$oGeocode->setReverseInPlan(true); $oGeocode->loadParamArray($oParams); if ($oParams->getBool('search')) { diff --git a/lib-php/admin/setup.php b/lib-php/admin/setup.php deleted file mode 100644 index 7523527a..00000000 --- a/lib-php/admin/setup.php +++ /dev/null @@ -1,218 +0,0 @@ -addParams('--threads', $iInstances); - if ($aCMDResult['ignore-errors'] ?? false) { - $oCmd->addParams('--ignore-errors'); - } - if ($aCMDResult['quiet'] ?? false) { - $oCmd->addParams('--quiet'); - } - if ($aCMDResult['verbose'] ?? false) { - $oCmd->addParams('--verbose'); - } - $oCmd->run(true); -} - - -//******************************************************* -// Making some sanity check: -// Check if osm-file is set and points to a valid file -if ($aCMDResult['import-data'] || $aCMDResult['all']) { - // to remain in /lib/setup_functions.php function - checkInFile($aCMDResult['osm-file']); -} - -// ****************************************************** -// instantiate Setup class -$oSetup = new SetupFunctions($aCMDResult); - -// ******************************************************* -// go through complete process if 'all' is selected or start selected functions -if ($aCMDResult['create-db'] || $aCMDResult['all']) { - $bDidSomething = true; - run((clone($oNominatimCmd))->addParams('transition', '--create-db')); -} - -if ($aCMDResult['setup-db'] || $aCMDResult['all']) { - $bDidSomething = true; - $oCmd = (clone($oNominatimCmd))->addParams('transition', '--setup-db'); - - if ($aCMDResult['no-partitions'] ?? false) { - $oCmd->addParams('--no-partitions'); - } - - run($oCmd); -} - -if ($aCMDResult['import-data'] || $aCMDResult['all']) { - $bDidSomething = true; - $oCmd = (clone($oNominatimCmd)) - ->addParams('transition', '--import-data') - ->addParams('--osm-file', $aCMDResult['osm-file']); - if ($aCMDResult['drop'] ?? 
false) { - $oCmd->addParams('--drop'); - } - - run($oCmd); -} - -if ($aCMDResult['create-functions'] || $aCMDResult['all']) { - $bDidSomething = true; - $oSetup->createSqlFunctions(); -} - -if ($aCMDResult['create-tables'] || $aCMDResult['all']) { - $bDidSomething = true; - $oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-tables'); - - if ($aCMDResult['reverse-only'] ?? false) { - $oCmd->addParams('--reverse-only'); - } - - run($oCmd); -} - -if ($aCMDResult['create-partition-tables'] || $aCMDResult['all']) { - $bDidSomething = true; - run((clone($oNominatimCmd))->addParams('transition', '--create-partition-tables')); -} - -if ($aCMDResult['create-partition-functions'] || $aCMDResult['all']) { - $bDidSomething = true; - $oSetup->createSqlFunctions(); // also create partition functions -} - -if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) { - $bDidSomething = true; - // ignore errors! - (clone($oNominatimCmd))->addParams('refresh', '--wiki-data')->run(); -} - -if ($aCMDResult['load-data'] || $aCMDResult['all']) { - $bDidSomething = true; - run((clone($oNominatimCmd))->addParams('transition', '--load-data')); -} - -if ($aCMDResult['import-tiger-data']) { - $bDidSomething = true; - $sTigerPath = getSetting('TIGER_DATA_PATH', CONST_InstallDir.'/tiger'); - run((clone($oNominatimCmd))->addParams('transition', '--tiger-data', $sTigerPath)); -} - -if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { - $bDidSomething = true; - $oSetup->calculatePostcodes($aCMDResult['all']); -} - -if ($aCMDResult['index'] || $aCMDResult['all']) { - $bDidSomething = true; - $oCmd = (clone($oNominatimCmd))->addParams('transition', '--index'); - if ($aCMDResult['index-noanalyse'] ?? false) { - $oCmd->addParams('--no-analyse'); - } - - run($oCmd); -} - -if ($aCMDResult['drop']) { - $bDidSomething = true; - run((clone($oNominatimCmd))->addParams('freeze')); -} - -if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) { - $bDidSomething = true; - - $oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-search-indices'); - - if ($aCMDResult['drop'] ?? 
false) { - $oCmd->addParams('--drop'); - } - - run($oCmd); -} - -if ($aCMDResult['create-country-names'] || $aCMDResult['all']) { - $bDidSomething = true; - run(clone($oNominatimCmd))->addParams('transition', '--create-country-names'); -} - -if ($aCMDResult['setup-website'] || $aCMDResult['all']) { - $bDidSomething = true; - run((clone($oNominatimCmd))->addParams('refresh', '--website')); -} - -// ****************************************************** -// If we did something, repeat the warnings -if (!$bDidSomething) { - showUsage($aCMDOptions, true); -} else { - echo "Summary of warnings:\n\n"; - repeatWarnings(); - echo "\n"; - info('Setup finished.'); -} diff --git a/lib-php/admin/specialphrases.php b/lib-php/admin/specialphrases.php deleted file mode 100644 index 84bcfb5c..00000000 --- a/lib-php/admin/specialphrases.php +++ /dev/null @@ -1,11 +0,0 @@ - -addParams('special-phrases', '--import-from-wiki') - ->run(); diff --git a/lib-php/admin/update.php b/lib-php/admin/update.php index 81e0b832..ea58f37c 100644 --- a/lib-php/admin/update.php +++ b/lib-php/admin/update.php @@ -3,12 +3,9 @@ require_once(CONST_LibDir.'/init-cmd.php'); require_once(CONST_LibDir.'/setup_functions.php'); -require_once(CONST_LibDir.'/setup/SetupClass.php'); ini_set('memory_limit', '800M'); -use Nominatim\Setup\SetupFunctions as SetupFunctions; - // (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help) $aCMDOptions = array( @@ -17,13 +14,6 @@ $aCMDOptions array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'), array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'), - array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'), - array('check-for-updates', '', 0, 1, 0, 0, 'bool', 'Check if new updates are available'), - array('no-update-functions', '', 0, 1, 0, 0, 'bool', 'Do not update trigger functions to support differential updates (assuming the diff update logic is already present)'), - array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'), - array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'), - array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'), - array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'), array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'), @@ -35,14 +25,6 @@ $aCMDOptions array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'), array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'), - array('index', '', 0, 1, 0, 0, 'bool', 'Index'), - array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'), - array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'), - - array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'), - array('update-address-levels', '', 0, 1, 0, 0, 'bool', 'Reimport address level configuration (EXPERT)'), - array('recompute-importance', '', 0, 1, 0, 0, 'bool', 'Recompute place importances'), - array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'), ); @@ -51,9 +33,6 @@ getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true); loadSettings($aCMDResult['project-dir'] ?? 
getcwd()); setupHTTPProxy(); -if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1; -if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0; - date_default_timezone_set('Etc/UTC'); $oDB = new Nominatim\DB(); @@ -103,35 +82,6 @@ if ($fPostgresVersion >= 11.0) { ); } -$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')); - -function run($oCmd) -{ - global $aCMDResult; - if ($aCMDResult['quiet'] ?? false) { - $oCmd->addParams('--quiet'); - } - if ($aCMDResult['verbose'] ?? false) { - $oCmd->addParams('--verbose'); - } - $oCmd->run(true); -} - - -if ($aResult['init-updates']) { - $oCmd = (clone($oNominatimCmd))->addParams('replication', '--init'); - - if ($aResult['no-update-functions']) { - $oCmd->addParams('--no-update-functions'); - } - - run($oCmd); -} - -if ($aResult['check-for-updates']) { - exit((clone($oNominatimCmd))->addParams('replication', '--check-for-updates')->run()); -} - if (isset($aResult['import-diff']) || isset($aResult['import-file'])) { // import diffs and files directly (e.g. from osmosis --rri) $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file']; @@ -152,10 +102,6 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) { // Don't update the import status - we don't know what this file contains } -if ($aResult['calculate-postcodes']) { - run((clone($oNominatimCmd))->addParams('refresh', '--postcodes')); -} - $sTemporaryFile = CONST_InstallDir.'/osmosischange.osc'; $bHaveDiff = false; $bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api']; @@ -200,37 +146,3 @@ if ($bHaveDiff) { fail("osm2pgsql exited with error level $iRet\n"); } } - -if ($aResult['recompute-word-counts']) { - run((clone($oNominatimCmd))->addParams('refresh', '--word-counts')); -} - -if ($aResult['index']) { - run((clone $oNominatimCmd) - ->addParams('index', '--minrank', $aResult['index-rank']) - ->addParams('--threads', $aResult['index-instances'])); -} - -if ($aResult['update-address-levels']) { - run((clone($oNominatimCmd))->addParams('refresh', '--address-levels')); -} - -if ($aResult['recompute-importance']) { - run((clone($oNominatimCmd))->addParams('refresh', '--importance')); -} - -if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) { - $oCmd = (clone($oNominatimCmd)) - ->addParams('replication') - ->addParams('--threads', $aResult['index-instances']); - - if (!$aResult['import-osmosis-all']) { - $oCmd->addParams('--once'); - } - - if ($aResult['no-index']) { - $oCmd->addParams('--no-index'); - } - - run($oCmd); -} diff --git a/lib-php/cmd.php b/lib-php/cmd.php index 5a12f99a..9c971e5f 100644 --- a/lib-php/cmd.php +++ b/lib-php/cmd.php @@ -144,58 +144,6 @@ function repeatWarnings() } -function runSQLScript($sScript, $bfatal = true, $bVerbose = false, $bIgnoreErrors = false) -{ - // Convert database DSN to psql parameters - $aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN')); - if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; - - $oCmd = new \Nominatim\Shell('psql'); - $oCmd->addParams('--port', $aDSNInfo['port']); - $oCmd->addParams('--dbname', $aDSNInfo['database']); - if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) { - $oCmd->addParams('--host', $aDSNInfo['hostspec']); - } - if (isset($aDSNInfo['username']) && $aDSNInfo['username']) { - $oCmd->addParams('--username', $aDSNInfo['username']); - } - if (isset($aDSNInfo['password'])) { - $oCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']); - } 
- if (!$bVerbose) { - $oCmd->addParams('--quiet'); - } - if ($bfatal && !$bIgnoreErrors) { - $oCmd->addParams('-v', 'ON_ERROR_STOP=1'); - } - - $aDescriptors = array( - 0 => array('pipe', 'r'), - 1 => STDOUT, - 2 => STDERR - ); - $ahPipes = null; - $hProcess = @proc_open($oCmd->escapedCmd(), $aDescriptors, $ahPipes, null, $oCmd->aEnv); - if (!is_resource($hProcess)) { - fail('unable to start pgsql'); - } - - if (!$bVerbose) { - fwrite($ahPipes[0], 'set client_min_messages to WARNING;'); - } - - while (strlen($sScript)) { - $iWritten = fwrite($ahPipes[0], $sScript); - if ($iWritten <= 0) break; - $sScript = substr($sScript, $iWritten); - } - fclose($ahPipes[0]); - $iReturn = proc_close($hProcess); - if ($bfatal && $iReturn > 0) { - fail("pgsql returned with error code ($iReturn)"); - } -} - function setupHTTPProxy() { if (!getSettingBool('HTTP_PROXY')) { diff --git a/lib-php/setup/SetupClass.php b/lib-php/setup/SetupClass.php deleted file mode 100755 index d07adce7..00000000 --- a/lib-php/setup/SetupClass.php +++ /dev/null @@ -1,261 +0,0 @@ -iInstances = isset($aCMDResult['threads']) - ? $aCMDResult['threads'] - : (min(16, getProcessorCount()) - 1); - - if ($this->iInstances < 1) { - $this->iInstances = 1; - warn('resetting threads to '.$this->iInstances); - } - - // parse database string - $this->aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN')); - if (!isset($this->aDSNInfo['port'])) { - $this->aDSNInfo['port'] = 5432; - } - - // setting member variables based on command line options stored in $aCMDResult - $this->bQuiet = isset($aCMDResult['quiet']) && $aCMDResult['quiet']; - $this->bVerbose = $aCMDResult['verbose']; - - //setting default values which are not set by the update.php array - if (isset($aCMDResult['ignore-errors'])) { - $this->sIgnoreErrors = $aCMDResult['ignore-errors']; - } else { - $this->sIgnoreErrors = false; - } - if (isset($aCMDResult['enable-debug-statements'])) { - $this->bEnableDebugStatements = $aCMDResult['enable-debug-statements']; - } else { - $this->bEnableDebugStatements = false; - } - if (isset($aCMDResult['enable-diff-updates'])) { - $this->bEnableDiffUpdates = $aCMDResult['enable-diff-updates']; - } else { - $this->bEnableDiffUpdates = false; - } - - $this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop']; - - $this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')); - if ($this->bQuiet) { - $this->oNominatimCmd->addParams('--quiet'); - } - if ($this->bVerbose) { - $this->oNominatimCmd->addParams('--verbose'); - } - } - - public function calculatePostcodes($bCMDResultAll) - { - info('Calculate Postcodes'); - $this->pgsqlRunScriptFile(CONST_SqlDir.'/postcode_tables.sql'); - - $sPostcodeFilename = CONST_InstallDir.'/gb_postcode_data.sql.gz'; - if (file_exists($sPostcodeFilename)) { - $this->pgsqlRunScriptFile($sPostcodeFilename); - } else { - warn('optional external GB postcode table file ('.$sPostcodeFilename.') not found. Skipping.'); - } - - $sPostcodeFilename = CONST_InstallDir.'/us_postcode_data.sql.gz'; - if (file_exists($sPostcodeFilename)) { - $this->pgsqlRunScriptFile($sPostcodeFilename); - } else { - warn('optional external US postcode table file ('.$sPostcodeFilename.') not found. 
Skipping.'); - } - - - $this->db()->exec('TRUNCATE location_postcode'); - - $sSQL = 'INSERT INTO location_postcode'; - $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) '; - $sSQL .= "SELECT nextval('seq_place'), 1, country_code,"; - $sSQL .= " upper(trim (both ' ' from address->'postcode')) as pc,"; - $sSQL .= ' ST_Centroid(ST_Collect(ST_Centroid(geometry)))'; - $sSQL .= ' FROM placex'; - $sSQL .= " WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'"; - $sSQL .= ' AND geometry IS NOT null'; - $sSQL .= ' GROUP BY country_code, pc'; - $this->db()->exec($sSQL); - - // only add postcodes that are not yet available in OSM - $sSQL = 'INSERT INTO location_postcode'; - $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) '; - $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,"; - $sSQL .= ' ST_SetSRID(ST_Point(x,y),4326)'; - $sSQL .= ' FROM us_postcode WHERE postcode NOT IN'; - $sSQL .= ' (SELECT postcode FROM location_postcode'; - $sSQL .= " WHERE country_code = 'us')"; - $this->db()->exec($sSQL); - - // add missing postcodes for GB (if available) - $sSQL = 'INSERT INTO location_postcode'; - $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) '; - $sSQL .= "SELECT nextval('seq_place'), 1, 'gb', postcode, geometry"; - $sSQL .= ' FROM gb_postcode WHERE postcode NOT IN'; - $sSQL .= ' (SELECT postcode FROM location_postcode'; - $sSQL .= " WHERE country_code = 'gb')"; - $this->db()->exec($sSQL); - - if (!$bCMDResultAll) { - $sSQL = "DELETE FROM word WHERE class='place' and type='postcode'"; - $sSQL .= 'and word NOT IN (SELECT postcode FROM location_postcode)'; - $this->db()->exec($sSQL); - } - - $sSQL = 'SELECT count(getorcreate_postcode_id(v)) FROM '; - $sSQL .= '(SELECT distinct(postcode) as v FROM location_postcode) p'; - $this->db()->exec($sSQL); - } - - /** - * Return the connection to the database. - * - * @return Database object. - * - * Creates a new connection if none exists yet. Otherwise reuses the - * already established connection. 
- */ - private function db() - { - if (is_null($this->oDB)) { - $this->oDB = new \Nominatim\DB(); - $this->oDB->connect(); - } - - return $this->oDB; - } - - private function pgsqlRunScript($sScript, $bfatal = true) - { - runSQLScript( - $sScript, - $bfatal, - $this->bVerbose, - $this->sIgnoreErrors - ); - } - - public function createSqlFunctions() - { - $oCmd = (clone($this->oNominatimCmd)) - ->addParams('refresh', '--functions'); - - if (!$this->bEnableDiffUpdates) { - $oCmd->addParams('--no-diff-updates'); - } - - if ($this->bEnableDebugStatements) { - $oCmd->addParams('--enable-debug-statements'); - } - - $oCmd->run(!$this->sIgnoreErrors); - } - - private function pgsqlRunScriptFile($sFilename) - { - if (!file_exists($sFilename)) fail('unable to find '.$sFilename); - - $oCmd = (new \Nominatim\Shell('psql')) - ->addParams('--port', $this->aDSNInfo['port']) - ->addParams('--dbname', $this->aDSNInfo['database']); - - if (!$this->bVerbose) { - $oCmd->addParams('--quiet'); - } - if (isset($this->aDSNInfo['hostspec'])) { - $oCmd->addParams('--host', $this->aDSNInfo['hostspec']); - } - if (isset($this->aDSNInfo['username'])) { - $oCmd->addParams('--username', $this->aDSNInfo['username']); - } - if (isset($this->aDSNInfo['password'])) { - $oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']); - } - $ahGzipPipes = null; - if (preg_match('/\\.gz$/', $sFilename)) { - $aDescriptors = array( - 0 => array('pipe', 'r'), - 1 => array('pipe', 'w'), - 2 => array('file', '/dev/null', 'a') - ); - $oZcatCmd = new \Nominatim\Shell('zcat', $sFilename); - - $hGzipProcess = proc_open($oZcatCmd->escapedCmd(), $aDescriptors, $ahGzipPipes); - if (!is_resource($hGzipProcess)) fail('unable to start zcat'); - $aReadPipe = $ahGzipPipes[1]; - fclose($ahGzipPipes[0]); - } else { - $oCmd->addParams('--file', $sFilename); - $aReadPipe = array('pipe', 'r'); - } - $aDescriptors = array( - 0 => $aReadPipe, - 1 => array('pipe', 'w'), - 2 => array('file', '/dev/null', 'a') - ); - $ahPipes = null; - - $hProcess = proc_open($oCmd->escapedCmd(), $aDescriptors, $ahPipes, null, $oCmd->aEnv); - if (!is_resource($hProcess)) fail('unable to start pgsql'); - // TODO: error checking - while (!feof($ahPipes[1])) { - echo fread($ahPipes[1], 4096); - } - fclose($ahPipes[1]); - $iReturn = proc_close($hProcess); - if ($iReturn > 0) { - fail("pgsql returned with error code ($iReturn)"); - } - if ($ahGzipPipes) { - fclose($ahGzipPipes[1]); - proc_close($hGzipProcess); - } - } - - private function replaceSqlPatterns($sSql) - { - $sSql = str_replace('{www-user}', getSetting('DATABASE_WEBUSER'), $sSql); - - $aPatterns = array( - '{ts:address-data}' => getSetting('TABLESPACE_ADDRESS_DATA'), - '{ts:address-index}' => getSetting('TABLESPACE_ADDRESS_INDEX'), - '{ts:search-data}' => getSetting('TABLESPACE_SEARCH_DATA'), - '{ts:search-index}' => getSetting('TABLESPACE_SEARCH_INDEX'), - '{ts:aux-data}' => getSetting('TABLESPACE_AUX_DATA'), - '{ts:aux-index}' => getSetting('TABLESPACE_AUX_INDEX') - ); - - foreach ($aPatterns as $sPattern => $sTablespace) { - if ($sTablespace) { - $sSql = str_replace($sPattern, 'TABLESPACE "'.$sTablespace.'"', $sSql); - } else { - $sSql = str_replace($sPattern, '', $sSql); - } - } - - return $sSql; - } -} diff --git a/lib-php/setup_functions.php b/lib-php/setup_functions.php index c89db534..179b0159 100755 --- a/lib-php/setup_functions.php +++ b/lib-php/setup_functions.php @@ -1,20 +1,5 @@ 'name'); + bnd_name := lower(bnd.name->'name'); IF bnd_name = '' THEN bnd_name := NULL; END IF; @@ -180,12 +180,14 @@ BEGIN IF 
bnd.extratags ? 'place' and bnd_name is not null THEN FOR linked_placex IN SELECT * FROM placex - WHERE make_standard_name(name->'name') = bnd_name + WHERE (position(lower(name->'name') in bnd_name) > 0 + OR position(bnd_name in lower(name->'name')) > 0) AND placex.class = 'place' AND placex.type = bnd.extratags->'place' AND placex.osm_type = 'N' AND placex.linked_place_id is null AND placex.rank_search < 26 -- needed to select the right index - AND _st_covers(bnd.geometry, placex.geometry) + AND placex.type != 'postcode' + AND ST_Covers(bnd.geometry, placex.geometry) LOOP {% if debug %}RAISE WARNING 'Found type-matching place node %', linked_placex.osm_id;{% endif %} RETURN linked_placex; @@ -201,7 +203,7 @@ BEGIN AND placex.linked_place_id is null AND placex.rank_search < 26 AND _st_covers(bnd.geometry, placex.geometry) - ORDER BY make_standard_name(name->'name') = bnd_name desc + ORDER BY lower(name->'name') = bnd_name desc LOOP {% if debug %}RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id;{% endif %} RETURN linked_placex; @@ -213,7 +215,7 @@ BEGIN {% if debug %}RAISE WARNING 'Looking for nodes with matching names';{% endif %} FOR linked_placex IN SELECT placex.* from placex - WHERE make_standard_name(name->'name') = bnd_name + WHERE lower(name->'name') = bnd_name AND ((bnd.rank_address > 0 and bnd.rank_address = (compute_place_rank(placex.country_code, 'N', placex.class, @@ -221,9 +223,11 @@ BEGIN false, placex.postcode)).address_rank) OR (bnd.rank_address = 0 and placex.rank_search = bnd.rank_search)) AND placex.osm_type = 'N' + AND placex.class = 'place' AND placex.linked_place_id is null AND placex.rank_search < 26 -- needed to select the right index - AND _st_covers(bnd.geometry, placex.geometry) + AND placex.type != 'postcode' + AND ST_Covers(bnd.geometry, placex.geometry) LOOP {% if debug %}RAISE WARNING 'Found matching place node %', linked_placex.osm_id;{% endif %} RETURN linked_placex; diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql index c121a963..a6f7cf95 100644 --- a/lib-sql/indices.sql +++ b/lib-sql/indices.sql @@ -23,12 +23,6 @@ CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_lookupPolyg AND rank_address between 4 and 25 AND type != 'postcode' AND name is not null AND indexed_status = 0 AND linked_place_id is null; -CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_placeNode - ON placex USING gist (geometry) {{db.tablespace.search_index}} - WHERE osm_type = 'N' AND rank_search between 5 and 25 - AND class = 'place' AND type != 'postcode' - AND name is not null AND indexed_status = 0 AND linked_place_id is null; - CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_place_id ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}}; diff --git a/lib-sql/tables.sql b/lib-sql/tables.sql index 329eb7a1..aa213dba 100644 --- a/lib-sql/tables.sql +++ b/lib-sql/tables.sql @@ -184,7 +184,10 @@ CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {{db.tabl CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL; CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}; CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}}; -CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name')) {{db.tablespace.address_index}} WHERE 
osm_type='N' and rank_search < 26; +CREATE INDEX idx_placex_geometry_placenode ON placex + USING GIST (geometry) {{db.tablespace.search_index}} + WHERE osm_type = 'N' and rank_search < 26 + and class = 'place' and type != 'postcode' and linked_place_id is null; CREATE INDEX idx_placex_wikidata on placex USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}} WHERE extratags ? 'wikidata' and class = 'place' and osm_type = 'N' and rank_search < 26; DROP SEQUENCE IF EXISTS seq_place; diff --git a/nominatim/cli.py b/nominatim/cli.py index e162d1a6..d560676e 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -8,12 +8,12 @@ import sys import argparse from pathlib import Path -from .config import Configuration -from .tools.exec_utils import run_legacy_script, run_php_server -from .errors import UsageError -from . import clicmd -from .clicmd.args import NominatimArgs -from .tools import tiger_data +from nominatim.config import Configuration +from nominatim.tools.exec_utils import run_legacy_script, run_php_server +from nominatim.errors import UsageError +from nominatim import clicmd +from nominatim.clicmd.args import NominatimArgs +from nominatim.tools import tiger_data LOG = logging.getLogger() @@ -273,8 +273,6 @@ def get_set_parser(**kwargs): else: parser.parser.epilog = 'php-cgi not found. Query commands not available.' - parser.add_subcommand('transition', clicmd.AdminTransition) - return parser diff --git a/nominatim/clicmd/__init__.py b/nominatim/clicmd/__init__.py index ca64f363..f905fed1 100644 --- a/nominatim/clicmd/__init__.py +++ b/nominatim/clicmd/__init__.py @@ -2,12 +2,11 @@ Subcommand definitions for the command-line tool. """ -from .setup import SetupAll -from .replication import UpdateReplication -from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus -from .index import UpdateIndex -from .refresh import UpdateRefresh -from .admin import AdminFuncs -from .freeze import SetupFreeze -from .transition import AdminTransition -from .special_phrases import ImportSpecialPhrases +from nominatim.clicmd.setup import SetupAll +from nominatim.clicmd.replication import UpdateReplication +from nominatim.clicmd.api import APISearch, APIReverse, APILookup, APIDetails, APIStatus +from nominatim.clicmd.index import UpdateIndex +from nominatim.clicmd.refresh import UpdateRefresh +from nominatim.clicmd.admin import AdminFuncs +from nominatim.clicmd.freeze import SetupFreeze +from nominatim.clicmd.special_phrases import ImportSpecialPhrases diff --git a/nominatim/clicmd/admin.py b/nominatim/clicmd/admin.py index 03d7ca8a..e9980772 100644 --- a/nominatim/clicmd/admin.py +++ b/nominatim/clicmd/admin.py @@ -3,8 +3,8 @@ Implementation of the 'admin' subcommand. """ import logging -from ..tools.exec_utils import run_legacy_script -from ..db.connection import connect +from nominatim.tools.exec_utils import run_legacy_script +from nominatim.db.connection import connect # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 diff --git a/nominatim/clicmd/api.py b/nominatim/clicmd/api.py index c3e869b8..a5556952 100644 --- a/nominatim/clicmd/api.py +++ b/nominatim/clicmd/api.py @@ -3,7 +3,7 @@ Subcommand definitions for API calls from the command line. """ import logging -from ..tools.exec_utils import run_api_script +from nominatim.tools.exec_utils import run_api_script # Do not repeat documentation of subcommand classes. 
# pylint: disable=C0111 diff --git a/nominatim/clicmd/freeze.py b/nominatim/clicmd/freeze.py index 1b311e97..8a6c928e 100644 --- a/nominatim/clicmd/freeze.py +++ b/nominatim/clicmd/freeze.py @@ -2,7 +2,7 @@ Implementation of the 'freeze' subcommand. """ -from ..db.connection import connect +from nominatim.db.connection import connect # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 diff --git a/nominatim/clicmd/index.py b/nominatim/clicmd/index.py index 0225c5ed..8fd4f601 100644 --- a/nominatim/clicmd/index.py +++ b/nominatim/clicmd/index.py @@ -3,8 +3,8 @@ Implementation of the 'index' subcommand. """ import psutil -from ..db import status -from ..db.connection import connect +from nominatim.db import status +from nominatim.db.connection import connect # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index 9dca4e42..6a208344 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -4,7 +4,7 @@ Implementation of 'refresh' subcommand. import logging from pathlib import Path -from ..db.connection import connect +from nominatim.db.connection import connect # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 diff --git a/nominatim/clicmd/replication.py b/nominatim/clicmd/replication.py index f9c5561a..f8417bd1 100644 --- a/nominatim/clicmd/replication.py +++ b/nominatim/clicmd/replication.py @@ -6,9 +6,9 @@ import logging import socket import time -from ..db import status -from ..db.connection import connect -from ..errors import UsageError +from nominatim.db import status +from nominatim.db.connection import connect +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index 92d06943..fb7abdec 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -6,11 +6,10 @@ from pathlib import Path import psutil -from ..tools.exec_utils import run_legacy_script -from ..db.connection import connect -from ..db import status, properties -from ..version import NOMINATIM_VERSION -from ..errors import UsageError +from nominatim.db.connection import connect +from nominatim.db import status, properties +from nominatim.version import NOMINATIM_VERSION +from nominatim.errors import UsageError # Do not repeat documentation of subcommand classes. # pylint: disable=C0111 @@ -56,6 +55,7 @@ class SetupAll: from ..tools import database_import from ..tools import refresh from ..indexer.indexer import Indexer + from ..tools import postcodes if args.osm_file and not Path(args.osm_file).is_file(): LOG.fatal("OSM file '%s' does not exist.", args.osm_file) @@ -105,21 +105,23 @@ class SetupAll: LOG.error('Wikipedia importance dump file not found. 
' 'Will be using default importances.') + if args.continue_at is None or args.continue_at == 'load-data': LOG.warning('Initialise tables') with connect(args.config.get_libpq_dsn()) as conn: database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY) - if args.continue_at is None or args.continue_at == 'load-data': LOG.warning('Load data into placex table') database_import.load_data(args.config.get_libpq_dsn(), args.data_dir, args.threads or psutil.cpu_count() or 1) LOG.warning('Calculate postcodes') - run_legacy_script('setup.php', '--calculate-postcodes', - nominatim_env=args, throw_on_fail=not args.ignore_errors) + postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir) if args.continue_at is None or args.continue_at in ('load-data', 'indexing'): + if args.continue_at is not None and args.continue_at != 'load-data': + with connect(args.config.get_libpq_dsn()) as conn: + SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX) LOG.warning('Indexing places') indexer = Indexer(args.config.get_libpq_dsn(), args.threads or psutil.cpu_count() or 1) @@ -149,3 +151,25 @@ class SetupAll: '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION)) return 0 + + + @staticmethod + def _create_pending_index(conn, tablespace): + """ Add a supporting index for finding places still to be indexed. + + This index is normally created at the end of the import process + for later updates. When indexing was partially done, then this + index can greatly improve speed going through already indexed data. + """ + if conn.index_exists('idx_placex_pendingsector'): + return + + with conn.cursor() as cur: + LOG.warning('Creating support index') + if tablespace: + tablespace = 'TABLESPACE ' + tablespace + cur.execute("""CREATE INDEX idx_placex_pendingsector + ON placex USING BTREE (rank_address,geometry_sector) + {} WHERE indexed_status > 0 + """.format(tablespace)) + conn.commit() diff --git a/nominatim/clicmd/transition.py b/nominatim/clicmd/transition.py deleted file mode 100644 index c9341f49..00000000 --- a/nominatim/clicmd/transition.py +++ /dev/null @@ -1,146 +0,0 @@ -""" -Implementation of the 'transition' subcommand. - -This subcommand provides standins for functions that were available -through the PHP scripts but are now no longer directly accessible. -This module will be removed as soon as the transition phase is over. -""" -import logging -from pathlib import Path - -from ..db.connection import connect -from ..db import status -from ..errors import UsageError - -# Do not repeat documentation of subcommand classes. -# pylint: disable=C0111 -# Using non-top-level imports to avoid eventually unused imports. -# pylint: disable=E0012,C0415 - -LOG = logging.getLogger() - -class AdminTransition: - """\ - Internal functions for code transition. Do not use. 
- """ - - @staticmethod - def add_args(parser): - group = parser.add_argument_group('Sub-functions') - group.add_argument('--create-db', action='store_true', - help='Create nominatim db') - group.add_argument('--setup-db', action='store_true', - help='Build a blank nominatim db') - group.add_argument('--import-data', action='store_true', - help='Import a osm file') - group.add_argument('--load-data', action='store_true', - help='Copy data to live tables from import table') - group.add_argument('--create-tables', action='store_true', - help='Create main tables') - group.add_argument('--create-partition-tables', action='store_true', - help='Create required partition tables') - group.add_argument('--index', action='store_true', - help='Index the data') - group.add_argument('--create-search-indices', action='store_true', - help='Create additional indices required for search and update') - group.add_argument('--create-country-names', action='store_true', - help='Create search index for default country names.') - group = parser.add_argument_group('Options') - group.add_argument('--no-partitions', action='store_true', - help='Do not partition search indices') - group.add_argument('--osm-file', metavar='FILE', - help='File to import') - group.add_argument('--drop', action='store_true', - help='Drop tables needed for updates, making the database readonly') - group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int, - help='Size of cache to be used by osm2pgsql (in MB)') - group.add_argument('--no-analyse', action='store_true', - help='Do not perform analyse operations during index') - group.add_argument('--ignore-errors', action='store_true', - help="Ignore certain erros on import.") - group.add_argument('--reverse-only', action='store_true', - help='Do not create search tables and indexes') - group.add_argument('--tiger-data', metavar='FILE', - help='File to import') - - @staticmethod - def run(args): # pylint: disable=too-many-statements - from ..tools import database_import, tiger_data - from ..tools import refresh - - if args.create_db: - LOG.warning('Create DB') - database_import.create_db(args.config.get_libpq_dsn()) - - if args.setup_db: - LOG.warning('Setup DB') - - with connect(args.config.get_libpq_dsn()) as conn: - database_import.setup_extensions(conn) - database_import.install_module(args.module_dir, args.project_dir, - args.config.DATABASE_MODULE_PATH, - conn=conn) - - database_import.import_base_data(args.config.get_libpq_dsn(), - args.data_dir, args.no_partitions) - - if args.import_data: - LOG.warning('Import data') - if not args.osm_file: - raise UsageError('Missing required --osm-file argument') - database_import.import_osm_data(Path(args.osm_file), - args.osm2pgsql_options(0, 1), - drop=args.drop, - ignore_errors=args.ignore_errors) - - if args.create_tables: - LOG.warning('Create Tables') - with connect(args.config.get_libpq_dsn()) as conn: - database_import.create_tables(conn, args.config, args.sqllib_dir, args.reverse_only) - refresh.load_address_levels_from_file(conn, Path(args.config.ADDRESS_LEVEL_CONFIG)) - refresh.create_functions(conn, args.config, args.sqllib_dir, - enable_diff_updates=False) - database_import.create_table_triggers(conn, args.config, args.sqllib_dir) - - if args.create_partition_tables: - LOG.warning('Create Partition Tables') - with connect(args.config.get_libpq_dsn()) as conn: - database_import.create_partition_tables(conn, args.config, args.sqllib_dir) - - if args.load_data: - LOG.warning('Load data') - with 
connect(args.config.get_libpq_dsn()) as conn: - database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY) - database_import.load_data(args.config.get_libpq_dsn(), - args.data_dir, - args.threads or 1) - - with connect(args.config.get_libpq_dsn()) as conn: - try: - status.set_status(conn, status.compute_database_date(conn)) - except Exception as exc: # pylint: disable=broad-except - LOG.error('Cannot determine date of database: %s', exc) - - if args.index: - LOG.warning('Indexing') - from ..indexer.indexer import Indexer - indexer = Indexer(args.config.get_libpq_dsn(), args.threads or 1) - indexer.index_full() - - if args.create_search_indices: - LOG.warning('Create Search indices') - with connect(args.config.get_libpq_dsn()) as conn: - database_import.create_search_indices(conn, args.config, args.sqllib_dir, args.drop) - - if args.tiger_data: - LOG.warning('Tiger data') - tiger_data.add_tiger_data(args.config.get_libpq_dsn(), - args.tiger_data, - args.threads or 1, - args.config, - args.sqllib_dir) - - if args.create_country_names: - LOG.warning('Create search index for default country names.') - with connect(args.config.get_libpq_dsn()) as conn: - database_import.create_country_names(conn, args.config) diff --git a/nominatim/config.py b/nominatim/config.py index a22f90ab..d4645b93 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -7,7 +7,7 @@ from pathlib import Path from dotenv import dotenv_values -from .errors import UsageError +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py index 5aa05ced..ac8d7c85 100644 --- a/nominatim/db/connection.py +++ b/nominatim/db/connection.py @@ -9,7 +9,7 @@ import psycopg2 import psycopg2.extensions import psycopg2.extras -from ..errors import UsageError +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/db/status.py b/nominatim/db/status.py index 225638f4..e63a40f9 100644 --- a/nominatim/db/status.py +++ b/nominatim/db/status.py @@ -5,8 +5,8 @@ import datetime as dt import logging import re -from ..tools.exec_utils import get_url -from ..errors import UsageError +from nominatim.tools.exec_utils import get_url +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py index 0a2e2c06..b376940d 100644 --- a/nominatim/db/utils.py +++ b/nominatim/db/utils.py @@ -5,8 +5,8 @@ import subprocess import logging import gzip -from .connection import get_pg_env -from ..errors import UsageError +from nominatim.db.connection import get_pg_env +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index 06c05e1d..4f4de218 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -7,8 +7,8 @@ import select import psycopg2 -from .progress import ProgressLogger -from ..db.async_connection import DBConnection +from nominatim.indexer.progress import ProgressLogger +from nominatim.db.async_connection import DBConnection LOG = logging.getLogger() diff --git a/nominatim/tools/admin.py b/nominatim/tools/admin.py index 119adf37..2e18cb6f 100644 --- a/nominatim/tools/admin.py +++ b/nominatim/tools/admin.py @@ -3,7 +3,7 @@ Functions for database analysis and maintenance. 
""" import logging -from ..errors import UsageError +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py index d8ab08cc..265f8666 100644 --- a/nominatim/tools/check_database.py +++ b/nominatim/tools/check_database.py @@ -6,8 +6,8 @@ from textwrap import dedent import psycopg2 -from ..db.connection import connect -from ..errors import UsageError +from nominatim.db.connection import connect +from nominatim.errors import UsageError CHECKLIST = [] @@ -84,7 +84,8 @@ def _get_indexes(conn): 'idx_placex_rank_address', 'idx_placex_parent_place_id', 'idx_placex_geometry_reverse_lookuppolygon', - 'idx_placex_geometry_reverse_placenode', + 'idx_placex_geometry_placenode', + 'idx_placex_housenumber', 'idx_osmline_parent_place_id', 'idx_osmline_parent_osm_id', 'idx_postcode_id', diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 433cd8af..964bc702 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -11,13 +11,13 @@ from pathlib import Path import psutil import psycopg2 -from ..db.connection import connect, get_pg_env -from ..db import utils as db_utils -from ..db.async_connection import DBConnection -from ..db.sql_preprocessor import SQLPreprocessor -from .exec_utils import run_osm2pgsql -from ..errors import UsageError -from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION +from nominatim.db.connection import connect, get_pg_env +from nominatim.db import utils as db_utils +from nominatim.db.async_connection import DBConnection +from nominatim.db.sql_preprocessor import SQLPreprocessor +from nominatim.tools.exec_utils import run_osm2pgsql +from nominatim.errors import UsageError +from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION LOG = logging.getLogger() diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index e6b9d8d4..96679d27 100644 --- a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -6,8 +6,8 @@ import subprocess import urllib.request as urlrequest from urllib.parse import urlencode -from ..version import NOMINATIM_VERSION -from ..db.connection import get_pg_env +from nominatim.version import NOMINATIM_VERSION +from nominatim.db.connection import get_pg_env LOG = logging.getLogger() diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index b5f0b80e..07fd2ec5 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -3,11 +3,11 @@ Functions for database migration to newer software versions. """ import logging -from ..db import properties -from ..db.connection import connect -from ..version import NOMINATIM_VERSION -from . import refresh, database_import -from ..errors import UsageError +from nominatim.db import properties +from nominatim.db.connection import connect +from nominatim.version import NOMINATIM_VERSION +from nominatim.tools import refresh, database_import +from nominatim.errors import UsageError LOG = logging.getLogger() @@ -156,3 +156,20 @@ def change_housenumber_transliteration(conn, **_): cur.execute("""UPDATE placex SET housenumber = create_housenumber_id(housenumber) WHERE housenumber is not null""") + + +@_migration(3, 7, 0, 0) +def switch_placenode_geometry_index(conn, **_): + """ Replace idx_placex_geometry_reverse_placeNode index. + + Make the index slightly more permissive, so that it can also be used + when matching up boundaries and place nodes. 
It makes the index + idx_placex_adminname index unnecessary. + """ + with conn.cursor() as cur: + cur.execute(""" CREATE INDEX IF NOT EXISTS idx_placex_geometry_placenode ON placex + USING GIST (geometry) + WHERE osm_type = 'N' and rank_search < 26 + and class = 'place' and type != 'postcode' + and linked_place_id is null""") + cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """) diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py new file mode 100644 index 00000000..0a568cba --- /dev/null +++ b/nominatim/tools/postcodes.py @@ -0,0 +1,80 @@ +""" +Functions for importing, updating and otherwise maintaining the table +of artificial postcode centroids. +""" + +from nominatim.db.utils import execute_file +from nominatim.db.connection import connect + +def import_postcodes(dsn, project_dir): + """ Set up the initial list of postcodes. + """ + + with connect(dsn) as conn: + conn.drop_table('gb_postcode') + conn.drop_table('us_postcode') + + with conn.cursor() as cur: + cur.execute("""CREATE TABLE gb_postcode ( + id integer, + postcode character varying(9), + geometry GEOMETRY(Point, 4326))""") + + with conn.cursor() as cur: + cur.execute("""CREATE TABLE us_postcode ( + postcode text, + x double precision, + y double precision)""") + conn.commit() + + gb_postcodes = project_dir / 'gb_postcode_data.sql.gz' + if gb_postcodes.is_file(): + execute_file(dsn, gb_postcodes) + + us_postcodes = project_dir / 'us_postcode_data.sql.gz' + if us_postcodes.is_file(): + execute_file(dsn, us_postcodes) + + with conn.cursor() as cur: + cur.execute("TRUNCATE location_postcode") + cur.execute(""" + INSERT INTO location_postcode + (place_id, indexed_status, country_code, postcode, geometry) + SELECT nextval('seq_place'), 1, country_code, + upper(trim (both ' ' from address->'postcode')) as pc, + ST_Centroid(ST_Collect(ST_Centroid(geometry))) + FROM placex + WHERE address ? 
'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%' + AND geometry IS NOT null + GROUP BY country_code, pc + """) + + cur.execute(""" + INSERT INTO location_postcode + (place_id, indexed_status, country_code, postcode, geometry) + SELECT nextval('seq_place'), 1, 'us', postcode, + ST_SetSRID(ST_Point(x,y),4326) + FROM us_postcode WHERE postcode NOT IN + (SELECT postcode FROM location_postcode + WHERE country_code = 'us') + """) + + cur.execute(""" + INSERT INTO location_postcode + (place_id, indexed_status, country_code, postcode, geometry) + SELECT nextval('seq_place'), 1, 'gb', postcode, geometry + FROM gb_postcode WHERE postcode NOT IN + (SELECT postcode FROM location_postcode + WHERE country_code = 'gb') + """) + + cur.execute(""" + DELETE FROM word WHERE class='place' and type='postcode' + and word NOT IN (SELECT postcode FROM location_postcode) + """) + + cur.execute(""" + SELECT count(getorcreate_postcode_id(v)) FROM + (SELECT distinct(postcode) as v FROM location_postcode) p + """) + conn.commit() diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index 581c69e8..77eecf04 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -7,9 +7,9 @@ from textwrap import dedent from psycopg2.extras import execute_values -from ..db.utils import execute_file -from ..db.sql_preprocessor import SQLPreprocessor -from ..version import NOMINATIM_VERSION +from nominatim.db.utils import execute_file +from nominatim.db.sql_preprocessor import SQLPreprocessor +from nominatim.version import NOMINATIM_VERSION LOG = logging.getLogger() diff --git a/nominatim/tools/replication.py b/nominatim/tools/replication.py index a0a741e8..d6e80891 100644 --- a/nominatim/tools/replication.py +++ b/nominatim/tools/replication.py @@ -6,9 +6,9 @@ from enum import Enum import logging import time -from ..db import status -from .exec_utils import run_osm2pgsql -from ..errors import UsageError +from nominatim.db import status +from nominatim.tools.exec_utils import run_osm2pgsql +from nominatim.errors import UsageError try: from osmium.replication.server import ReplicationServer diff --git a/nominatim/tools/special_phrases.py b/nominatim/tools/special_phrases.py index fd46a18d..9d0259dc 100644 --- a/nominatim/tools/special_phrases.py +++ b/nominatim/tools/special_phrases.py @@ -27,11 +27,22 @@ class SpecialPhrasesImporter(): self.black_list, self.white_list = self._load_white_and_black_lists() #Compile the regex here to increase performances. self.occurence_pattern = re.compile( - r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])' + r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])' ) self.sanity_check_pattern = re.compile(r'^\w+$') self.transliterator = Transliterator.createFromRules("special-phrases normalizer", self.config.TERM_NORMALIZATION) + #This set will contain all existing phrases from the word table which + #no longer exist on the wiki. + #It contain tuples with the following format: (normalized_word, class, type, operator) + self.words_phrases_to_delete = set() + #This set will contain the phrases which still exist from the wiki. + #It is used to prevent duplicates on the wiki by removing them from + #the word_phrases_to_delete only at the end. + self.words_phrases_still_exist = set() + #This set will contain all existing place_classtype tables which doesn't match any + #special phrases class/type on the wiki. 
+ self.table_phrases_to_delete = set() def import_from_wiki(self, languages=None): """ @@ -41,6 +52,9 @@ class SpecialPhrasesImporter(): if languages is not None and not isinstance(languages, list): raise TypeError('The \'languages\' argument should be of type list.') + self._fetch_existing_words_phrases() + self._fetch_existing_place_classtype_tables() + #Get all languages to process. languages = self._load_languages() if not languages else languages @@ -53,9 +67,46 @@ class SpecialPhrasesImporter(): class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang)) self._create_place_classtype_table_and_indexes(class_type_pairs) + self._remove_non_existent_phrases_from_db() self.db_connection.commit() LOG.warning('Import done.') + def _fetch_existing_words_phrases(self): + """ + Fetch existing special phrases from the word table. + Fill the word_phrases_to_delete set of the class. + """ + #Only extract special phrases terms: + #If class=place and type=house then it is a housenumber term. + #If class=place and type=postcode then it is a postcode term. + word_query = """ + SELECT word, class, type, operator FROM word + WHERE class != 'place' OR (type != 'house' AND type != 'postcode') + """ + with self.db_connection.cursor() as db_cursor: + db_cursor.execute(SQL(word_query)) + for row in db_cursor: + row[3] = '-' if row[3] is None else row[3] + self.words_phrases_to_delete.add( + (row[0], row[1], row[2], row[3]) + ) + + def _fetch_existing_place_classtype_tables(self): + """ + Fetch existing place_classtype tables. + Fill the table_phrases_to_delete set of the class. + """ + query = """ + SELECT table_name + FROM information_schema.tables + WHERE table_schema='public' + AND table_name like 'place_classtype_%'; + """ + with self.db_connection.cursor() as db_cursor: + db_cursor.execute(SQL(query)) + for row in db_cursor: + self.table_phrases_to_delete.add(row[0]) + def _load_white_and_black_lists(self): """ Load white and black lists from phrases-settings.json. @@ -80,7 +131,7 @@ class SpecialPhrasesImporter(): 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu', 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl', 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi'] - return self.config.LANGUAGES or default_languages + return self.config.LANGUAGES.split(',') if self.config.LANGUAGES else default_languages @staticmethod def _get_wiki_content(lang): @@ -102,8 +153,10 @@ class SpecialPhrasesImporter(): class_matchs = self.sanity_check_pattern.findall(phrase_class) if len(class_matchs) < 1 or len(type_matchs) < 1: - raise UsageError("Bad class/type for language {}: {}={}".format( - lang, phrase_class, phrase_type)) + LOG.warning("Bad class/type for language %s: %s=%s. 
It will not be imported",
+                        lang, phrase_class, phrase_type)
+            return False
+        return True
 
     def _process_xml_content(self, xml_content, lang):
         """
@@ -122,12 +175,11 @@ class SpecialPhrasesImporter():
             phrase_class = match[1].strip()
             phrase_type = match[2].strip()
             phrase_operator = match[3].strip()
+            #Needed because some operators in the wiki are not written in English
+            phrase_operator = '-' if phrase_operator not in ('near', 'in') else phrase_operator
             #hack around a bug where building=yes was imported with quotes into the wiki
             phrase_type = re.sub(r'\"|"', '', phrase_type)
 
-            #sanity check, in case somebody added garbage in the wiki
-            self._check_sanity(lang, phrase_class, phrase_type)
-
             #blacklisting: disallow certain class/type combinations
             if (
                 phrase_class in self.black_list.keys() and
@@ -141,7 +193,23 @@ class SpecialPhrasesImporter():
             ):
                 continue
 
-            #add class/type to the pairs dict
+            #Check if the phrase already exists in the database.
+            if (
+                    (normalized_label, phrase_class, phrase_type, phrase_operator)
+                    in self.words_phrases_to_delete
+            ):
+                #Remove this phrase from the ones to delete as it still exists on the wiki.
+                self.words_phrases_still_exist.add(
+                    (normalized_label, phrase_class, phrase_type, phrase_operator)
+                )
+                class_type_pairs.add((phrase_class, phrase_type))
+                #No need to add this phrase as it already exists in the word table.
+                continue
+
+            #sanity check, in case somebody added garbage in the wiki
+            if not self._check_sanity(lang, phrase_class, phrase_type):
+                continue
+
             class_type_pairs.add((phrase_class, phrase_type))
 
             self._process_amenity(
@@ -191,6 +259,15 @@ class SpecialPhrasesImporter():
             phrase_class = pair[0]
             phrase_type = pair[1]
 
+            table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+
+            if table_name in self.table_phrases_to_delete:
+                #Remove this table from the ones to delete as it matches a class/type
+                #still existing in the special phrases of the wiki.
+                self.table_phrases_to_delete.remove(table_name)
+                #So there is no need to create the table and indexes.
+                continue
+
             #Table creation
             self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type)
 
@@ -251,6 +328,41 @@ class SpecialPhrasesImporter():
                     .format(Identifier(table_name),
                             Identifier(self.config.DATABASE_WEBUSER)))
 
+    def _remove_non_existent_phrases_from_db(self):
+        """
+            Remove special phrases which don't exist on the wiki anymore.
+            Delete from the word table and delete the place_classtype tables.
+        """
+        LOG.warning('Cleaning database...')
+        self.words_phrases_to_delete = self.words_phrases_to_delete - self.words_phrases_still_exist
+        #List containing all queries to execute. Contains tuples of the format (query, parameters).
+        queries_parameters = []
+
+        #Delete phrases from the word table which are not on the wiki anymore.
+ for phrase_to_delete in self.words_phrases_to_delete: + if phrase_to_delete[3] == '-': + query = """ + DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator IS null + """ + parameters = (phrase_to_delete[0], phrase_to_delete[1], phrase_to_delete[2], ) + queries_parameters.append((query, parameters)) + else: + query = """ + DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator = %s + """ + parameters = (phrase_to_delete[0], phrase_to_delete[1], + phrase_to_delete[2], phrase_to_delete[3], ) + queries_parameters.append((query, parameters)) + + #Delete place_classtype tables corresponding to class/type which are not on the wiki anymore + for table in self.table_phrases_to_delete: + query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table)) + queries_parameters.append((query, ())) + + with self.db_connection.cursor() as db_cursor: + for query, parameters in queries_parameters: + db_cursor.execute(query, parameters) + def _convert_php_settings_if_needed(self, file_path): """ Convert php settings file of special phrases to json file if it is still in php format. diff --git a/nominatim/tools/tiger_data.py b/nominatim/tools/tiger_data.py index 9b960e2d..c1de3615 100644 --- a/nominatim/tools/tiger_data.py +++ b/nominatim/tools/tiger_data.py @@ -6,9 +6,9 @@ import os import tarfile import selectors -from ..db.connection import connect -from ..db.async_connection import DBConnection -from ..db.sql_preprocessor import SQLPreprocessor +from nominatim.db.connection import connect +from nominatim.db.async_connection import DBConnection +from nominatim.db.sql_preprocessor import SQLPreprocessor LOG = logging.getLogger() @@ -61,6 +61,20 @@ def handle_threaded_sql_statements(sel, file): except Exception as exc: # pylint: disable=broad-except LOG.info('Wrong SQL statement: %s', exc) +def handle_unregister_connection_pool(sel, place_threads): + """ Handles unregistering pool of connections + """ + + while place_threads > 0: + for key, _ in sel.select(1): + conn = key.data + sel.unregister(conn) + try: + conn.wait() + except Exception as exc: # pylint: disable=broad-except + LOG.info('Wrong SQL statement: %s', exc) + conn.close() + place_threads -= 1 def add_tiger_data(dsn, data_dir, threads, config, sqllib_dir): """ Import tiger data from directory or tar file @@ -95,13 +109,7 @@ def add_tiger_data(dsn, data_dir, threads, config, sqllib_dir): handle_threaded_sql_statements(sel, file) # Unregistering pool of database connections - while place_threads > 0: - for key, _ in sel.select(1): - conn = key.data - sel.unregister(conn) - conn.wait() - conn.close() - place_threads -= 1 + handle_unregister_connection_pool(sel, place_threads) if tar: tar.close() diff --git a/nominatim/version.py b/nominatim/version.py index 52550d19..9670ea60 100644 --- a/nominatim/version.py +++ b/nominatim/version.py @@ -10,7 +10,7 @@ Version information for Nominatim. # and must always be increased when there is a change to the database or code # that requires a migration. # Released versions always have a database patch level of 0. 
-NOMINATIM_VERSION = (3, 7, 0, 0) +NOMINATIM_VERSION = (3, 7, 0, 1) POSTGRESQL_REQUIRED_VERSION = (9, 3) POSTGIS_REQUIRED_VERSION = (2, 2) diff --git a/osm2pgsql b/osm2pgsql index 497476d5..7869a4e1 160000 --- a/osm2pgsql +++ b/osm2pgsql @@ -1 +1 @@ -Subproject commit 497476d56f7c1fcbbdb95b363293de6ce0feac00 +Subproject commit 7869a4e1255a7657bc8582a58adcf5aa2a3f3c70 diff --git a/test/python/conftest.py b/test/python/conftest.py index 871365d9..0d1cd2f3 100644 --- a/test/python/conftest.py +++ b/test/python/conftest.py @@ -33,8 +33,6 @@ class _TestingCursor(psycopg2.extras.DictCursor): """ Execute a query and return the result as a set of tuples. """ self.execute(sql, params) - if self.rowcount == 1: - return set(tuple(self.fetchone())) return set((tuple(row) for row in self)) diff --git a/test/python/test_cli.py b/test/python/test_cli.py index eb0ee584..afa01e57 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -21,6 +21,7 @@ import nominatim.tools.check_database import nominatim.tools.database_import import nominatim.tools.freeze import nominatim.tools.refresh +import nominatim.tools.postcodes from mocks import MockParamCapture @@ -48,6 +49,7 @@ def mock_run_legacy(monkeypatch): def mock_func_factory(monkeypatch): def get_mock(module, func): mock = MockParamCapture() + mock.func_name = func monkeypatch.setattr(module, func, mock) return mock @@ -96,20 +98,74 @@ def test_import_full(temp_db, mock_func_factory): mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), mock_func_factory(nominatim.tools.database_import, 'create_country_names'), mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'), + mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'), mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), mock_func_factory(nominatim.tools.refresh, 'setup_website'), mock_func_factory(nominatim.db.properties, 'set_property') ] cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions') - mock_func_factory(nominatim.clicmd.setup, 'run_legacy_script') assert 0 == call_nominatim('import', '--osm-file', __file__) assert cf_mock.called > 1 for mock in mocks: - assert mock.called == 1 + assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) + + +def test_import_continue_load_data(temp_db, mock_func_factory): + mocks = [ + mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'), + mock_func_factory(nominatim.tools.database_import, 'load_data'), + mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), + mock_func_factory(nominatim.tools.database_import, 'create_country_names'), + mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'), + mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), + mock_func_factory(nominatim.tools.refresh, 'setup_website'), + mock_func_factory(nominatim.db.properties, 'set_property') + ] + + assert 0 == call_nominatim('import', '--continue', 'load-data') + + for mock in mocks: + assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) + + +def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp_db_conn): + mocks = [ + mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), + mock_func_factory(nominatim.tools.database_import, 'create_country_names'), + mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), + mock_func_factory(nominatim.tools.refresh, 'setup_website'), + mock_func_factory(nominatim.db.properties, 
'set_property') + ] + + assert 0 == call_nominatim('import', '--continue', 'indexing') + + for mock in mocks: + assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) + + assert temp_db_conn.index_exists('idx_placex_pendingsector') + + # Calling it again still works for the index + assert 0 == call_nominatim('import', '--continue', 'indexing') + assert temp_db_conn.index_exists('idx_placex_pendingsector') + + +def test_import_continue_postprocess(temp_db, mock_func_factory): + mocks = [ + mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), + mock_func_factory(nominatim.tools.database_import, 'create_country_names'), + mock_func_factory(nominatim.tools.refresh, 'setup_website'), + mock_func_factory(nominatim.db.properties, 'set_property') + ] + + assert 0 == call_nominatim('import', '--continue', 'db-postprocess') + + for mock in mocks: + assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) + def test_freeze_command(mock_func_factory, temp_db): mock_drop = mock_func_factory(nominatim.tools.freeze, 'drop_update_tables') diff --git a/test/python/test_tools_import_special_phrases.py b/test/python/test_tools_import_special_phrases.py index b77ae10d..4890e0b2 100644 --- a/test/python/test_tools_import_special_phrases.py +++ b/test/python/test_tools_import_special_phrases.py @@ -2,6 +2,7 @@ Tests for import special phrases methods of the class SpecialPhrasesImporter. """ +from mocks import MockParamCapture from nominatim.errors import UsageError from pathlib import Path import tempfile @@ -11,19 +12,63 @@ from nominatim.tools.special_phrases import SpecialPhrasesImporter TEST_BASE_DIR = Path(__file__) / '..' / '..' +def test_fetch_existing_words_phrases_basic(special_phrases_importer, word_table, + temp_db_cursor): + """ + Check for the fetch_existing_words_phrases() method. + It should return special phrase term added to the word + table. + """ + query =""" + INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word', + 'class', 'type', null, 0, 'near'); + """ + temp_db_cursor.execute(query) + + assert not special_phrases_importer.words_phrases_to_delete + special_phrases_importer._fetch_existing_words_phrases() + contained_phrase = special_phrases_importer.words_phrases_to_delete.pop() + assert contained_phrase == ('normalized_word', 'class', 'type', 'near') + +@pytest.mark.parametrize("house_type", ['house', 'postcode']) +def test_fetch_existing_words_phrases_special_cases(special_phrases_importer, word_table, + house_type, temp_db_cursor): + """ + Check for the fetch_existing_words_phrases() method. + It should return nothing as the terms added correspond + to a housenumber and postcode term. + """ + query =""" + INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word', + 'place', %s, null, 0, 'near'); + """ + temp_db_cursor.execute(query, (house_type,)) + + special_phrases_importer._fetch_existing_words_phrases() + assert not special_phrases_importer.words_phrases_to_delete + +def test_fetch_existing_place_classtype_tables(special_phrases_importer, temp_db_cursor): + """ + Check for the fetch_existing_place_classtype_tables() method. + It should return the table just created. 
+ """ + temp_db_cursor.execute('CREATE TABLE place_classtype_testclasstypetable()') + + special_phrases_importer._fetch_existing_place_classtype_tables() + contained_table = special_phrases_importer.table_phrases_to_delete.pop() + assert contained_table == 'place_classtype_testclasstypetable' + def test_check_sanity_class(special_phrases_importer): """ Check for _check_sanity() method. If a wrong class or type is given, an UsageError should raise. If a good class and type are given, nothing special happens. """ - with pytest.raises(UsageError): - special_phrases_importer._check_sanity('en', '', 'type') - with pytest.raises(UsageError): - special_phrases_importer._check_sanity('en', 'class', '') + assert not special_phrases_importer._check_sanity('en', '', 'type') + assert not special_phrases_importer._check_sanity('en', 'class', '') - special_phrases_importer._check_sanity('en', 'class', 'type') + assert special_phrases_importer._check_sanity('en', 'class', 'type') def test_load_white_and_black_lists(special_phrases_importer): """ @@ -80,7 +125,7 @@ def test_convert_settings_giving_json(special_phrases_importer): assert returned == json_file def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs, - word_table, temp_db_conn): + temp_db_conn, word_table): """ Test that _process_amenity() execute well the getorcreate_amenityoperator() SQL function and that @@ -90,13 +135,13 @@ def test_process_amenity_with_operator(special_phrases_importer, getorcreate_ame special_phrases_importer._process_amenity('', '', '', '', 'in') with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute("SELECT * FROM temp_with_operator WHERE op='near' OR op='in'") + temp_db_cursor.execute("SELECT * FROM word WHERE operator='near' OR operator='in'") results = temp_db_cursor.fetchall() assert len(results) == 2 def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs, - temp_db_conn): + temp_db_conn, word_table): """ Test that _process_amenity() execute well the getorcreate_amenity() SQL function. 
@@ -104,7 +149,7 @@ def test_process_amenity_without_operator(special_phrases_importer, getorcreate_
     special_phrases_importer._process_amenity('', '', '', '', '')
 
     with temp_db_conn.cursor() as temp_db_cursor:
-        temp_db_cursor.execute("SELECT * FROM temp_without_operator WHERE op='no_operator'")
+        temp_db_cursor.execute("SELECT * FROM word WHERE operator='no_operator'")
         result = temp_db_cursor.fetchone()
 
     assert result
@@ -154,8 +199,8 @@ def test_grant_access_to_web_user(temp_db_conn, def_config, special_phrases_impo
     assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, phrase_class, phrase_type)
 
 def test_create_place_classtype_table_and_indexes(
-        temp_db_conn, def_config, placex_table, getorcreate_amenity_funcs,
-        getorcreate_amenityoperator_funcs, special_phrases_importer):
+        temp_db_conn, def_config, placex_table,
+        special_phrases_importer):
     """
         Test that _create_place_classtype_table_and_indexes()
         create the right place_classtype tables and place_id indexes
@@ -171,7 +216,7 @@ def test_create_place_classtype_table_and_indexes(
     assert check_placeid_and_centroid_indexes(temp_db_conn, pair[0], pair[1])
     assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, pair[0], pair[1])
 
-def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer,
+def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer, word_table,
                              getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
     """
         Test that _process_xml_content() process the given xml content right
@@ -188,13 +233,77 @@ def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer,
     assert check_amenities_without_op(temp_db_conn)
     assert results[class_test] and type_test in results.values()
 
+def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_phrases,
+                                             temp_db_conn):
+    """
+        Check for the remove_non_existent_phrases_from_db() method.
+
+        It should remove entries from the word table which are contained
+        in the words_phrases_to_delete set and not those also contained
+        in the words_phrases_still_exist set.
+
+        place_classtype tables contained in table_phrases_to_delete should
+        be deleted.
+ """ + with temp_db_conn.cursor() as temp_db_cursor: + to_delete_phrase_tuple = ('normalized_word', 'class', 'type', 'near') + to_keep_phrase_tuple = ( + 'normalized_word_exists', 'class_exists', 'type_exists', 'near' + ) + special_phrases_importer.words_phrases_to_delete = { + to_delete_phrase_tuple, + to_keep_phrase_tuple + } + special_phrases_importer.words_phrases_still_exist = { + to_keep_phrase_tuple + } + special_phrases_importer.table_phrases_to_delete = { + 'place_classtype_testclasstypetable_to_delete' + } + + query_words = 'SELECT word, class, type, operator FROM word;' + query_tables = """ + SELECT table_name + FROM information_schema.tables + WHERE table_schema='public' + AND table_name like 'place_classtype_%'; + """ + + special_phrases_importer._remove_non_existent_phrases_from_db() + + temp_db_cursor.execute(query_words) + words_result = temp_db_cursor.fetchall() + temp_db_cursor.execute(query_tables) + tables_result = temp_db_cursor.fetchall() + assert len(words_result) == 1 and words_result[0] == [ + 'normalized_word_exists', 'class_exists', 'type_exists', 'near' + ] + assert (len(tables_result) == 1 and + tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep' + ) + def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, placex_table, - getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs): + getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs, word_table): """ Check that the main import_from_wiki() method is well executed. It should create the place_classtype table, the place_id and centroid indexes, grand access to the web user and executing the SQL functions for amenities. + It should also update the database well by deleting or preserving existing entries + of the database. """ + #Add some data to the database before execution in order to test + #what is deleted and what is preserved. + with temp_db_conn.cursor() as temp_db_cursor: + temp_db_cursor.execute(""" + INSERT INTO word VALUES(99999, ' animal shelter', 'animal shelter', + 'amenity', 'animal_shelter', null, 0, null); + + INSERT INTO word VALUES(99999, ' wrong_lookup_token', 'wrong_normalized_word', + 'wrong_class', 'wrong_type', null, 0, 'near'); + + CREATE TABLE place_classtype_amenity_animal_shelter(); + CREATE TABLE place_classtype_wrongclass_wrongtype();""") + monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content) special_phrases_importer.import_from_wiki(['en']) @@ -206,6 +315,45 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test) assert check_amenities_with_op(temp_db_conn) assert check_amenities_without_op(temp_db_conn) + assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter') + assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type') + + #Format (query, should_return_something_bool) use to easily execute all asserts + queries_tests = set() + + #Used to check that the correct phrase already in the word table before is still there. + query_correct_word = "SELECT * FROM word WHERE word = 'animal shelter'" + queries_tests.add((query_correct_word, True)) + + #Used to check if wrong phrase was deleted from the word table of the database. 
+ query_wrong_word = "SELECT word FROM word WHERE word = 'wrong_normalized_word'" + queries_tests.add((query_wrong_word, False)) + + #Used to check that correct place_classtype table already in the datase before is still there. + query_existing_table = """ + SELECT table_name + FROM information_schema.tables + WHERE table_schema='public' + AND table_name = 'place_classtype_amenity_animal_shelter'; + """ + queries_tests.add((query_existing_table, True)) + + #Used to check that wrong place_classtype table was deleted from the database. + query_wrong_table = """ + SELECT table_name + FROM information_schema.tables + WHERE table_schema='public' + AND table_name = 'place_classtype_wrongclass_wrongtype'; + """ + queries_tests.add((query_wrong_table, False)) + + with temp_db_conn.cursor() as temp_db_cursor: + for query in queries_tests: + temp_db_cursor.execute(query[0]) + if (query[1] == True): + assert temp_db_cursor.fetchone() + else: + assert not temp_db_cursor.fetchone() def mock_get_wiki_content(lang): """ @@ -271,7 +419,7 @@ def check_amenities_with_op(temp_db_conn): contains more than one value (so that the SQL function was call more than one time). """ with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute("SELECT * FROM temp_with_operator") + temp_db_cursor.execute("SELECT * FROM word WHERE operator != 'no_operator'") return len(temp_db_cursor.fetchall()) > 1 def check_amenities_without_op(temp_db_conn): @@ -280,7 +428,7 @@ def check_amenities_without_op(temp_db_conn): contains more than one value (so that the SQL function was call more than one time). """ with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute("SELECT * FROM temp_without_operator") + temp_db_cursor.execute("SELECT * FROM word WHERE operator = 'no_operator'") return len(temp_db_cursor.fetchall()) > 1 @pytest.fixture @@ -305,6 +453,18 @@ def temp_phplib_dir_with_migration(): yield Path(phpdir) +@pytest.fixture +def default_phrases(word_table, temp_db_cursor): + temp_db_cursor.execute(""" + INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word', + 'class', 'type', null, 0, 'near'); + + INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word_exists', + 'class_exists', 'type_exists', null, 0, 'near'); + + CREATE TABLE place_classtype_testclasstypetable_to_delete(); + CREATE TABLE place_classtype_testclasstypetable_to_keep();""") + @pytest.fixture def make_strandard_name_func(temp_db_cursor): temp_db_cursor.execute(""" @@ -317,13 +477,12 @@ def make_strandard_name_func(temp_db_cursor): @pytest.fixture def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func): temp_db_cursor.execute(""" - CREATE TABLE temp_without_operator(op TEXT); - CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text) RETURNS void as $$ BEGIN - INSERT INTO temp_without_operator VALUES('no_operator'); + INSERT INTO word VALUES(null, lookup_word, normalized_word, + lookup_class, lookup_type, null, 0, 'no_operator'); END; $$ LANGUAGE plpgsql""") @@ -336,6 +495,7 @@ def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func): lookup_class text, lookup_type text, op text) RETURNS void as $$ BEGIN - INSERT INTO temp_with_operator VALUES(op); + INSERT INTO word VALUES(null, lookup_word, normalized_word, + lookup_class, lookup_type, null, 0, op); END; $$ LANGUAGE plpgsql""") \ No newline at end of file diff --git a/test/python/test_tools_postcodes.py b/test/python/test_tools_postcodes.py new file mode 100644 index 
00000000..1fc060b0 --- /dev/null +++ b/test/python/test_tools_postcodes.py @@ -0,0 +1,50 @@ +""" +Tests for functions to maintain the artificial postcode table. +""" + +import pytest + +from nominatim.tools import postcodes + +@pytest.fixture +def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory, + placex_table, word_table): + table_factory('location_postcode', + """ place_id BIGINT, + parent_place_id BIGINT, + rank_search SMALLINT, + rank_address SMALLINT, + indexed_status SMALLINT, + indexed_date TIMESTAMP, + country_code varchar(2), + postcode TEXT, + geometry GEOMETRY(Geometry, 4326)""") + temp_db_cursor.execute('CREATE SEQUENCE seq_place') + temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT) + RETURNS INTEGER AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql; + """) + + +def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path): + postcodes.import_postcodes(dsn, tmp_path) + + assert temp_db_cursor.table_exists('gb_postcode') + assert temp_db_cursor.table_exists('us_postcode') + assert temp_db_cursor.table_rows('location_postcode') == 0 + + +def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path): + temp_db_cursor.execute(""" + INSERT INTO placex (place_id, country_code, address, geometry) + VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)') + """) + + postcodes.import_postcodes(dsn, tmp_path) + + rows = temp_db_cursor.row_set(""" SELECT postcode, country_code, + ST_X(geometry), ST_Y(geometry) + FROM location_postcode""") + print(rows) + assert len(rows) == 1 + assert rows == set((('9486', 'xx', 10, 12), )) + diff --git a/test/python/test_tools_tiger_data.py b/test/python/test_tools_tiger_data.py index 5029a132..1366fe3e 100644 --- a/test/python/test_tools_tiger_data.py +++ b/test/python/test_tools_tiger_data.py @@ -16,11 +16,24 @@ def test_add_tiger_data(dsn, src_dir, def_config, tmp_path, sql_preprocessor, temp_db_cursor.execute('CREATE EXTENSION postgis') temp_db_cursor.execute('CREATE TABLE place (id INT)') sqlfile = tmp_path / '1010.sql' - sqlfile.write_text("""INSERT INTO place values (1)""") + sqlfile.write_text("""INSERT INTO place values (1); + INSERT INTO non_existant_table values (1);""") tiger_data.add_tiger_data(dsn, str(tmp_path), threads, def_config, src_dir / 'lib-sql') assert temp_db_cursor.table_rows('place') == 1 +@pytest.mark.parametrize("threads", (1, 5)) +def test_add_tiger_data_bad_file(dsn, src_dir, def_config, tmp_path, sql_preprocessor, + temp_db_cursor, threads, temp_db): + temp_db_cursor.execute('CREATE EXTENSION hstore') + temp_db_cursor.execute('CREATE EXTENSION postgis') + temp_db_cursor.execute('CREATE TABLE place (id INT)') + sqlfile = tmp_path / '1010.txt' + sqlfile.write_text("""Random text""") + tiger_data.add_tiger_data(dsn, str(tmp_path), threads, def_config, src_dir / 'lib-sql') + + assert temp_db_cursor.table_rows('place') == 0 + @pytest.mark.parametrize("threads", (1, 5)) def test_add_tiger_data_tarfile(dsn, src_dir, def_config, tmp_path, temp_db_cursor, threads, temp_db, sql_preprocessor): @@ -28,10 +41,26 @@ def test_add_tiger_data_tarfile(dsn, src_dir, def_config, tmp_path, temp_db_cursor.execute('CREATE EXTENSION postgis') temp_db_cursor.execute('CREATE TABLE place (id INT)') sqlfile = tmp_path / '1010.sql' - sqlfile.write_text("""INSERT INTO place values (1)""") + sqlfile.write_text("""INSERT INTO place values (1); + INSERT INTO non_existant_table values (1);""") + tar = tarfile.open("sample.tar.gz", "w:gz") + 
tar.add(sqlfile) + tar.close() + tiger_data.add_tiger_data(dsn, str(src_dir / 'sample.tar.gz'), threads, def_config, src_dir / 'lib-sql') + + assert temp_db_cursor.table_rows('place') == 1 + +@pytest.mark.parametrize("threads", (1, 5)) +def test_add_tiger_data_bad_tarfile(dsn, src_dir, def_config, tmp_path, + temp_db_cursor, threads, temp_db, sql_preprocessor): + temp_db_cursor.execute('CREATE EXTENSION hstore') + temp_db_cursor.execute('CREATE EXTENSION postgis') + temp_db_cursor.execute('CREATE TABLE place (id INT)') + sqlfile = tmp_path / '1010.txt' + sqlfile.write_text("""Random text""") tar = tarfile.open("sample.tar.gz", "w:gz") tar.add(sqlfile) tar.close() tiger_data.add_tiger_data(dsn, str(src_dir / 'sample.tar.gz'), threads, def_config, src_dir / 'lib-sql') - assert temp_db_cursor.table_rows('place') == 1 \ No newline at end of file + assert temp_db_cursor.table_rows('place') == 0 \ No newline at end of file diff --git a/test/testdata/special_phrases_test_content.txt b/test/testdata/special_phrases_test_content.txt index bc8c65d0..739ded0d 100644 --- a/test/testdata/special_phrases_test_content.txt +++ b/test/testdata/special_phrases_test_content.txt @@ -3,7 +3,7 @@ OpenStreetMap Wiki wiki https://wiki.openstreetmap.org/wiki/Main_Page -MediaWiki 1.35.1 +MediaWiki 1.35.2 first-letter Media @@ -70,7 +70,7 @@ wikitext text/x-wiki -== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Zip Lines near || aerialway || zip_line || near || Y |- | Zip Wire || aerialway || zip_line || - || N |- | Zip Wires || aerialway || zip_line || - || Y |- | Zip Wire in || aerialway || zip_line || in || N |- | Zip Wires in || aerialway || zip_line || in || Y |- | Zip Wire near || aerialway || zip_line || near || N |} [[Category:Word list]] +== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || near|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || in || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || embassy || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]] cst5x7tt58izti1pxzgljf27tx8qjcj