From: Sarah Hoffmann Date: Sat, 27 May 2017 13:00:17 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~392 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/49ac49228ee208f7b219ee8ea8e1b199d6486245?hp=-c Merge remote-tracking branch 'upstream/master' --- 49ac49228ee208f7b219ee8ea8e1b199d6486245 diff --combined CMakeLists.txt index 9ca95a29,d80c4e6c..2b4df7bd --- a/CMakeLists.txt +++ b/CMakeLists.txt @@@ -50,13 -50,13 +50,13 @@@ find_package(PostgreSQL REQUIRED include_directories(${PostgreSQL_INCLUDE_DIRS}) link_directories(${PostgreSQL_LIBRARY_DIRS}) - find_program(OSMOSIS osmosis) - if (NOT EXISTS "${OSMOSIS}") - set(OSMOSIS_PATH "/nonexistent") - message(WARNING "Osmosis not found (required for updates)") + find_program(PYOSMIUM pyosmium-get-changes) + if (NOT EXISTS "${PYOSMIUM}") + set(PYOSMIUM_PATH "/nonexistent") + message(WARNING "pyosmium-get-changes not found (required for updates)") else() - set(OSMOSIS_PATH "${OSMOSIS}") - message(STATUS "Using osmosis at ${OSMOSIS_PATH}") + set(PYOSMIUM_PATH "${PYOSMIUM}") + message(STATUS "Using pyosmium-get-changes at ${PYOSMIUM_PATH}") endif() @@@ -92,14 -92,6 +92,14 @@@ set(CUSTOMFILE website/reverse.php website/search.php website/status.php + website/403.html + website/509.html + website/crossdomain.xml + website/favicon.ico + website/last_update.php + website/nominatim.xml + website/robots.txt + website/taginfo.json utils/blocks.php utils/country_languages.php utils/imports.php diff --combined lib/lib.php index a0f67ed8,d47aca57..7553117f --- a/lib/lib.php +++ b/lib/lib.php @@@ -33,6 -33,18 +33,18 @@@ function getCacheMemoryMB( return (int)($aMatches[1]/1024); } + function getDatabaseDate(&$oDB) + { + // Find the newest node in the DB + $iLastOSMID = $oDB->getOne("select max(osm_id) from place where osm_type = 'N'"); + // Lookup the timestamp that node was created + $sLastNodeURL = 'http://www.openstreetmap.org/api/0.6/node/'.$iLastOSMID."/1"; + $sLastNodeXML = file_get_contents($sLastNodeURL); + preg_match('#timestamp="(([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})Z)"#', $sLastNodeXML, $aLastNodeDate); + + return $aLastNodeDate[1]; + } + function bySearchRank($a, $b) { @@@ -611,10 -623,10 +623,10 @@@ function geometryText2Points($geometry_ // preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/', $aMatch[1], $aPolyPoints, PREG_SET_ORDER); // - } elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#', $geometry_as_text, $aMatch)) { +/* } elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#', $geometry_as_text, $aMatch)) { // preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/', $aMatch[1], $aPolyPoints, PREG_SET_ORDER); - // + */ } elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#', $geometry_as_text, $aMatch)) { // $aPolyPoints = createPointsAroundCenter($aMatch[1], $aMatch[2], $fRadius); diff --combined utils/update.php index 475e5836,b106c567..ad627482 --- a/utils/update.php +++ b/utils/update.php @@@ -12,8 -12,9 +12,9 @@@ $aCMDOption array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'), array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'), - array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import using osmosis'), - array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import using osmosis forever'), + array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'), + array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'), + array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'), array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolate)'), array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'), @@@ -36,7 -37,7 +37,13 @@@ ); getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true); --if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1; ++if (!isset($aResult['index-instances'])) { ++ if (getLoadAverage() < 24) ++ $aResult['index-instances'] = 2; ++ else ++ $aResult['index-instances'] = 1; ++} ++ if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0; date_default_timezone_set('Etc/UTC'); @@@ -57,10 -58,39 +64,39 @@@ if (!is_null(CONST_Osm2pgsql_Flatnode_F $sOsm2pgsqlCmd .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File; } + if ($aResult['init-updates']) { + $sSetup = CONST_InstallPath.'/utils/setup.php'; + $iRet = -1; + passthru($sSetup.' --create-functions --enable-diff-updates', $iRet); + if ($iRet != 0) { + fail('Error running setup script'); + } + + $sDatabaseDate = getDatabaseDate($oDB); + $sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', + strtotime($sDatabaseDate) - (3*60*60)); + + // get the appropriate state id + $aOutput = 0; + exec(CONST_Pyosmium_Get_Changes.' -D '.$sWindBack.' --server '.CONST_Replication_Url, + $aOutput, $iRet); + if ($iRet != 0) { + fail('Error running pyosmium tools'); + } + + pg_query($oDB->connection, 'TRUNCATE import_status'); + $sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('"; + $sSQL .= $sDatabaseDate."',".$aOutput[0].", true)"; + if (!pg_query($oDB->connection, $sSQL)) { + fail("Could not enter sequence into database."); + } + + echo "Done. Database updates will start at sequence $aOutput[0] ($sWindBack)\n"; + } - if (isset($aResult['import-diff'])) { - // import diff directly (e.g. from osmosis --rri) - $sNextFile = $aResult['import-diff']; + if (isset($aResult['import-diff']) || isset($aResult['import-file'])) { + // import diffs and files directly (e.g. from osmosis --rri) + $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file']; if (!file_exists($sNextFile)) { fail("Cannot open $sNextFile\n"); } @@@ -79,16 -109,6 +115,6 @@@ $sTemporaryFile = CONST_BasePath.'/data/osmosischange.osc'; $bHaveDiff = false; - if (isset($aResult['import-file']) && $aResult['import-file']) { - $bHaveDiff = true; - $sCMD = CONST_Osmosis_Binary.' --read-xml \''.$aResult['import-file'].'\' --read-empty --derive-change --write-xml-change '.$sTemporaryFile; - echo $sCMD."\n"; - exec($sCMD, $sJunk, $iErrorLevel); - if ($iErrorLevel) { - fail("Error converting osm to osc, osmosis returned: $iErrorLevel\n"); - } - } - $bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api']; $sContentURL = ''; if (isset($aResult['import-node']) && $aResult['import-node']) { @@@ -116,33 -136,8 +142,8 @@@ if (isset($aResult['import-relation']) } if ($sContentURL) { - $sModifyXMLstr = file_get_contents($sContentURL); + file_put_contents($sTemporaryFile, file_get_contents($sContentURL)); $bHaveDiff = true; - - $aSpec = array( - 0 => array("pipe", "r"), // stdin - 1 => array("pipe", "w"), // stdout - 2 => array("pipe", "w") // stderr - ); - $sCMD = CONST_Osmosis_Binary.' --read-xml - --read-empty --derive-change --write-xml-change '.$sTemporaryFile; - echo $sCMD."\n"; - $hProc = proc_open($sCMD, $aSpec, $aPipes); - if (!is_resource($hProc)) { - fail("Error converting osm to osc, osmosis failed\n"); - } - fwrite($aPipes[0], $sModifyXMLstr); - fclose($aPipes[0]); - $sOut = stream_get_contents($aPipes[1]); - if ($aResult['verbose']) echo $sOut; - fclose($aPipes[1]); - $sErrors = stream_get_contents($aPipes[2]); - if ($aResult['verbose']) echo $sErrors; - fclose($aPipes[2]); - if ($iError = proc_close($hProc)) { - echo $sOut; - echo $sErrors; - fail("Error converting osm to osc, osmosis returned: $iError\n"); - } } if ($bHaveDiff) { @@@ -166,7 -161,7 +167,7 @@@ if ($aResult['deduplicate']) $aPartitions = chksql($oDB->getCol($sSQL)); $aPartitions[] = 0; - // we don't care about empty search_name_* artitions, they can't contain mentions of duplicates + // we don't care about empty search_name_* partitions, they can't contain mentions of duplicates foreach ($aPartitions as $i => $sPartition) { $sSQL = "select count(*) from search_name_".$sPartition; $nEntries = chksql($oDB->getOne($sSQL)); @@@ -236,10 -231,8 +237,8 @@@ if ($aResult['import-osmosis'] || $aRes fail("Error: Update interval too low for download.geofabrik.de. Please check install documentation (http://wiki.openstreetmap.org/wiki/Nominatim/Installation#Updates)\n"); } - $sImportFile = CONST_BasePath.'/data/osmosischange.osc'; - $sOsmosisConfigDirectory = CONST_InstallPath.'/settings'; - $sCMDDownload = CONST_Osmosis_Binary.' --read-replication-interval workingDirectory='.$sOsmosisConfigDirectory.' --simplify-change --write-xml-change '.$sImportFile; - $sCMDCheckReplicationLag = CONST_Osmosis_Binary.' -q --read-replication-lag workingDirectory='.$sOsmosisConfigDirectory; + $sImportFile = CONST_InstallPath.'/osmosischange.osc'; + $sCMDDownload = CONST_Pyosmium_Get_Changes.' --server '.CONST_Replication_Url.' -o '.$sImportFile.' -s '.CONST_Replication_Max_Diff_size; $sCMDImport = $sOsm2pgsqlCmd.' '.$sImportFile; $sCMDIndex = CONST_InstallPath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances']; @@@ -247,112 -240,95 +246,95 @@@ $fStartTime = time(); $iFileSize = 1001; - if (!file_exists($sImportFile)) { - // First check if there are new updates published (except for minutelies - there's always new diffs to process) - if (CONST_Replication_Update_Interval > 60) { - unset($aReplicationLag); - exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel); - while ($iErrorLevel > 0 || $aReplicationLag[0] < 1) { - if ($iErrorLevel) { - echo "Error: $iErrorLevel. "; - echo "Re-trying: ".$sCMDCheckReplicationLag." in ".CONST_Replication_Recheck_Interval." secs\n"; - } else { - echo "."; - } + $aLastState = chksql($oDB->getRow('SELECT * FROM import_status')); + + if (!$aLastState['sequence_id']) { + echo "Updates not set up. Please run ./utils/update.php --init-updates.\n"; + exit(1); + } + + echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n"; + + $sBatchEnd = $aLastState['lastimportdate']; + $iEndSequence = $aLastState['sequence_id']; + + if ($aLastState['indexed'] == 't') { + // Sleep if the update interval has not yet been reached. + $fNextUpdate = $aLastState['lastimportdate'] + CONST_Replication_Update_Interval; + if ($fNextUpdate > $fStartTime) { + $iSleepTime = $fNextUpdate - $fStartTime; + echo "Waiting for next update for $iSleepTime sec."; + sleep($iSleepTime); + } + + // Download the next batch of changes. + unlink($sImportFile); + do { + $fCMDStartTime = time(); + $iNextSeq = (int) $aLastState['sequence_id'] + 1; + unset($aOutput); + echo "$sCMDDownload -I $iNextSeq\n"; + exec($sCMDDownload.' -I '.$iNextSeq, $aOutput, $iResult); + + if ($iResult == 3) { + echo 'No new updates. Sleeping for '.CONST_Replication_Recheck_Interval." sec.\n"; sleep(CONST_Replication_Recheck_Interval); - unset($aReplicationLag); - exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel); + } else if ($iResult != 0) { + echo 'ERROR: updates failed.'; + exit($iResult); + } else { + $iEndSequence = (int)$aOutput[0]; } - // There are new replication files - use osmosis to download the file - echo "\n".date('Y-m-d H:i:s')." Replication Delay is ".$aReplicationLag[0]."\n"; - } - $fStartTime = time(); + } while ($iResult); + + // Import the file $fCMDStartTime = time(); - echo $sCMDDownload."\n"; - exec($sCMDDownload, $sJunk, $iErrorLevel); - while ($iErrorLevel > 0) { - echo "Error: $iErrorLevel\n"; - sleep(60); - echo 'Re-trying: '.$sCMDDownload."\n"; - exec($sCMDDownload, $sJunk, $iErrorLevel); + echo $sCMDImport."\n"; + unset($sJunk); + exec($sCMDImport, $sJunk, $iErrorLevel); + if ($iErrorLevel) { + echo "Error executing osm2pgsql: $iErrorLevel\n"; + exit($iErrorLevel); } + + // write the update logs $iFileSize = filesize($sImportFile); - $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory); - $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','osmosis')"; + $sBatchEnd = getDatabaseDate($oDB); + $sSQL = "INSERT INTO import_osmosis_log (batchend, batchseq, batchsize, starttime, endtime, event) values ('$sBatchEnd',$iEndSequence,$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','import')"; var_Dump($sSQL); - $oDB->query($sSQL); - echo date('Y-m-d H:i:s')." Completed osmosis step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n"; - } - - $iFileSize = filesize($sImportFile); - $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory); + chksql($oDB->query($sSQL)); - // Import the file - $fCMDStartTime = time(); - echo $sCMDImport."\n"; - exec($sCMDImport, $sJunk, $iErrorLevel); - if ($iErrorLevel) { - echo "Error: $iErrorLevel\n"; - exit($iErrorLevel); + // update the status + $sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence"; + var_Dump($sSQL); + chksql($oDB->query($sSQL)); + echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n"; } - $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','osm2pgsql')"; - var_Dump($sSQL); - $oDB->query($sSQL); - echo date('Y-m-d H:i:s')." Completed osm2pgsql step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n"; - - // Archive for debug? - unlink($sImportFile); - - $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory); // Index file - $sThisIndexCmd = $sCMDIndex; - if (!isset($aResult['index-instances'])) { - if (getLoadAverage() < 24) - $iIndexInstances = 2; - else - $iIndexInstances = 1; - } else - $iIndexInstances = $aResult['index-instances']; - - $sThisIndexCmd = $sCMDIndex.' -t '.$iIndexInstances; - $fCMDStartTime = time(); - if (!$aResult['no-index']) { + $sThisIndexCmd = $sCMDIndex; + $fCMDStartTime = time(); + echo "$sThisIndexCmd\n"; exec($sThisIndexCmd, $sJunk, $iErrorLevel); if ($iErrorLevel) { echo "Error: $iErrorLevel\n"; exit($iErrorLevel); } - } - $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','index')"; - var_Dump($sSQL); - $oDB->query($sSQL); - echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n"; + $sSQL = "INSERT INTO import_osmosis_log (batchend, batchseq, batchsize, starttime, endtime, event) values ('$sBatchEnd',$iEndSequence,$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','index')"; + var_Dump($sSQL); + $oDB->query($sSQL); + echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n"; - $sSQL = "update import_status set lastimportdate = '$sBatchEnd'"; - $oDB->query($sSQL); + $sSQL = "update import_status set indexed = true"; + $oDB->query($sSQL); + } $fDuration = time() - $fStartTime; echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n"; if (!$aResult['import-osmosis-all']) exit(0); - - if (CONST_Replication_Update_Interval > 60) { - $iSleep = max(0, (strtotime($sBatchEnd)+CONST_Replication_Update_Interval-time())); - } else { - $iSleep = max(0, CONST_Replication_Update_Interval-$fDuration); - } - echo date('Y-m-d H:i:s')." Sleeping $iSleep seconds\n"; - sleep($iSleep); } } - - function getosmosistimestamp($sOsmosisConfigDirectory) - { - $sStateFile = file_get_contents($sOsmosisConfigDirectory.'/state.txt'); - preg_match('#timestamp=(.+)#', $sStateFile, $aResult); - return str_replace('\:', ':', $aResult[1]); - }