// Bootstrap: load the site settings and the command-line initialisation library,
// then raise PHP's memory limit for this script to 800M.
4 require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
5 require_once(CONST_BasePath.'/lib/init-cmd.php');
6 ini_set('memory_limit', '800M');
// Command-line option table. The first entry is the usage description; every
// following array describes one option: long name, short flag, four numeric
// columns, a value type ('bool', 'int', 'float', 'realpath' or false) and the
// help text.
// NOTE(review): the embedded line numbers jump from 6 to 9, so the line that opens
// this table is not part of this listing; presumably it assigns $aCMDOptions, which
// is what getCmdOpt() receives below. The meaning of the numeric columns is defined
// by getCmdOpt(), which is not shown here - confirm there.
9 "Import / update / index osm data",
10 array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
11 array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
12 array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
// Throttling limits
14 array('max-load', '', 0, 1, 1, 1, 'float', 'Maximum load average - indexing is paused if this is exceeded'),
15 array('max-blocking', '', 0, 1, 1, 1, 'int', 'Maximum blocking processes - indexing is aborted / paused if this is exceeded'),
// Replication via osmosis
17 array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import using osmosis'),
18 array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import using osmosis forever'),
19 array('no-npi', '', 0, 1, 0, 0, 'bool', 'Do not write npi index files'),
20 array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
22 array('import-npi-all', '', 0, 1, 0, 0, 'bool', 'Import npi pre-indexed files'),
// Mirrored planet diffs
24 array('import-hourly', '', 0, 1, 0, 0, 'bool', 'Import hourly diffs'),
25 array('import-daily', '', 0, 1, 0, 0, 'bool', 'Import daily diffs'),
26 array('import-all', '', 0, 1, 0, 0, 'bool', 'Import all available files'),
// Local files and osm2pgsql tuning
28 array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
29 array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
30 array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
// Re-import of single OSM objects
32 array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
33 array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
34 array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
35 array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
// Indexing
37 array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
38 array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
39 array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
40 array('index-estrate', '', 0, 1, 1, 1, 'int', 'Estimated indexed items per second (def:30)'),
// Maintenance
42 array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
// Parse the command line. All later code reads the parsed options from $aResult.
44 getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
// At most one diff source may be selected: hourly, daily or an explicit diff file.
// NOTE(review): the usage message only mentions hourly/daily although --import-diff
// takes part in the check as well.
46 if ($aResult['import-hourly'] + $aResult['import-daily'] + isset($aResult['import-diff']) > 1)
48 showUsage($aCMDOptions, true, 'Select either import of hourly or daily');
// Indexing defaults when the options were not given: one instance, start at rank 0.
51 if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
52 if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
55 // Lock to prevent multiple copies running
// The pipeline ends in `grep -c`, so the last output line - which is what exec()
// returns - is the number of `ps` lines naming this script (lines containing
// /dev/null or grep itself are filtered out). More than one means another copy runs.
56 if (exec('/bin/ps uww | grep '.basename(__FILE__).' | grep -v /dev/null | grep -v grep -c', $aOutput2, $iResult) > 1)
58 fail("Copy already running\n");
// Throttling defaults: load average 1.9, three blocking processes.
60 if (!isset($aResult['max-load'])) $aResult['max-load'] = 1.9;
61 if (!isset($aResult['max-blocking'])) $aResult['max-blocking'] = 3;
// Refuse to start when getBlockingProcesses() already reports more than the limit.
62 if (getBlockingProcesses() > $aResult['max-blocking'])
64 fail("Too many blocking processes for import\n");
// Make every date()/strtotime() call below work in UTC.
68 date_default_timezone_set('Etc/UTC');
// Split the configured DSN into its parts; fall back to port 5432 when the DSN
// does not carry one.
72 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
73 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
75 // cache memory to be used by osm2pgsql, should not be more than the available memory
// Defaults to 2000 when --osm2pgsql-cache is not given.
76 $iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000);
// When the requested cache plus 500 (presumably MB of headroom) exceeds what
// getTotalMemoryMB() reports, use the value from getCacheMemoryMB() and warn.
77 if ($iCacheMemory + 500 > getTotalMemoryMB())
79 $iCacheMemory = getCacheMemoryMB();
80 echo "WARNING: resetting cache memory to $iCacheMemory\n";
// Base osm2pgsql command line shared by every import mode below; the input file
// name is appended by each mode.
82 $sOsm2pgsqlCmd = CONST_Osm2pgsql_Binary.' -klas --number-processes 1 -C '.$iCacheMemory.' -O gazetteer -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'];
// Pass a flat-node file only when one is configured.
83 if (!is_null(CONST_Osm2pgsql_Flatnode_File))
85 $sOsm2pgsqlCmd .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
// Import of mirrored hourly/daily diffs or of one explicit diff file.
// The loop body runs while --import-all is set or $bFirst is true.
// NOTE(review): $bFirst, $sMirrorDir and $oDB are not assigned in the visible lines
// (the embedded numbering skips here) - confirm where they are initialised.
90 $bContinue = $aResult['import-all'];
91 while ($bContinue || $bFirst)
// Hourly mode: refresh the local mirror, ask the database for the name of the
// next diff (derived from import_status.lastimportdate) and prepare the statement
// that moves lastimportdate forward by one hour.
95 if ($aResult['import-hourly'])
97 // Mirror the hourly diffs
98 exec('wget --quiet --mirror -l 1 -P '.$sMirrorDir.' http://planet.openstreetmap.org/hourly');
99 $sNextFile = $oDB->getOne('select TO_CHAR(lastimportdate,\'YYYYMMDDHH24\')||\'-\'||TO_CHAR(lastimportdate+\'1 hour\'::interval,\'YYYYMMDDHH24\')||\'.osc.gz\' from import_status');
100 $sNextFile = $sMirrorDir.'planet.openstreetmap.org/hourly/'.$sNextFile;
101 $sUpdateSQL = 'update import_status set lastimportdate = lastimportdate+\'1 hour\'::interval';
// Daily mode: same procedure with day granularity.
104 if ($aResult['import-daily'])
106 // Mirror the daily diffs
107 exec('wget --quiet --mirror -l 1 -P '.$sMirrorDir.' http://planet.openstreetmap.org/daily');
108 $sNextFile = $oDB->getOne('select TO_CHAR(lastimportdate,\'YYYYMMDD\')||\'-\'||TO_CHAR(lastimportdate+\'1 day\'::interval,\'YYYYMMDD\')||\'.osc.gz\' from import_status');
109 $sNextFile = $sMirrorDir.'planet.openstreetmap.org/daily/'.$sNextFile;
110 $sUpdateSQL = 'update import_status set lastimportdate = lastimportdate::date + 1';
// Explicit diff file: must exist. The "update" is a deliberate no-op (`where false`).
113 if (isset($aResult['import-diff']))
115 // import diff directly (e.g. from osmosis --rri)
116 $sNextFile = $aResult['import-diff'];
117 if (!file_exists($sNextFile))
119 fail("Cannot open $sNextFile\n");
121 // Don't update the import status - we don't know what this file contains
122 $sUpdateSQL = 'update import_status set lastimportdate = now() where false';
125 // Missing file is not an error - it might not be created yet
126 if (($aResult['import-hourly'] || $aResult['import-daily'] || isset($aResult['import-diff'])) && file_exists($sNextFile))
// Feed the diff to osm2pgsql.
129 $sCMD = $sOsm2pgsqlCmd.' '.$sNextFile;
131 exec($sCMD, $sJunk, $iErrorLevel);
// NOTE(review): presumably guarded by a check of $iErrorLevel that is not part of
// this listing (embedded numbers 132-134 are missing).
135 fail("Error from osm2pgsql, $iErrorLevel\n");
138 // Move the date onwards
139 $oDB->query($sUpdateSQL);
// Re-import of a whole OSM file or of single objects (node / way / relation).
// True when --import-from-main-api was given.
149 $bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];
// NOTE(review): the body of this branch is not part of the listing.
150 if (isset($aResult['import-file']) && $aResult['import-file'])
// Single node: two download variants are visible, one against the OSM API and one
// against Overpass. NOTE(review): the if/else choosing between them (presumably on
// $bUseOSMApi) is missing from this listing; the same holds for way and relation.
154 if (isset($aResult['import-node']) && $aResult['import-node'])
159 $sModifyXMLstr = file_get_contents('http://www.openstreetmap.org/api/0.6/node/'.$aResult['import-node']);
163 $sModifyXMLstr = file_get_contents('http://overpass-api.de/api/interpreter?data=node('.$aResult['import-node'].');out%20meta;');
// Single way: the request also covers the way's nodes ("/full" resp. "node(w)").
166 if (isset($aResult['import-way']) && $aResult['import-way'])
171 $sCmd = 'http://www.openstreetmap.org/api/0.6/way/'.$aResult['import-way'].'/full';
175 $sCmd = 'http://overpass-api.de/api/interpreter?data=(way('.$aResult['import-way'].');node(w););out%20meta;';
177 $sModifyXMLstr = file_get_contents($sCmd);
// Single relation: the request also covers member ways and nodes.
179 if (isset($aResult['import-relation']) && $aResult['import-relation'])
184 $sModifyXMLstr = file_get_contents('http://www.openstreetmap.org/api/0.6/relation/'.$aResult['import-relation'].'/full');
188 $sModifyXMLstr = file_get_contents('http://overpass-api.de/api/interpreter?data=((rel('.$aResult['import-relation'].');way(r);node(w));node(r));out%20meta;');
193 // derive change from normal osm file with osmosis
// The change file is always written to the same path.
194 $sTemporaryFile = CONST_BasePath.'/data/osmosischange.osc';
// File import: osmosis reads the OSM file directly and derives a change against
// an empty data set.
195 if (isset($aResult['import-file']) && $aResult['import-file'])
197 $sCMD = CONST_Osmosis_Binary.' --read-xml \''.$aResult['import-file'].'\' --read-empty --derive-change --write-xml-change '.$sTemporaryFile;
199 exec($sCMD, $sJunk, $iErrorLevel);
// NOTE(review): presumably only reached for a non-zero $iErrorLevel; the guard is
// not in this listing.
202 fail("Error converting osm to osc, osmosis returned: $iErrorLevel\n");
// Object import: the downloaded XML is piped into osmosis through its stdin.
// The three entries below are the pipe descriptors handed to proc_open().
208 0 => array("pipe", "r"), // stdin
209 1 => array("pipe", "w"), // stdout
210 2 => array("pipe", "w") // stderr
212 $sCMD = CONST_Osmosis_Binary.' --read-xml - --read-empty --derive-change --write-xml-change '.$sTemporaryFile;
214 $hProc = proc_open($sCMD, $aSpec, $aPipes);
215 if (!is_resource($hProc))
217 fail("Error converting osm to osc, osmosis failed\n");
// Send the XML to osmosis, then collect its stdout and stderr; both are only
// echoed in verbose mode.
219 fwrite($aPipes[0], $sModifyXMLstr);
221 $sOut = stream_get_contents($aPipes[1]);
222 if ($aResult['verbose']) echo $sOut;
224 $sErrors = stream_get_contents($aPipes[2]);
225 if ($aResult['verbose']) echo $sErrors;
// proc_close() returns the exit code of the process; non-zero is reported.
227 if ($iError = proc_close($hProc))
229 echo "Error converting osm to osc, osmosis returned: $iError\n";
236 // import generated change file
237 $sCMD = $sOsm2pgsqlCmd.' '.$sTemporaryFile;
239 exec($sCMD, $sJunk, $iErrorLevel);
// NOTE(review): presumably guarded by a check of $iErrorLevel that is not visible.
242 fail("osm2pgsql exited with error level $iErrorLevel\n");
// --deduplicate: merge word rows that share the same word_token into one word_id.
246 if ($aResult['deduplicate'])
// NOTE(review): the comparison that uses $pgver is not part of this listing; only
// the failure message (PostgreSQL 9.3) is visible.
249 $pgver = (float) CONST_Postgresql_Version;
251 fail("ERROR: deduplicate is only currently supported in postgresql 9.3");
// Partition ids, used further down to address the search_name_<partition> tables.
255 $sSQL = 'select partition from country_name order by country_code';
256 $aPartitions = $oDB->getCol($sSQL);
257 if (PEAR::isError($aPartitions))
259 fail($aPartitions->getMessage());
// Tokens that start with a space, carry no class/type/country_code and occur in
// more than one word row.
263 $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' ' and class is null and type is null and country_code is null group by word_token having count(*) > 1 order by word_token";
264 $aDuplicateTokens = $oDB->getAll($sSQL);
265 foreach($aDuplicateTokens as $aToken)
// Skip tokens that are empty or a lone '-' once trimmed.
267 if (trim($aToken['word_token']) == '' || trim($aToken['word_token']) == '-') continue;
268 echo "Deduping ".$aToken['word_token']."\n";
// All word_ids of this token, ordered by how many search_name rows reference them
// in nameaddress_vector (most referenced first).
// NOTE(review): the token is interpolated into the statement without escaping; a
// token containing a single quote would break the SQL.
269 $sSQL = "select word_id,(select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num from word where word_token = '".$aToken['word_token']."' and class is null and type is null and country_code is null order by num desc";
270 $aTokenSet = $oDB->getAll($sSQL);
271 if (PEAR::isError($aTokenSet))
273 var_dump($aTokenSet, $sSQL);
// The most referenced word_id is kept; every other id of the set is replaced by it.
277 $aKeep = array_shift($aTokenSet);
278 $iKeepID = $aKeep['word_id'];
// NOTE(review): the bodies of the PEAR::isError() checks inside this loop are not
// part of the listing.
280 foreach($aTokenSet as $aRemove)
// search_name: rows referencing the id in name_vector get both vectors rewritten.
282 $sSQL = "update search_name set";
283 $sSQL .= " name_vector = array_replace(name_vector,".$aRemove['word_id'].",".$iKeepID."),";
284 $sSQL .= " nameaddress_vector = array_replace(nameaddress_vector,".$aRemove['word_id'].",".$iKeepID.")";
285 $sSQL .= " where name_vector @> ARRAY[".$aRemove['word_id']."]";
286 $x = $oDB->query($sSQL);
287 if (PEAR::isError($x))
// search_name: remaining rows that reference the id in nameaddress_vector.
293 $sSQL = "update search_name set";
294 $sSQL .= " nameaddress_vector = array_replace(nameaddress_vector,".$aRemove['word_id'].",".$iKeepID.")";
295 $sSQL .= " where nameaddress_vector @> ARRAY[".$aRemove['word_id']."]";
296 $x = $oDB->query($sSQL);
297 if (PEAR::isError($x))
// location_area_country: keywords array.
303 $sSQL = "update location_area_country set";
304 $sSQL .= " keywords = array_replace(keywords,".$aRemove['word_id'].",".$iKeepID.")";
305 $sSQL .= " where keywords @> ARRAY[".$aRemove['word_id']."]";
306 $x = $oDB->query($sSQL);
307 if (PEAR::isError($x))
// Per-partition search_name_<partition> tables: name_vector.
313 foreach ($aPartitions as $sPartition)
315 $sSQL = "update search_name_".$sPartition." set";
316 $sSQL .= " name_vector = array_replace(name_vector,".$aRemove['word_id'].",".$iKeepID.")";
317 $sSQL .= " where name_vector @> ARRAY[".$aRemove['word_id']."]";
318 $x = $oDB->query($sSQL);
319 if (PEAR::isError($x))
// NOTE(review): this statement is identical to the location_area_country update
// built a few lines earlier and does not use $sPartition - confirm whether a
// per-partition table was intended.
325 $sSQL = "update location_area_country set";
326 $sSQL .= " keywords = array_replace(keywords,".$aRemove['word_id'].",".$iKeepID.")";
327 $sSQL .= " where keywords @> ARRAY[".$aRemove['word_id']."]";
328 $x = $oDB->query($sSQL);
329 if (PEAR::isError($x))
// Finally drop the now unreferenced word row.
336 $sSQL = "delete from word where word_id = ".$aRemove['word_id'];
337 $x = $oDB->query($sSQL);
338 if (PEAR::isError($x))
// --index: run the nominatim indexer against the configured database, with the
// requested number of instances and starting rank; its output is passed straight
// through to the terminal.
348 if ($aResult['index'])
350 passthru(CONST_InstallPath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank']);
// --import-osmosis / --import-osmosis-all: download replication diffs with osmosis,
// import them with osm2pgsql, index the result, and (with -all) repeat.
// NOTE(review): the embedded numbering skips throughout this section, so loop
// headers, braces, else branches and short statements (for example the calls that
// execute the $sSQL strings built below) are not visible here.
353 if ($aResult['import-osmosis'] || $aResult['import-osmosis-all'])
// Refuse intervals below one day (86400 s) when replicating from download.geofabrik.de.
356 if (strpos(CONST_Replication_Url, 'download.geofabrik.de') !== false && CONST_Replication_Update_Interval < 86400) {
357 fail("Error: Update interval too low for download.geofabrik.de. Please check install documentation (http://wiki.openstreetmap.org/wiki/Nominatim/Installation#Updates)\n");
// Command lines used by the steps below.
360 $sImportFile = CONST_BasePath.'/data/osmosischange.osc';
361 $sOsmosisCMD = CONST_Osmosis_Binary;
362 $sOsmosisConfigDirectory = CONST_InstallPath.'/settings';
363 $sCMDDownload = $sOsmosisCMD.' --read-replication-interval workingDirectory='.$sOsmosisConfigDirectory.' --simplify-change --write-xml-change '.$sImportFile;
364 $sCMDCheckReplicationLag = $sOsmosisCMD.' -q --read-replication-lag workingDirectory='.$sOsmosisConfigDirectory;
365 $sCMDImport = $sOsm2pgsqlCmd.' '.$sImportFile;
// NOTE(review): $sInstallPath is not assigned anywhere in the visible source, while
// the --index branch uses the constant CONST_InstallPath for the same binary -
// confirm this is not an undefined variable.
366 $sCMDIndex = $sInstallPath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'];
// NOTE(review): the body of this condition is not part of the listing.
367 if (!$aResult['no-npi']) {
372 $fStartTime = time();
375 // Logic behind this is that osm2pgsql locks the database quite a bit
376 // So it is better to import lots of small files
377 // But indexing works most efficiently on large amounts of data
378 // So do lots of small imports and a BIG index
380 // while($aResult['import-osmosis-all'] && $iFileSize > 1000)
// Download step - only when no change file is left over from an earlier run.
382 if (!file_exists($sImportFile))
384 // First check if there are new updates published (except for minutelies - there's always new diffs to process)
385 if ( CONST_Replication_Update_Interval > 60 )
// Poll the replication lag until osmosis succeeds and reports a lag of at least 1,
// sleeping CONST_Replication_Recheck_Interval seconds between attempts.
388 unset($aReplicationLag);
389 exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel);
390 while ($iErrorLevel > 0 || $aReplicationLag[0] < 1)
394 echo "Error: $iErrorLevel. ";
395 echo "Re-trying: ".$sCMDCheckReplicationLag." in ".CONST_Replication_Recheck_Interval." secs\n";
401 sleep(CONST_Replication_Recheck_Interval);
402 unset($aReplicationLag);
403 exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel);
405 // There are new replication files - use osmosis to download the file
406 echo "\n".date('Y-m-d H:i:s')." Replication Delay is ".$aReplicationLag[0]."\n";
// Download the change file; the command is repeated for as long as osmosis
// returns a non-zero exit code.
408 $fStartTime = time();
409 $fCMDStartTime = time();
410 echo $sCMDDownload."\n";
411 exec($sCMDDownload, $sJunk, $iErrorLevel);
412 while ($iErrorLevel > 0)
414 echo "Error: $iErrorLevel\n";
416 echo 'Re-trying: '.$sCMDDownload."\n";
417 exec($sCMDDownload, $sJunk, $iErrorLevel);
// Log row for the osmosis step: batch end, file size, start time, end time, step name.
419 $iFileSize = filesize($sImportFile);
420 $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
421 $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s',$fCMDStartTime)."','".date('Y-m-d H:i:s')."','osmosis')";
424 echo date('Y-m-d H:i:s')." Completed osmosis step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60,2)." minutes\n";
// Size and batch end of the change file that is about to be imported.
427 $iFileSize = filesize($sImportFile);
428 $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
// Import step: run osm2pgsql on the change file.
431 $fCMDStartTime = time();
432 echo $sCMDImport."\n";
433 exec($sCMDImport, $sJunk, $iErrorLevel);
436 echo "Error: $iErrorLevel\n";
439 $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s',$fCMDStartTime)."','".date('Y-m-d H:i:s')."','osm2pgsql')";
442 echo date('Y-m-d H:i:s')." Completed osm2pgsql step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60,2)." minutes\n";
444 // Archive for debug?
// The change file is removed after the import, so the next round downloads a new one.
445 unlink($sImportFile);
448 $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
// Index step.
451 $sThisIndexCmd = $sCMDIndex;
452 $fCMDStartTime = time();
// Unless --no-npi is set, the indexer also writes an npi file. Its path comes from
// the next value of the `file` sequence, split into three zero-padded levels:
// export/diff/<millions>/<thousands>/<units>.npi.out
454 if (!$aResult['no-npi'])
456 $iFileID = $oDB->getOne('select nextval(\'file\')');
457 if (PEAR::isError($iFileID))
459 echo $iFileID->getMessage()."\n";
462 $sFileDir = CONST_BasePath.'/export/diff/';
463 $sFileDir .= str_pad(floor($iFileID/1000000), 3, '0', STR_PAD_LEFT);
464 $sFileDir .= '/'.str_pad(floor($iFileID/1000) % 1000, 3, '0', STR_PAD_LEFT);
466 if (!is_dir($sFileDir)) mkdir($sFileDir, 0777, true);
// NOTE(review): the path is appended to the command without a separator in the
// visible lines; presumably a short line missing from this listing (or the
// not-visible body of the earlier no-npi condition) supplies the option switch.
467 $sThisIndexCmd .= $sFileDir;
468 $sThisIndexCmd .= '/'.str_pad($iFileID % 1000, 3, '0', STR_PAD_LEFT);
469 $sThisIndexCmd .= ".npi.out";
// Record "batch end <TAB> file id" in export/index/<year>/<month>/<day>.idx.
471 preg_match('#^([0-9]{4})-([0-9]{2})-([0-9]{2})#', $sBatchEnd, $aBatchMatch);
472 $sFileDir = CONST_BasePath.'/export/index/';
473 $sFileDir .= $aBatchMatch[1].'/'.$aBatchMatch[2];
475 if (!is_dir($sFileDir)) mkdir($sFileDir, 0777, true);
476 file_put_contents($sFileDir.'/'.$aBatchMatch[3].'.idx', "$sBatchEnd\t$iFileID\n", FILE_APPEND);
// Run the indexer unless --no-index was given.
479 if (!$aResult['no-index'])
481 echo "$sThisIndexCmd\n";
482 exec($sThisIndexCmd, $sJunk, $iErrorLevel);
485 echo "Error: $iErrorLevel\n";
// Compress the npi file with bzip2, then rename <n>.npi.out.bz2 to <n>.npi.bz2.
489 if (!$aResult['no-npi'])
491 $sFileDir = CONST_BasePath.'/export/diff/';
492 $sFileDir .= str_pad(floor($iFileID/1000000), 3, '0', STR_PAD_LEFT);
493 $sFileDir .= '/'.str_pad(floor($iFileID/1000) % 1000, 3, '0', STR_PAD_LEFT);
495 $sThisIndexCmd = 'bzip2 -z9 '.$sFileDir.'/'.str_pad($iFileID % 1000, 3, '0', STR_PAD_LEFT).".npi.out";
496 echo "$sThisIndexCmd\n";
497 exec($sThisIndexCmd, $sJunk, $iErrorLevel);
500 echo "Error: $iErrorLevel\n";
504 rename($sFileDir.'/'.str_pad($iFileID % 1000, 3, '0', STR_PAD_LEFT).".npi.out.bz2",
505 $sFileDir.'/'.str_pad($iFileID % 1000, 3, '0', STR_PAD_LEFT).".npi.bz2");
// Log the index step and move import_status.lastimportdate to the batch end.
509 $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s',$fCMDStartTime)."','".date('Y-m-d H:i:s')."','index')";
512 echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60,2)." minutes\n";
514 $sSQL = "update import_status set lastimportdate = '$sBatchEnd'";
// A single round is enough unless --import-osmosis-all was given.
517 $fDuration = time() - $fStartTime;
518 echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60,2)." minutes\n";
519 if (!$aResult['import-osmosis-all']) exit(0);
// Pause before the next round: for intervals above 60 s wait until one interval
// after the batch end, otherwise wait for whatever is left of the interval after
// this round's duration. Never negative.
521 if ( CONST_Replication_Update_Interval > 60 )
523 $iSleep = max(0,(strtotime($sBatchEnd)+CONST_Replication_Update_Interval-time()));
527 $iSleep = max(0,CONST_Replication_Update_Interval-$fDuration);
529 echo date('Y-m-d H:i:s')." Sleeping $iSleep seconds\n";
// --import-npi-all: import pre-indexed npi files one after another, starting from
// the highest npiid recorded in import_npi_log.
// NOTE(review): loop headers, braces and short statements (for example the
// increment of $iNPIID and the sleep inside the wait loop) are not part of this
// listing.
535 if ($aResult['import-npi-all'])
537 $iNPIID = $oDB->getOne('select max(npiid) from import_npi_log');
538 if (PEAR::isError($iNPIID))
543 $sConfigDirectory = CONST_InstallPath.'/settings';
// NOTE(review): $sBasePath is not assigned anywhere in the visible source (other
// paths use the constant CONST_BasePath), and database name and port are
// hard-coded here ('gazetteer', 5433) instead of being taken from $aDSNInfo as in
// the other commands - confirm both.
544 $sCMDImportTemplate = $sBasePath.'/nominatim/nominatim -d gazetteer -P 5433 -I -T '.CONST_BasePath.'/nominatim/partitionedtags.def -F ';
547 $fStartTime = time();
// Path of the npi file for this id:
// export/diff/<millions>/<thousands>/<units>.npi, each level zero-padded to 3 digits.
551 $sImportFile = CONST_BasePath.'/export/diff/';
552 $sImportFile .= str_pad(floor($iNPIID/1000000), 3, '0', STR_PAD_LEFT);
553 $sImportFile .= '/'.str_pad(floor($iNPIID/1000) % 1000, 3, '0', STR_PAD_LEFT);
554 $sImportFile .= '/'.str_pad($iNPIID % 1000, 3, '0', STR_PAD_LEFT);
555 $sImportFile .= ".npi";
// Wait until either the plain or the bzip2-compressed file shows up.
556 while(!file_exists($sImportFile) && !file_exists($sImportFile.'.bz2'))
558 echo "sleep (waiting for $sImportFile)\n";
// Use the compressed variant when it exists.
561 if (file_exists($sImportFile.'.bz2')) $sImportFile .= '.bz2';
563 $iFileSize = filesize($sImportFile);
// Run the import.
566 $fCMDStartTime = time();
567 $sCMDImport = $sCMDImportTemplate . $sImportFile;
568 echo $sCMDImport."\n";
569 exec($sCMDImport, $sJunk, $iErrorLevel);
// NOTE(review): presumably guarded by a check of $iErrorLevel that is not visible.
572 fail("Error: $iErrorLevel\n");
// Log row: npi id, null, file size, start time, end time, step name.
574 $sBatchEnd = $iNPIID;
575 echo "Completed for $sBatchEnd in ".round((time()-$fCMDStartTime)/60,2)." minutes\n";
576 $sSQL = "INSERT INTO import_npi_log values ($iNPIID, null, $iFileSize,'".date('Y-m-d H:i:s',$fCMDStartTime)."','".date('Y-m-d H:i:s')."','import')";
/**
 * Read the replication timestamp from the state.txt file in the given directory.
 *
 * The value of the first `timestamp=` entry is returned with the `\:` escaping
 * replaced by a plain `:` and surrounding whitespace (such as a trailing carriage
 * return) removed.
 *
 * @param string $sOsmosisConfigDirectory Directory that contains state.txt.
 *
 * @return string The timestamp, or an empty string when state.txt cannot be read
 *                or contains no timestamp entry.
 */
function getosmosistimestamp($sOsmosisConfigDirectory)
{
	$sStatePath = $sOsmosisConfigDirectory.'/state.txt';

	// A missing or unreadable state file yields an empty timestamp instead of
	// a warning followed by an undefined offset.
	if (!is_readable($sStatePath)) return '';

	$sStateFile = file_get_contents($sStatePath);
	if ($sStateFile === false) return '';

	// Only use the capture group when the pattern actually matched.
	if (!preg_match('#timestamp=(.+)#', $sStateFile, $aMatch)) return '';

	// `.` also matches "\r", so trim to drop the CR of CRLF line endings.
	return str_replace('\:', ':', trim($aMatch[1]));
}