]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
add switch for osm2pgsql cache size
[nominatim.git] / utils / setup.php
1 #!/usr/bin/php -Cq
2 <?php
3
4         require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php');
5         ini_set('memory_limit', '800M');
6
// Command line definition handed to getCmdOpt() below and to showUsage() at
// the end of the script. The first entry is the program description; each
// following entry starts with the long option name and a short flag and ends
// with what looks like a value type ('bool', 'int', 'string', 'realpath')
// and the help text.
// NOTE(review): the meaning of the four numeric columns is defined by
// getCmdOpt(), which is not part of this file - confirm there before editing.
$aCMDOptions = array(
    "Create and setup nominatim search system",
    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),

    array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
    array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),

    array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),

    array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
    array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
    array('import-data', '', 0, 1, 0, 0, 'bool', 'Import an osm file'),
    array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
    array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
    array('create-minimal-tables', '', 0, 1, 0, 0, 'bool', 'Create minimal main tables'),
    array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
    array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
    array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
    array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
    array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
    // The help text used to be a copy of the calculate-postcodes one.
    array('create-roads', '', 0, 1, 0, 0, 'bool', 'Calculate road locations (not included in \'all\')'),
    array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
    // The help text used to be a copy of the osmosis-init one.
    array('osmosis-init-date', '', 0, 1, 1, 1, 'string', 'Date to fetch the osmosis state file for (default: derived from the planet file name)'),
    array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
    array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'),
    array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'),
    array('create-website', '', 0, 1, 1, 1, 'realpath', 'Create symlinks to setup web directory'),
);
// Parse the command line; the parsed values end up in $aCMDResult.
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
38
// Set by every step that runs; the usage text is shown when it stays false.
$bDidSomething = false;

// Number of parallel workers: the --threads value when given, otherwise a
// fairly aggressive default of one fewer than the processors in the box.
if (isset($aCMDResult['threads']))
{
    $iInstances = $aCMDResult['threads'];
}
else
{
    $iInstances = getProcessorCount() - 1;
}

// Keep the worker count between 1 and the number of processors.
if ($iInstances < 1)
{
    $iInstances = 1;
    echo "WARNING: resetting threads to $iInstances\n";
}
if ($iInstances > getProcessorCount())
{
    $iInstances = getProcessorCount();
    echo "WARNING: resetting threads to $iInstances\n";
}

// When no explicit osmosis date was given, derive one from a planet file
// named planet-YYMMDD.*: 22:00 UTC on the day before the date in the name.
if (isset($aCMDResult['osm-file']) && !isset($aCMDResult['osmosis-init-date']))
{
    $sPlanetName = basename($aCMDResult['osm-file']);
    if (preg_match('#^planet-([0-9]{2})([0-9]{2})([0-9]{2})[.]#', $sPlanetName, $aDateParts))
    {
        $iDayBefore = mktime(0, 0, 0, $aDateParts[2], $aDateParts[3], '20'.$aDateParts[1]) - (60*60*24);
        $aCMDResult['osmosis-init-date'] = date('Y-m-d', $iDayBefore).'T22:00:00Z';
    }
}

// Connection details for the command line tools; the port falls back to 5432.
$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
if (empty($aDSNInfo['port']))
{
    $aDSNInfo['port'] = 5432;
}
65
// Step: create the database, then install the language, extensions and the
// static reference data shipped in the data directory.
if ($aCMDResult['create-db'] || $aCMDResult['all'])
{
    echo "Create DB\n";
    $bDidSomething = true;

    // A successful connection means the database is already there.
    $oDB =& DB::connect(CONST_Database_DSN, false);
    if (!PEAR::isError($oDB))
    {
        fail('database already exists ('.CONST_Database_DSN.')');
    }
    passthru('createdb -E UTF-8 '.$aDSNInfo['database']);

    echo "Create DB (2)\n";
    // TODO: path detection, detection memory, etc.

    $oDB =& getDB();
    passthru('createlang plpgsql '.$aDSNInfo['database']);

    // hstore is created as an extension from PostgreSQL 9.1 on and loaded
    // from the contrib script on older versions.
    $pgver = (float) CONST_Postgresql_Version;
    if ($pgver >= 9.1)
    {
        pgsqlRunScript('CREATE EXTENSION hstore');
    }
    else
    {
        pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/hstore.sql');
    }

    // PostGIS first, then the bundled data files, in this exact order.
    $aSetupScripts = array(
        CONST_Path_Postgresql_Postgis.'/postgis.sql',
        CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql',
        CONST_BasePath.'/data/country_name.sql',
        CONST_BasePath.'/data/country_naturalearthdata.sql',
        CONST_BasePath.'/data/country_osm_grid.sql',
        CONST_BasePath.'/data/gb_postcode.sql',
        CONST_BasePath.'/data/us_statecounty.sql',
        CONST_BasePath.'/data/us_state.sql',
        CONST_BasePath.'/data/us_postcode.sql',
        CONST_BasePath.'/data/worldboundaries.sql',
    );
    foreach ($aSetupScripts as $sSetupScript)
    {
        pgsqlRunScriptFile($sSetupScript);
    }
}
103
// Step: import the OSM file into the database with osm2pgsql and verify that
// the place table received at least one row.
if ($aCMDResult['import-data'] || $aCMDResult['all'])
{
    echo "Import\n";
    $bDidSomething = true;

    // Without a usable input file osm2pgsql would be started with nothing to
    // import and the step would only fail later with 'No Data'.
    if (empty($aCMDResult['osm-file']) || !is_readable($aCMDResult['osm-file']))
    {
        fail('missing or unreadable --osm-file for data import');
    }

    $osm2pgsql = CONST_Osm2pgsql_Binary;
    if (!file_exists($osm2pgsql)) fail("please download and build osm2pgsql");
    $osm2pgsql .= ' -lsc -O gazetteer --hstore';

    // Cache size: the --osm2pgsql-cache value when given, otherwise 15000.
    $iCacheSize = isset($aCMDResult['osm2pgsql-cache']) ? (int) $aCMDResult['osm2pgsql-cache'] : 15000;
    $osm2pgsql .= ' -C '.$iCacheSize;

    // Quote the arguments so that names containing spaces or shell
    // metacharacters reach osm2pgsql unchanged.
    $osm2pgsql .= ' -d '.escapeshellarg($aDSNInfo['database']).' '.escapeshellarg($aCMDResult['osm-file']);
    passthru($osm2pgsql);

    // An empty place table means the import produced nothing.
    $oDB =& getDB();
    $x = $oDB->getRow('select * from place limit 1');
    if (!$x || PEAR::isError($x)) fail('No Data');
}
127
// Step: install the SQL functions, pointing {modulepath} in the template at
// the directory holding the compiled nominatim module.
if ($aCMDResult['create-functions'] || $aCMDResult['all'])
{
    echo "Functions\n";
    $bDidSomething = true;

    $sModuleDir = CONST_BasePath.'/module';
    if (!file_exists($sModuleDir.'/nominatim.so'))
    {
        fail("nominatim module not built");
    }

    $sFunctionsSql = file_get_contents(CONST_BasePath.'/sql/functions.sql');
    pgsqlRunScript(str_replace('{modulepath}', $sModuleDir, $sFunctionsSql));
}
137
// Step: create the minimal table set plus one place_classtype_<class>_<type>
// table (with centroid and place_id indices) per entry in
// settings/partitionedtags.def.
if ($aCMDResult['create-minimal-tables'])
{
    echo "Minimal Tables\n";
    $bDidSomething = true;
    pgsqlRunScriptFile(CONST_BasePath.'/sql/tables-minimal.sql');

    $sPartitionDefs = CONST_BasePath.'/settings/partitionedtags.def';
    $hFile = @fopen($sPartitionDefs, "r");
    if (!$hFile) fail('unable to open list of partitions: '.$sPartitionDefs);

    // Backstop the import process with the earliest possible import id.
    $sScript = "insert into import_npi_log values (18022);\n";

    // NOTE(review): reading stops at the first line starting with '#' (and at
    // end of file); such a line ends the list rather than being skipped.
    while (true)
    {
        $sLine = fgets($hFile, 4096);
        if ($sLine === false || !$sLine || substr($sLine, 0, 1) == '#')
        {
            break;
        }

        // Each line is "<class> <type>".
        $aParts = explode(' ', trim($sLine));
        $sTable = "place_classtype_".$aParts[0]."_".$aParts[1];

        $sScript .= "create table ".$sTable." as select place_id as place_id,geometry as centroid from placex limit 0;\n";
        $sScript .= "CREATE INDEX idx_".$sTable."_centroid ON ".$sTable." USING GIST (centroid);\n";
        $sScript .= "CREATE INDEX idx_".$sTable."_place_id ON ".$sTable." USING btree(place_id);\n";
    }
    fclose($hFile);

    pgsqlRunScript($sScript);
}
167
// Step: create the main tables and then install the SQL functions again.
if ($aCMDResult['create-tables'] || $aCMDResult['all'])
{
    echo "Tables\n";
    $bDidSomething = true;
    pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');

    // Re-run the functions template with {modulepath} filled in.
    $sFunctionsSql = file_get_contents(CONST_BasePath.'/sql/functions.sql');
    pgsqlRunScript(str_replace('{modulepath}', CONST_BasePath.'/module', $sFunctionsSql));
}
179
// Step: expand sql/partitions.src.sql for every partition and run the result.
// Each "-- start" ... "-- end" section of the template is repeated once per
// partition with '-partition-' replaced by the partition value.
if ($aCMDResult['create-partitions'] || $aCMDResult['all'])
{
    echo "Partitions\n";
    $bDidSomething = true;

    $oDB =& getDB();
    $aPartitions = $oDB->getCol('select partition from country_name order by country_code');
    if (PEAR::isError($aPartitions))
    {
        fail($aPartitions->getMessage());
    }
    // Partition 0 is always expanded in addition to those from country_name.
    $aPartitions[] = 0;

    $sTemplate = file_get_contents(CONST_BasePath.'/sql/partitions.src.sql');
    preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aSections, PREG_SET_ORDER);
    foreach ($aSections as $aSection)
    {
        $aExpanded = array();
        foreach ($aPartitions as $sPartitionName)
        {
            $aExpanded[] = str_replace('-partition-', $sPartitionName, $aSection[1]);
        }
        $sTemplate = str_replace($aSection[0], implode('', $aExpanded), $sTemplate);
    }

    pgsqlRunScript($sTemplate);
}
207
// Step: empty the live tables, reset the place sequence and copy the rows of
// the place table into placex, split over $iInstances parallel connections.
if ($aCMDResult['load-data'] || $aCMDResult['all'])
{
    echo "Load Data\n";
    $bDidSomething = true;

    $oDB =& getDB();

    // Run in order; a progress dot is printed after each statement.
    $aResetStatements = array(
        'TRUNCATE word',
        'TRUNCATE placex',
        'TRUNCATE place_addressline',
        'TRUNCATE place_boundingbox',
        'TRUNCATE location_area',
        'TRUNCATE search_name',
        'TRUNCATE search_name_blank',
        'DROP SEQUENCE seq_place',
        'CREATE SEQUENCE seq_place start 100000',
    );
    foreach ($aResetStatements as $sStatement)
    {
        if (!pg_query($oDB->connection, $sStatement)) fail(pg_last_error($oDB->connection));
        echo '.';
    }

    // Every worker copies the rows whose osm_id modulo the worker count
    // equals its own index.
    $aDBInstances = array();
    for ($i = 0; $i < $iInstances; $i++)
    {
        $aDBInstances[$i] =& getDB(true);
        $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
        $sSQL .= 'housenumber, street, isin, postcode, country_code, extratags, ';
        $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
        if ($aCMDResult['verbose']) echo "$sSQL\n";
        // Report the error of the connection the query was sent on (this used
        // to read the error of the unrelated $oDB connection).
        if (!pg_send_query($aDBInstances[$i]->connection, $sSQL))
        {
            fail(pg_last_error($aDBInstances[$i]->connection));
        }
    }

    // Poll once a second until no connection is busy any more.
    $bAnyBusy = true;
    while ($bAnyBusy)
    {
        $bAnyBusy = false;
        for ($i = 0; $i < $iInstances; $i++)
        {
            if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
        }
        sleep(1);
        echo '.';
    }
    echo "\n";

    // An asynchronous query reports failure only through its result, so
    // collect the results and abort when an insert did not succeed.
    for ($i = 0; $i < $iInstances; $i++)
    {
        while ($hResult = pg_get_result($aDBInstances[$i]->connection))
        {
            $sError = pg_result_error($hResult);
            if ($sError) fail($sError);
        }
    }

    echo "Reanalysing database...\n";
    pgsqlRunScript('ANALYSE');
}
258
// Step: call insertLocationRoad() for every highway in placex with a search
// rank of 26 or 27, split over $iInstances parallel connections.
if ($aCMDResult['create-roads'])
{
    $bDidSomething = true;

    // Not used by this step itself; the handle is still opened as before.
    $oDB =& getDB();

    $aDBInstances = array();
    for ($i = 0; $i < $iInstances; $i++)
    {
        $aDBInstances[$i] =& getDB(true);
        // Errors are read from the worker connection that produced them (this
        // used to read the error of the unrelated $oDB connection).
        if (!pg_query($aDBInstances[$i]->connection, 'set enable_bitmapscan = off'))
        {
            fail(pg_last_error($aDBInstances[$i]->connection));
        }
        // Every worker handles the rows whose osm_id modulo the worker count
        // equals its own index.
        $sSQL = 'select count(*) from (select insertLocationRoad(partition, place_id, country_code, geometry) from ';
        $sSQL .= 'placex where osm_id % '.$iInstances.' = '.$i.' and rank_search between 26 and 27 and class = \'highway\') as x ';
        if ($aCMDResult['verbose']) echo "$sSQL\n";
        if (!pg_send_query($aDBInstances[$i]->connection, $sSQL))
        {
            fail(pg_last_error($aDBInstances[$i]->connection));
        }
    }

    // Poll once a second until no connection is busy any more.
    $bAnyBusy = true;
    while ($bAnyBusy)
    {
        $bAnyBusy = false;
        for ($i = 0; $i < $iInstances; $i++)
        {
            if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
        }
        sleep(1);
        echo '.';
    }
    echo "\n";

    // An asynchronous query reports failure only through its result, so
    // collect the results and abort when a query did not succeed.
    for ($i = 0; $i < $iInstances; $i++)
    {
        while ($hResult = pg_get_result($aDBInstances[$i]->connection))
        {
            $sError = pg_result_error($hResult);
            if ($sError) fail($sError);
        }
    }
}
287
// Step: feed the statements of every data/tiger2011/*.sql file, one line per
// query, to whichever of the $iInstances connections is idle.
if ($aCMDResult['import-tiger-data'])
{
    $bDidSomething = true;

    $aDBInstances = array();
    for ($i = 0; $i < $iInstances; $i++)
    {
        $aDBInstances[$i] =& getDB(true);
    }

    foreach (glob(CONST_BasePath.'/data/tiger2011/*.sql') as $sFile)
    {
        echo $sFile.': ';
        $hFile = fopen($sFile, "r");
        if (!$hFile) fail('unable to open '.$sFile);

        // NOTE(review): the first line of each file is read here and never
        // sent, because $sSQL is overwritten before the first query goes out.
        // Presumably it is a header line - confirm against the data files.
        $sSQL = fgets($hFile, 100000);
        $iLines = 0;

        while (true)
        {
            for ($i = 0; $i < $iInstances; $i++)
            {
                if (!pg_connection_busy($aDBInstances[$i]->connection))
                {
                    // Drain the results of the previous query before sending
                    // the next one on this connection.
                    while (pg_get_result($aDBInstances[$i]->connection));
                    $sSQL = fgets($hFile, 100000);
                    // End of file: leave both the for and the while loop.
                    if (!$sSQL) break 2;
                    // Report the error of the connection the query was sent
                    // on; this used to read $oDB, which this step never
                    // defines.
                    if (!pg_send_query($aDBInstances[$i]->connection, $sSQL))
                    {
                        fail(pg_last_error($aDBInstances[$i]->connection));
                    }
                    $iLines++;
                    // One progress dot per 1000 statements.
                    if ($iLines == 1000)
                    {
                        echo ".";
                        $iLines = 0;
                    }
                }
            }
            usleep(10);
        }

        fclose($hFile);

        // Wait for the queries still running before starting the next file.
        $bAnyBusy = true;
        while ($bAnyBusy)
        {
            $bAnyBusy = false;
            for ($i = 0; $i < $iInstances; $i++)
            {
                if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
            }
            usleep(10);
        }
        echo "\n";
    }
}
341
// Step: rebuild the artificial postcode places (osm_type 'P') in placex:
// one per country/postcode pair at the average centroid of the places that
// carry the postcode, plus one per row of us_postcode.
if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
{
    $bDidSomething = true;
    $oDB =& getDB();

    // Common head of both insert statements, up to the country code column.
    $sInsertHead = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
    $sInsertHead .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,";

    $sFromPlacex = $sInsertHead."country_code,";
    $sFromPlacex .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select country_code,postcode,";
    $sFromPlacex .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
    $sFromPlacex .= "from placex where postcode is not null group by country_code,postcode) as x";

    $sFromUsPostcode = $sInsertHead."'us',";
    $sFromUsPostcode .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode";

    // Remove the old postcode places first, then insert the fresh ones.
    $aStatements = array(
        "DELETE from placex where osm_type='P'",
        $sFromPlacex,
        $sFromUsPostcode,
    );
    foreach ($aStatements as $sStatement)
    {
        if (!pg_query($oDB->connection, $sStatement))
        {
            fail(pg_last_error($oDB->connection));
        }
    }
}
359
// Step: prepare the settings directory for osmosis replication. Runs only
// when an osmosis-init-date is known (given or derived from the file name).
if (($aCMDResult['osmosis-init'] || $aCMDResult['all']) && isset($aCMDResult['osmosis-init-date']))
{
    $bDidSomething = true;

    if (!file_exists(CONST_Osmosis_Binary))
    {
        fail("please download osmosis");
    }

    // Let osmosis write its default configuration unless one is present.
    $sSettingsDir = CONST_BasePath.'/settings';
    if (!file_exists($sSettingsDir.'/configuration.txt'))
    {
        passthru(CONST_Osmosis_Binary.' --read-replication-interval-init '.$sSettingsDir);
    }
    else
    {
        echo "settings/configuration.txt already exists\n";
    }

    // Download the state file for the requested date; an empty answer or one
    // longer than 1000 bytes is treated as a failure.
    $sURL = 'http://toolserver.org/~mazder/replicate-sequences/?'.$aCMDResult['osmosis-init-date'];
    echo "Getting state file: $sURL\n";
    $sStateFile = file_get_contents($sURL);
    if (!$sStateFile || strlen($sStateFile) > 1000)
    {
        fail("unable to obtain state file");
    }
    file_put_contents($sSettingsDir.'/state.txt', $sStateFile);
}
375
// Step: run the nominatim indexer in three passes (up to rank 4, ranks 5 to
// 25, rank 26 and above) with an ANALYSE between consecutive passes.
if ($aCMDResult['index'] || $aCMDResult['all'])
{
    $bDidSomething = true;

    $sBaseCmd = CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$iInstances;
    if (isset($aCMDResult['index-output']))
    {
        $sBaseCmd .= ' -F '.$aCMDResult['index-output'];
    }

    $aRankPasses = array(' -R 4', ' -r 5 -R 25', ' -r 26');
    foreach ($aRankPasses as $iPass => $sRankArgs)
    {
        // Refresh the planner statistics before every pass but the first.
        if ($iPass > 0)
        {
            pgsqlRunScript('ANALYSE');
        }
        passthru($sBaseCmd.$sRankArgs);
    }
}
388
// Step: expand sql/indices.src.sql for every partition and run the result.
// Each "-- start" ... "-- end" section of the template is repeated once per
// partition with '-partition-' replaced by the partition value.
if ($aCMDResult['create-search-indices'] || $aCMDResult['all'])
{
    echo "Search indices\n";
    $bDidSomething = true;

    $oDB =& getDB();
    $aPartitions = $oDB->getCol('select partition from country_name order by country_code');
    if (PEAR::isError($aPartitions))
    {
        fail($aPartitions->getMessage());
    }
    // Partition 0 is always expanded in addition to those from country_name.
    $aPartitions[] = 0;

    $sTemplate = file_get_contents(CONST_BasePath.'/sql/indices.src.sql');
    preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aSections, PREG_SET_ORDER);
    foreach ($aSections as $aSection)
    {
        $aExpanded = array();
        foreach ($aPartitions as $sPartitionName)
        {
            $aExpanded[] = str_replace('-partition-', $sPartitionName, $aSection[1]);
        }
        $sTemplate = str_replace($aSection[0], implode('', $aExpanded), $sTemplate);
    }

    pgsqlRunScript($sTemplate);
}
416
// Step: link the website scripts and asset directories into the directory
// given with --create-website.
if (isset($aCMDResult['create-website']))
{
    $bDidSomething = true;
    $sTargetDir = $aCMDResult['create-website'];
    if (!is_dir($sTargetDir))
    {
        fail('please specify a directory to setup');
    }

    // Link name inside the target directory => entry below website/.
    // index.php is a second link to search.php.
    $aLinks = array(
        'details.php' => 'details.php',
        'reverse.php' => 'reverse.php',
        'search.php' => 'search.php',
        'index.php' => 'search.php',
        'images' => 'images',
        'js' => 'js',
    );
    foreach ($aLinks as $sLinkName => $sWebsiteEntry)
    {
        // Warnings are suppressed and the return value is ignored.
        @symlink(CONST_BasePath.'/website/'.$sWebsiteEntry, $sTargetDir.'/'.$sLinkName);
    }
    echo "Symlinks created\n";
}

// No step was requested at all: print the usage text.
if (!$bDidSomething)
{
    showUsage($aCMDOptions, true);
}
435
// Run an SQL script file through psql against the configured database.
//
// $sFilename: path of the script; fail() is called when it does not exist
//             or when psql cannot be started.
//
// psql's standard output is echoed, its standard error goes to /dev/null.
// The exit status of psql is not evaluated.
function pgsqlRunScriptFile($sFilename)
{
    if (!file_exists($sFilename)) fail('unable to find '.$sFilename);

    // Convert database DSN to psql parameters. The port is passed the same
    // way pgsqlRunScript() does it, so both helpers reach the same server.
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;

    // Quote every argument so that paths or names containing spaces or shell
    // metacharacters reach psql unchanged.
    $sCMD = 'psql -p '.escapeshellarg($aDSNInfo['port']);
    $sCMD .= ' -d '.escapeshellarg($aDSNInfo['database']);
    $sCMD .= ' -f '.escapeshellarg($sFilename);

    $aDescriptors = array(
        0 => array('pipe', 'r'),
        1 => array('pipe', 'w'),
        2 => array('file', '/dev/null', 'a')
    );
    $ahPipes = null;
    $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
    if (!is_resource($hProcess)) fail('unable to start pgsql');

    // psql reads the script from the file, so nothing is written to stdin.
    fclose($ahPipes[0]);

    // TODO: error checking
    while (!feof($ahPipes[1]))
    {
        echo fread($ahPipes[1], 4096);
    }
    fclose($ahPipes[1]);

    proc_close($hProcess);
}
464
// Run a string of SQL through psql against the configured database.
//
// $sScript: the SQL text, written to psql's standard input.
//
// psql writes directly to this script's STDOUT and STDERR. fail() is called
// when psql cannot be started or stops accepting input. The exit status of
// psql is not evaluated.
function pgsqlRunScript($sScript)
{
    // Convert database DSN to psql parameters
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;

    // Quote the arguments so that unusual database names reach psql unchanged.
    $sCMD = 'psql -p '.escapeshellarg($aDSNInfo['port']).' '.escapeshellarg($aDSNInfo['database']);
    $aDescriptors = array(
        0 => array('pipe', 'r'),
        1 => STDOUT,
        2 => STDERR
    );
    $ahPipes = null;
    $hProcess = @proc_open($sCMD, $aDescriptors, $ahPipes);
    if (!is_resource($hProcess)) fail('unable to start pgsql');

    // fwrite() may accept only part of the data, so keep sending the rest.
    while (strlen($sScript))
    {
        $written = fwrite($ahPipes[0], $sScript);
        // A failed or empty write (for example psql exited early) would leave
        // $sScript unchanged and repeat this loop forever.
        if ($written === false || $written <= 0)
        {
            fclose($ahPipes[0]);
            proc_close($hProcess);
            fail('unable to send script to pgsql');
        }
        $sScript = substr($sScript, $written);
    }
    fclose($ahPipes[0]);
    proc_close($hProcess);
}