]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
finally fix adminitrative spelling error, disable fast_update on indexes by default...
[nominatim.git] / utils / setup.php
1 #!/usr/bin/php -Cq
2 <?php
3
4         require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php');
5         ini_set('memory_limit', '800M');
6
7         $aCMDOptions = array(
8                 "Create and setup nominatim search system",
9                 array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
10                 array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
11                 array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
12
13                 array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
14                 array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),
15
16                 array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),
17
18                 array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
19                 array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
20                 array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
21                 array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
22                 array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
23                 array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
24                 array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
25                 array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
26                 array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
27                 array('create-roads', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
28                 array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
29                 array('osmosis-init-date', '', 0, 1, 1, 1, 'string', 'Generate default osmosis configuration'),
30                 array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
31                 array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'),
32         );
33         getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
34
35         $bDidSomething = false;
36
37         // This is a pretty hard core defult - the number of processors in the box - 1
38         $iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(getProcessorCount()-1);
39         if ($iInstances < 1)
40         {
41                 $iInstances = 1;
42                 echo "WARNING: resetting threads to $iInstances\n";
43         }
44         if ($iInstances > getProcessorCount())
45         {
46                 $iInstances = getProcessorCount();
47                 echo "WARNING: resetting threads to $iInstances\n";
48         }
49         if (isset($aCMDResult['osm-file']) && !isset($aCMDResult['osmosis-init-date']))
50         {
51                 $sBaseFile = basename($aCMDResult['osm-file']);
52                 if (preg_match('#^planet-([0-9]{2})([0-9]{2})([0-9]{2})[.]#', $sBaseFile, $aMatch))
53                 {
54                         $iTime = mktime(0, 0, 0, $aMatch[2], $aMatch[3], '20'.$aMatch[1]);
55                         $iTime -= (60*60*24);
56                         $aCMDResult['osmosis-init-date'] = date('Y-m-d', $iTime).'T22:00:00Z';
57                 }
58         }
59
60         if ($aCMDResult['create-db'] || $aCMDResult['all'])
61         {
62                 echo "Create DB\n";
63                 $bDidSomething = true;
64                 $oDB =& DB::connect(CONST_Database_DSN, false);
65                 if (!PEAR::isError($oDB))
66                 {
67                         fail('database already exists');
68                 }
69                 passthru('createdb nominatim');
70         }
71
72         if ($aCMDResult['create-db'] || $aCMDResult['all'])
73         {
74                 echo "Create DB (2)\n";
75                 $bDidSomething = true;
76                 // TODO: path detection, detection memory, etc.
77
78                 $oDB =& getDB();
79                 passthru('createlang plpgsql nominatim');
80                 pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/_int.sql');
81                 pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/hstore.sql');
82                 pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/postgis.sql');
83                 pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql');
84                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
85                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturalearthdata.sql');
86                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql');
87                 pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode.sql');
88                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_statecounty.sql');
89                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_state.sql');
90                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
91                 pgsqlRunScriptFile(CONST_BasePath.'/data/worldboundaries.sql');
92         }
93
94         if ($aCMDResult['import-data'] || $aCMDResult['all'])
95         {
96                 echo "Import\n";
97                 $bDidSomething = true;
98
99                 if (!file_exists(CONST_BasePath.'/osm2pgsql/osm2pgsql')) fail("please download and build osm2pgsql");
100                 passthru(CONST_BasePath.'/osm2pgsql/osm2pgsql -lsc -O gazetteer -C 10000 --hstore -d nominatim '.$aCMDResult['osm-file']);
101
102                 $oDB =& getDB();
103                 $x = $oDB->getRow('select * from place limit 1');
104                 if (!$x || PEAR::isError($x)) fail('No Data');
105         }
106
107         if ($aCMDResult['create-functions'] || $aCMDResult['all'])
108         {
109                 echo "Functions\n";
110                 $bDidSomething = true;
111                 if (!file_exists(CONST_BasePath.'/module/nominatim.so')) fail("nominatim module not built");
112                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
113                 $sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate);
114                 pgsqlRunScript($sTemplate);
115         }
116
117         if ($aCMDResult['create-tables'] || $aCMDResult['all'])
118         {
119                 echo "Tables\n";
120                 $bDidSomething = true;
121                 pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');
122
123                 // re-run the functions
124                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
125                 $sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate);
126                 pgsqlRunScript($sTemplate);
127         }
128
129         if ($aCMDResult['create-partitions'] || $aCMDResult['all'])
130         {
131                 echo "Partitions\n";
132                 $bDidSomething = true;
133 echo "here";
134                 $oDB =& getDB();
135 echo "there";
136                 $sSQL = 'select partition from country_name order by country_code';
137 var_dump($sSQL);
138                 $aPartitions = $oDB->getCol($sSQL);
139 var_dump($aPartitions);
140                 if (PEAR::isError($aPartitions))
141                 {
142                         fail($aPartitions->getMessage());
143                 }
144                 $aPartitions[] = 0;
145
146                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/partitions.src.sql');
147                 preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
148                 foreach($aMatches as $aMatch)
149                 {
150 var_dump($aMatch);
151                         $sResult = '';
152                         foreach($aPartitions as $sPartitionName)
153                         {
154 var_dump($sPartitionName);
155                                 $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
156                         }
157                         $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
158                 }
159
160                 pgsqlRunScript($sTemplate);
161         }
162
163         if ($aCMDResult['load-data'] || $aCMDResult['all'])
164         {
165                 echo "Load Data\n";
166                 $bDidSomething = true;
167
168                 $oDB =& getDB();
169                 if (!pg_query($oDB->connection, 'TRUNCATE word')) fail(pg_last_error($oDB->connection));
170                 echo '.';
171                 if (!pg_query($oDB->connection, 'TRUNCATE placex')) fail(pg_last_error($oDB->connection));
172                 echo '.';
173                 if (!pg_query($oDB->connection, 'TRUNCATE place_addressline')) fail(pg_last_error($oDB->connection));
174                 echo '.';
175                 if (!pg_query($oDB->connection, 'TRUNCATE place_boundingbox')) fail(pg_last_error($oDB->connection));
176                 echo '.';
177                 if (!pg_query($oDB->connection, 'TRUNCATE location_area')) fail(pg_last_error($oDB->connection));
178                 echo '.';
179                 if (!pg_query($oDB->connection, 'TRUNCATE search_name')) fail(pg_last_error($oDB->connection));
180                 echo '.';
181                 if (!pg_query($oDB->connection, 'TRUNCATE search_name_blank')) fail(pg_last_error($oDB->connection));
182                 echo '.';
183                 if (!pg_query($oDB->connection, 'DROP SEQUENCE seq_place')) fail(pg_last_error($oDB->connection));
184                 echo '.';
185                 if (!pg_query($oDB->connection, 'CREATE SEQUENCE seq_place start 100000')) fail(pg_last_error($oDB->connection));
186                 echo '.';
187
188                 $aDBInstances = array();
189                 for($i = 0; $i < $iInstances; $i++)
190                 {
191                         $aDBInstances[$i] =& getDB(true);
192                         $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
193                         $sSQL .= 'housenumber, street, isin, postcode, country_code, extratags, ';
194                         $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
195                         if ($aCMDResult['verbose']) echo "$sSQL\n";
196                         if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
197                 }
198                 $bAnyBusy = true;
199                 while($bAnyBusy)
200                 {
201                         $bAnyBusy = false;
202                         for($i = 0; $i < $iInstances; $i++)
203                         {
204                                 if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
205                         }
206                         sleep(1);
207                         echo '.';
208                 }
209                 echo "\n";
210         }
211
212         if ($aCMDResult['create-roads'])
213         {
214                 $bDidSomething = true;
215
216                 $oDB =& getDB();
217                 $aDBInstances = array();
218                 for($i = 0; $i < $iInstances; $i++)
219                 {
220                         $aDBInstances[$i] =& getDB(true);
221                         if (!pg_query($aDBInstances[$i]->connection, 'set enable_bitmapscan = off')) fail(pg_last_error($oDB->connection));
222                         $sSQL = 'select count(*) from (select insertLocationRoad(partition, place_id, country_code, geometry) from ';
223                         $sSQL .= 'placex where osm_id % '.$iInstances.' = '.$i.' and rank_search between 26 and 27 and class = \'highway\') as x ';
224                         if ($aCMDResult['verbose']) echo "$sSQL\n";
225                         if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
226                 }
227                 $bAnyBusy = true;
228                 while($bAnyBusy)
229                 {
230                         $bAnyBusy = false;
231                         for($i = 0; $i < $iInstances; $i++)
232                         {
233                                 if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
234                         }
235                         sleep(1);
236                         echo '.';
237                 }
238                 echo "\n";
239         }
240
241         if ($aCMDResult['import-tiger-data'])
242         {
243                 $bDidSomething = true;
244
245                 $aDBInstances = array();
246                 for($i = 0; $i < $iInstances; $i++)
247                 {
248                         $aDBInstances[$i] =& getDB(true);
249                 }
250
251                 foreach(glob(CONST_BasePath.'/data/tiger2009/*.sql') as $sFile)
252                 {
253                         echo $sFile.': ';
254                         $hFile = fopen($sFile, "r");
255                         $sSQL = fgets($hFile, 100000);
256                         $iLines = 0;
257
258                         while(true)
259                         {
260                                 for($i = 0; $i < $iInstances; $i++)
261                                 {
262                                         if (!pg_connection_busy($aDBInstances[$i]->connection))
263                                         {
264                                                 while(pg_get_result($aDBInstances[$i]->connection));
265                                                 $sSQL = fgets($hFile, 100000);
266                                                 if (!$sSQL) break 2;
267                                                 if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
268                                                 $iLines++;
269                                                 if ($iLines == 1000)
270                                                 {
271                                                         echo ".";
272                                                         $iLines = 0;
273                                                 }
274                                         }
275                                 }
276                                 usleep(10);
277                         }
278
279                         fclose($hFile);
280         
281                         $bAnyBusy = true;
282                         while($bAnyBusy)
283                         {
284                                 $bAnyBusy = false;
285                                 for($i = 0; $i < $iInstances; $i++)
286                                 {
287                                         if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
288                                 }
289                                 usleep(10);
290                         }
291                         echo "\n";
292                 }
293         }
294
295         if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
296         {
297                 $oDB =& getDB();
298                 if (!pg_query($oDB->connection, 'DELETE from placex where osm_type=\'P\'')) fail(pg_last_error($oDB->connection));
299                 $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
300                 $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,country_code,";
301                 $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select country_code,postcode,";
302                 $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
303                 $sSQL .= "from placex where postcode is not null group by country_code,postcode) as x";
304                 if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
305
306                 $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
307                 $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',";
308                 $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode";
309                 if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
310         }
311
312         if (($aCMDResult['osmosis-init'] || $aCMDResult['all']) && isset($aCMDResult['osmosis-init-date']))
313         {
314                 $bDidSomething = true;
315
316                 if (!file_exists(CONST_BasePath.'/osmosis-0.38/bin/osmosis')) fail("please download osmosis");
317                 if (file_exists(CONST_BasePath.'/settings/configuration.txt')) echo "settings/configuration.txt already exists\n";
318                 else passthru(CONST_BasePath.'/osmosis-0.38/bin/osmosis --read-replication-interval-init '.CONST_BasePath.'/settings');
319
320                 $sDate = $aCMDResult['osmosis-init-date'];
321                 $sStateFile = file_get_contents('http://toolserver.org/~mazder/replicate-sequences/?'.$sDate);
322                 if (!$sStateFile || strlen($sStateFile) > 1000) fail("unable to obtain state file");
323                 file_put_contents(CONST_BasePath.'/settings/state.txt', $sStateFile);
324         }
325
326         if ($aCMDResult['index'] || $aCMDResult['all'])
327         {
328                 $bDidSomething = true;
329                 $sOutputFile = '';
330                 if (isset($aCMDResult['index-output'])) $sOutputFile = ' -F '.$aCMDResult['index-output'];
331                 passthru(CONST_BasePath.'/nominatim/nominatim -i -d nominatim -t '.$iInstances.$sOutputFile);
332         }
333
334         if (!$bDidSomething)
335         {
336                 showUsage($aCMDOptions, true);
337         }
338
339         function pgsqlRunScriptFile($sFilename)
340         {
341                 if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
342
343                 // Convert database DSN to psql paramaters
344                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
345                 $sCMD = 'psql -f '.$sFilename.' '.$aDSNInfo['database'];
346
347                 $aDescriptors = array(
348                         0 => array('pipe', 'r'),
349                         1 => array('pipe', 'w'),
350                         2 => array('file', '/dev/null', 'a')
351                 );
352                 $ahPipes = null;
353                 $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
354                 if (!is_resource($hProcess)) fail('unable to start pgsql');
355
356                 fclose($ahPipes[0]);
357
358                 // TODO: error checking
359                 while(!feof($ahPipes[1]))
360                 {
361                         echo fread($ahPipes[1], 4096);
362                 }
363                 fclose($ahPipes[1]);
364
365                 proc_close($hProcess);
366         }
367
368         function pgsqlRunScript($sScript)
369         {
370                 // Convert database DSN to psql paramaters
371                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
372                 $sCMD = 'psql '.$aDSNInfo['database'];
373
374                 $aDescriptors = array(
375                         0 => array('pipe', 'r'),
376                         1 => array('pipe', 'w'),
377                         2 => array('file', '/dev/null', 'a')
378                 );
379                 $ahPipes = null;
380                 $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
381                 if (!is_resource($hProcess)) fail('unable to start pgsql');
382
383 echo "write";
384                 fwrite($ahPipes[0], $sScript);
385 echo "close";
386                 fclose($ahPipes[0]);
387 echo "done";
388
389                 // TODO: error checking
390                 while(!feof($ahPipes[1]))
391                 {
392 echo "read";
393                         echo fread($ahPipes[1], 4096);
394                 }
395 echo "done";
396                 fclose($ahPipes[1]);
397
398 echo "done1";
399                 proc_close($hProcess);
400 echo "done2";
401         }