]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
* use osm2pgsql from $PATH if none present locally
[nominatim.git] / utils / setup.php
1 #!/usr/bin/php -Cq
2 <?php
3
require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php');
// Raise PHP's memory limit for this script.
ini_set('memory_limit', '800M');

// Command line option table, handed to getCmdOpt() below and to showUsage() at the
// end of the script. The first entry is the description of the script; every other
// row describes one option: long name, short name, four numeric fields
// (NOTE(review): presumably occurrence/parameter counts - confirm against getCmdOpt()),
// the value type and the help text.
$aCMDOptions = array(
    "Create and setup nominatim search system",
    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),

    array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
    array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),

    array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),

    array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
    array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
    array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
    array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
    array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
    array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
    array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
    array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
    array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
    // Help text used to be a copy of the calculate-postcodes line; the step inserts
    // road locations for highways and is not triggered by --all.
    array('create-roads', '', 0, 1, 0, 0, 'bool', 'Insert road locations for highways (not included in \'all\')'),
    array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
    // Help text used to repeat the osmosis-init line; this option carries the date
    // used to look up the osmosis state file.
    array('osmosis-init-date', '', 0, 1, 1, 1, 'string', 'Date of the osmosis state file to fetch (default: derived from the planet file name)'),
    array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
    array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
34
// Set to true by every setup step that runs; checked at the end of the script to
// decide whether the usage text has to be shown.
$bDidSomething = false;

// Number of parallel workers. Without --threads this uses a fairly aggressive
// default: the number of processors in the box minus one.
if (isset($aCMDResult['threads']))
{
    $iInstances = $aCMDResult['threads'];
}
else
{
    $iInstances = getProcessorCount() - 1;
}

// Never run with fewer than one worker ...
if ($iInstances < 1)
{
    $iInstances = 1;
    echo "WARNING: resetting threads to $iInstances\n";
}

// ... and never with more workers than there are processors.
if ($iInstances > getProcessorCount())
{
    $iInstances = getProcessorCount();
    echo "WARNING: resetting threads to $iInstances\n";
}
// When a dated planet file (planet-YYMMDD.*) is imported and no explicit
// --osmosis-init-date was given, derive the date from the file name:
// 22:00 UTC on the day before the planet date.
if (isset($aCMDResult['osm-file']) && !isset($aCMDResult['osmosis-init-date']))
{
    $sBaseFile = basename($aCMDResult['osm-file']);
    if (preg_match('#^planet-([0-9]{2})([0-9]{2})([0-9]{2})[.]#', $sBaseFile, $aMatch))
    {
        // Do the calendar arithmetic in UTC and let gmmktime() normalise "day - 1"
        // (day 0 is the last day of the previous month). Subtracting 86400 seconds
        // from a local midnight ends up two days back whenever the previous day is
        // a 23-hour daylight-saving switch day.
        $iTime = gmmktime(0, 0, 0, (int)$aMatch[2], (int)$aMatch[3] - 1, 2000 + (int)$aMatch[1]);
        $aCMDResult['osmosis-init-date'] = gmdate('Y-m-d', $iTime).'T22:00:00Z';
    }
}
59
// Create DB, part 1: create the empty database. Being able to connect means the
// database is already there, which is treated as an error.
if ($aCMDResult['create-db'] || $aCMDResult['all'])
{
    echo "Create DB\n";
    $bDidSomething = true;

    $oDB =& DB::connect(CONST_Database_DSN, false);
    $bConnectFailed = PEAR::isError($oDB);
    if (!$bConnectFailed)
    {
        fail('database already exists');
    }
    passthru('createdb nominatim');
}

// Create DB, part 2: install plpgsql, then run the contrib, PostGIS and bundled
// data scripts in a fixed order.
if ($aCMDResult['create-db'] || $aCMDResult['all'])
{
    echo "Create DB (2)\n";
    $bDidSomething = true;
    // TODO: path detection, detection memory, etc.

    $oDB =& getDB();
    passthru('createlang plpgsql nominatim');

    // Scripts are executed strictly in the order listed here.
    $aSetupScripts = array(
        CONST_Path_Postgresql_Contrib.'/_int.sql',
        CONST_Path_Postgresql_Contrib.'/hstore.sql',
        CONST_Path_Postgresql_Postgis.'/postgis.sql',
        CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql',
        CONST_BasePath.'/data/country_name.sql',
        CONST_BasePath.'/data/country_naturalearthdata.sql',
        CONST_BasePath.'/data/country_osm_grid.sql',
        CONST_BasePath.'/data/gb_postcode.sql',
        CONST_BasePath.'/data/us_statecounty.sql',
        CONST_BasePath.'/data/us_state.sql',
        CONST_BasePath.'/data/us_postcode.sql',
        CONST_BasePath.'/data/worldboundaries.sql',
    );
    foreach ($aSetupScripts as $sSetupScript)
    {
        pgsqlRunScriptFile($sSetupScript);
    }
}
93
// Import the OSM file into the database with osm2pgsql (gazetteer output) and
// verify afterwards that the place table received at least one row.
if ($aCMDResult['import-data'] || $aCMDResult['all'])
{
    echo "Import\n";
    $bDidSomething = true;

    // Without a file osm2pgsql has nothing to import; stop with a clear message
    // instead of running it with an empty argument.
    if (!isset($aCMDResult['osm-file']) || !$aCMDResult['osm-file'])
    {
        fail('need to specify an osm file to import (--osm-file)');
    }

    // Prefer the osm2pgsql built inside the source tree, otherwise take the one
    // found in $PATH.
    $osm2pgsql = CONST_BasePath.'/osm2pgsql/osm2pgsql';
    if (!file_exists($osm2pgsql)) $osm2pgsql = trim(`which osm2pgsql`);
    if (!file_exists($osm2pgsql)) fail("please download and build osm2pgsql");

    // Both paths may contain spaces or shell metacharacters, so quote them.
    $sCMD = escapeshellarg($osm2pgsql).' -lsc -O gazetteer -C 10000 --hstore -d nominatim ';
    $sCMD .= escapeshellarg($aCMDResult['osm-file']);
    passthru($sCMD);

    $oDB =& getDB();
    $x = $oDB->getRow('select * from place limit 1');
    if (!$x || PEAR::isError($x)) fail('No Data');
}
108
// Install the SQL functions. sql/functions.sql is a template in which
// {modulepath} is replaced by the directory holding the nominatim module.
if ($aCMDResult['create-functions'] || $aCMDResult['all'])
{
    echo "Functions\n";
    $bDidSomething = true;

    if (!file_exists(CONST_BasePath.'/module/nominatim.so'))
    {
        fail("nominatim module not built");
    }

    $sTemplate = str_replace(
        '{modulepath}',
        CONST_BasePath.'/module',
        file_get_contents(CONST_BasePath.'/sql/functions.sql')
    );
    pgsqlRunScript($sTemplate);
}

// Create the main tables and then run the function script once more.
if ($aCMDResult['create-tables'] || $aCMDResult['all'])
{
    echo "Tables\n";
    $bDidSomething = true;

    pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');

    // re-run the functions
    $sTemplate = str_replace(
        '{modulepath}',
        CONST_BasePath.'/module',
        file_get_contents(CONST_BasePath.'/sql/functions.sql')
    );
    pgsqlRunScript($sTemplate);
}
130
// Expand sql/partitions.src.sql: every section between a "-- start" and a "-- end"
// line is repeated once per partition, with -partition- replaced by the partition
// value, and the resulting script is run through psql.
if ($aCMDResult['create-partitions'] || $aCMDResult['all'])
{
    echo "Partitions\n";
    $bDidSomething = true;

    $oDB =& getDB();
    $sSQL = 'select partition from country_name order by country_code';
    $aPartitions = $oDB->getCol($sSQL);
    if (PEAR::isError($aPartitions))
    {
        fail($aPartitions->getMessage());
    }
    // Partition 0 is always generated in addition to the ones listed in country_name.
    // NOTE(review): the list is not de-duplicated; confirm partition values are unique.
    $aPartitions[] = 0;

    $sTemplate = file_get_contents(CONST_BasePath.'/sql/partitions.src.sql');
    preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
    foreach ($aMatches as $aMatch)
    {
        // $aMatch[0] is the whole section including markers, $aMatch[1] its body.
        $aExpanded = array();
        foreach ($aPartitions as $sPartitionName)
        {
            $aExpanded[] = str_replace('-partition-', $sPartitionName, $aMatch[1]);
        }
        $sTemplate = str_replace($aMatch[0], implode('', $aExpanded), $sTemplate);
    }

    pgsqlRunScript($sTemplate);
}
158
// Copy the imported rows from place into placex. The live tables are emptied and
// the seq_place sequence is recreated first; the copy itself is spread over
// $iInstances connections running asynchronous queries.
if ($aCMDResult['load-data'] || $aCMDResult['all'])
{
    echo "Load Data\n";
    $bDidSomething = true;

    $oDB =& getDB();

    // Reset statements, executed in order; one progress dot per statement.
    $aResetSQL = array(
        'TRUNCATE word',
        'TRUNCATE placex',
        'TRUNCATE place_addressline',
        'TRUNCATE place_boundingbox',
        'TRUNCATE location_area',
        'TRUNCATE search_name',
        'TRUNCATE search_name_blank',
        'DROP SEQUENCE seq_place',
        'CREATE SEQUENCE seq_place start 100000',
    );
    foreach ($aResetSQL as $sResetSQL)
    {
        if (!pg_query($oDB->connection, $sResetSQL)) fail(pg_last_error($oDB->connection));
        echo '.';
    }

    // Worker $i copies the rows whose osm_id % $iInstances equals $i.
    // NOTE(review): getDB(true) presumably opens a separate connection per worker - confirm.
    $aDBInstances = array();
    for($i = 0; $i < $iInstances; $i++)
    {
        $aDBInstances[$i] =& getDB(true);
        $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
        $sSQL .= 'housenumber, street, isin, postcode, country_code, extratags, ';
        $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
        if ($aCMDResult['verbose']) echo "$sSQL\n";
        // Report the error of the connection the query was sent on, not of $oDB.
        if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($aDBInstances[$i]->connection));
    }

    // Poll once per second until no worker connection is busy any more.
    $bAnyBusy = true;
    while($bAnyBusy)
    {
        $bAnyBusy = false;
        for($i = 0; $i < $iInstances; $i++)
        {
            if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
        }
        sleep(1);
        echo '.';
    }
    echo "\n";

    // Collect the results of the asynchronous inserts so that a failed copy is
    // reported instead of silently leaving placex incomplete.
    for($i = 0; $i < $iInstances; $i++)
    {
        while($hResult = pg_get_result($aDBInstances[$i]->connection))
        {
            $sError = pg_result_error($hResult);
            if ($sError) fail($sError);
        }
    }
}
207
// Call insertLocationRoad() for every highway in placex with rank_search 26 or 27,
// spread over $iInstances connections running asynchronous queries.
// This step is not triggered by --all.
if ($aCMDResult['create-roads'])
{
    $bDidSomething = true;

    $oDB =& getDB();
    $aDBInstances = array();
    for($i = 0; $i < $iInstances; $i++)
    {
        $aDBInstances[$i] =& getDB(true);
        $hConnection = $aDBInstances[$i]->connection;

        // Errors are read from the worker connection that ran the statement,
        // not from the unrelated $oDB connection.
        if (!pg_query($hConnection, 'set enable_bitmapscan = off')) fail(pg_last_error($hConnection));

        // Worker $i handles the rows whose osm_id % $iInstances equals $i.
        $sSQL = 'select count(*) from (select insertLocationRoad(partition, place_id, country_code, geometry) from ';
        $sSQL .= 'placex where osm_id % '.$iInstances.' = '.$i.' and rank_search between 26 and 27 and class = \'highway\') as x ';
        if ($aCMDResult['verbose']) echo "$sSQL\n";
        if (!pg_send_query($hConnection, $sSQL)) fail(pg_last_error($hConnection));
    }

    // Poll once per second until no worker connection is busy any more.
    $bAnyBusy = true;
    while($bAnyBusy)
    {
        $bAnyBusy = false;
        for($i = 0; $i < $iInstances; $i++)
        {
            if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
        }
        sleep(1);
        echo '.';
    }
    echo "\n";

    // Collect the asynchronous results so that a failed query is reported.
    for($i = 0; $i < $iInstances; $i++)
    {
        while($hResult = pg_get_result($aDBInstances[$i]->connection))
        {
            $sError = pg_result_error($hResult);
            if ($sError) fail($sError);
        }
    }
}
236
// Import the TIGER files: each line of every data/tiger2009/*.sql file is sent as
// one statement to whichever worker connection is currently idle.
// This step is not triggered by --all.
if ($aCMDResult['import-tiger-data'])
{
    $bDidSomething = true;

    $aDBInstances = array();
    for($i = 0; $i < $iInstances; $i++)
    {
        $aDBInstances[$i] =& getDB(true);
    }

    // glob() returns false on error; treat that like "no files".
    $aTigerFiles = glob(CONST_BasePath.'/data/tiger2009/*.sql');
    if (!$aTigerFiles) $aTigerFiles = array();

    foreach($aTigerFiles as $sFile)
    {
        echo $sFile.': ';
        $hFile = fopen($sFile, "r");
        if (!$hFile) fail('unable to open '.$sFile);

        // NOTE(review): this first line is read but never sent - it is overwritten
        // by the next fgets() below. Kept as is; confirm the first line of the
        // tiger files is meant to be skipped.
        $sSQL = fgets($hFile, 100000);
        $iLines = 0;

        while(true)
        {
            for($i = 0; $i < $iInstances; $i++)
            {
                $hConnection = $aDBInstances[$i]->connection;
                if (!pg_connection_busy($hConnection))
                {
                    // Drain the results of the previous statement before reusing the connection.
                    while(pg_get_result($hConnection));
                    $sSQL = fgets($hFile, 100000);
                    if (!$sSQL) break 2;
                    // $oDB is never set up in this step, so read the error from the
                    // connection the statement was sent on.
                    if (!pg_send_query($hConnection, $sSQL)) fail(pg_last_error($hConnection));
                    $iLines++;
                    // One progress dot per 1000 statements.
                    if ($iLines == 1000)
                    {
                        echo ".";
                        $iLines = 0;
                    }
                }
            }
            usleep(10);
        }

        fclose($hFile);

        // Wait for the statements still in flight before starting the next file.
        $bAnyBusy = true;
        while($bAnyBusy)
        {
            $bAnyBusy = false;
            for($i = 0; $i < $iInstances; $i++)
            {
                if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
            }
            usleep(10);
        }
        echo "\n";
    }
}
290
// Rebuild the artificial postcode rows (osm_type 'P') in placex: remove the old
// ones, add one point per (country_code, postcode) at the average centroid of the
// rows carrying that postcode, then add the entries of the us_postcode table.
if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
{
    $bDidSomething = true;
    $oDB =& getDB();

    $sInsertInto = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";

    $aStatements = array(
        'DELETE from placex where osm_type=\'P\'',

        // Average centroid per country and postcode.
        $sInsertInto
            ."select 'P',nextval('seq_postcodes'),'place','postcode',postcode,country_code,"
            ."ST_SetSRID(ST_Point(x,y),4326) as geometry from (select country_code,postcode,"
            ."avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y "
            ."from placex where postcode is not null group by country_code,postcode) as x",

        // Postcodes taken from the us_postcode table, all with country code 'us'.
        $sInsertInto
            ."select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',"
            ."ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode",
    );

    foreach ($aStatements as $sSQL)
    {
        if (!pg_query($oDB->connection, $sSQL))
        {
            fail(pg_last_error($oDB->connection));
        }
    }
}
308
// Prepare osmosis: create settings/configuration.txt via osmosis unless it already
// exists, then download the state file for the init date into settings/state.txt.
// Only runs when an osmosis init date is available.
if (($aCMDResult['osmosis-init'] || $aCMDResult['all']) && isset($aCMDResult['osmosis-init-date']))
{
    $bDidSomething = true;

    $sOsmosisBin = CONST_BasePath.'/osmosis-0.38/bin/osmosis';
    $sSettingsDir = CONST_BasePath.'/settings';

    if (!file_exists($sOsmosisBin))
    {
        fail("please download osmosis");
    }

    if (!file_exists($sSettingsDir.'/configuration.txt'))
    {
        passthru($sOsmosisBin.' --read-replication-interval-init '.$sSettingsDir);
    }
    else
    {
        echo "settings/configuration.txt already exists\n";
    }

    $sDate = $aCMDResult['osmosis-init-date'];
    $sURL = 'http://toolserver.org/~mazder/replicate-sequences/?'.$sDate;
    echo "Getting state file: $sURL\n";

    // An empty response or one longer than 1000 bytes is rejected.
    $sStateFile = file_get_contents($sURL);
    if (!$sStateFile || strlen($sStateFile) > 1000)
    {
        fail("unable to obtain state file");
    }
    file_put_contents($sSettingsDir.'/state.txt', $sStateFile);
}
324
// Run the external nominatim indexer with $iInstances threads, optionally passing
// it the file given via --index-output.
if ($aCMDResult['index'] || $aCMDResult['all'])
{
    $bDidSomething = true;
    $sOutputFile = '';
    // The path comes straight from the command line; quote it so that spaces or
    // shell metacharacters cannot break or alter the command.
    if (isset($aCMDResult['index-output'])) $sOutputFile = ' -F '.escapeshellarg($aCMDResult['index-output']);
    passthru(CONST_BasePath.'/nominatim/nominatim -i -d nominatim -t '.$iInstances.$sOutputFile);
}

// None of the steps above was requested: show the usage text instead.
if (!$bDidSomething)
{
    showUsage($aCMDOptions, true);
}
337
/**
 * Run an SQL script file through the psql command line client.
 *
 * The database name (and the port, when one is configured) is taken from
 * CONST_Database_DSN. Standard output of psql is echoed as it arrives, standard
 * error is discarded to /dev/null. fail() is called when the file does not exist
 * or psql cannot be started.
 *
 * @param string $sFilename Path of the SQL file to execute.
 */
function pgsqlRunScriptFile($sFilename)
{
    if (!file_exists($sFilename)) fail('unable to find '.$sFilename);

    // Convert database DSN to psql parameters
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    $sCMD = 'psql';
    // Honour a port configured in the DSN, as pgsqlRunScript() does. Without one
    // the command stays on psql's own default, as before.
    if (isset($aDSNInfo['port']) && $aDSNInfo['port']) $sCMD .= ' -p '.(int)$aDSNInfo['port'];
    // Quote file and database name so that unusual characters survive the shell.
    $sCMD .= ' -f '.escapeshellarg($sFilename).' '.escapeshellarg($aDSNInfo['database']);

    $aDescriptors = array(
        0 => array('pipe', 'r'),
        1 => array('pipe', 'w'),
        2 => array('file', '/dev/null', 'a')
    );
    $ahPipes = null;
    $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
    if (!is_resource($hProcess)) fail('unable to start pgsql');

    // Nothing is sent to psql; the script comes from the file.
    fclose($ahPipes[0]);

    // TODO: error checking
    while(!feof($ahPipes[1]))
    {
        echo fread($ahPipes[1], 4096);
    }
    fclose($ahPipes[1]);

    proc_close($hProcess);
}
366
/**
 * Run SQL given as a string through the psql command line client.
 *
 * Port and database name are taken from CONST_Database_DSN (port 5432 when the
 * DSN has none). The script is written to psql's standard input; psql's standard
 * output is echoed, its standard error is discarded to /dev/null. fail() is
 * called when psql cannot be started.
 *
 * @param string $sScript SQL text to execute.
 */
function pgsqlRunScript($sScript)
{
    // Translate the database DSN into psql parameters.
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (empty($aDSNInfo['port']))
    {
        $aDSNInfo['port'] = 5432;
    }
    $sCMD = sprintf('psql -p %s %s', $aDSNInfo['port'], $aDSNInfo['database']);

    // stdin and stdout are pipes to this script, stderr goes to /dev/null.
    $aDescriptors = array(
        0 => array('pipe', 'r'),
        1 => array('pipe', 'w'),
        2 => array('file', '/dev/null', 'a')
    );
    $ahPipes = null;
    $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
    if (!is_resource($hProcess))
    {
        fail('unable to start pgsql');
    }
    $hStdIn = $ahPipes[0];
    $hStdOut = $ahPipes[1];

    // The complete script is written before any output is read.
    // NOTE(review): this could block if psql fills the output pipe while the
    // script is still being written - confirm for large scripts.
    fwrite($hStdIn, $sScript);
    fclose($hStdIn);

    // TODO: error checking
    while (true)
    {
        if (feof($hStdOut)) break;
        echo fread($hStdOut, 4096);
    }
    fclose($hStdOut);

    proc_close($hProcess);
}