]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
352aa943edc620bfa744de92993c8835fb1b84b7
[nominatim.git] / utils / setup.php
1 #!/usr/bin/php -Cq
2 <?php
3
require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php');
ini_set('memory_limit', '800M');

// Command line option table handed to getCmdOpt() and showUsage().
// The first entry is the program description. Every other entry starts with
// the long option name and the short option letter and ends with what appears
// to be the value type followed by the help text; the numeric columns are
// interpreted by getCmdOpt(), which is defined outside this file.
$aCMDOptions = array(
        "Create and setup nominatim search system",
        array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
        array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
        array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),

        array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
        array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),

        array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),

        array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
        array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
        array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
        array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
        array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
        array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
        array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
        array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
        array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
        // Help text used to be a copy of the calculate-postcodes line.
        array('create-roads', '', 0, 1, 0, 0, 'bool', 'Create road location entries for highways (not included in \'all\')'),
        array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
        // Help text used to be a copy of the osmosis-init line.
        array('osmosis-init-date', '', 0, 1, 1, 1, 'string', 'Date used to fetch the osmosis state file (e.g. 2010-10-01T22:00:00Z)'),
        array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
        array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'),
);
// Parses argv into $aCMDResult, which every stage below reads.
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
34
// Set by every stage that runs; when it is still false at the end of the
// script the usage text is shown instead.
$bDidSomething = false;

// This is a pretty hard core default - the number of processors in the box - 1.
// The processor count does not change while we run, so ask for it only once.
$iProcessors = getProcessorCount();
$iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:($iProcessors-1);
// Cap first and apply the floor last: this guarantees at least one thread
// even if the processor count is reported as 0. The thread count is used as
// a modulus in the SQL built by later stages, so it must never be 0.
if ($iInstances > $iProcessors)
{
        $iInstances = $iProcessors;
        echo "WARNING: resetting threads to $iInstances\n";
}
if ($iInstances < 1)
{
        $iInstances = 1;
        echo "WARNING: resetting threads to $iInstances\n";
}

// When no explicit osmosis-init-date was given, derive one from a file name
// of the form planet-YYMMDD.*: 22:00Z on the day before the file's date.
if (isset($aCMDResult['osm-file']) && !isset($aCMDResult['osmosis-init-date']))
{
        $sBaseFile = basename($aCMDResult['osm-file']);
        if (preg_match('#^planet-([0-9]{2})([0-9]{2})([0-9]{2})[.]#', $sBaseFile, $aMatch))
        {
                // Ask mktime() for "day - 1" and let it normalise across month and
                // year boundaries. Subtracting 86400 seconds from local midnight
                // lands two calendar days back when a daylight-saving switch
                // makes the previous day only 23 hours long.
                $iTime = mktime(0, 0, 0, (int)$aMatch[2], (int)$aMatch[3] - 1, 2000 + (int)$aMatch[1]);
                $aCMDResult['osmosis-init-date'] = date('Y-m-d', $iTime).'T22:00:00Z';
        }
}
59
// Stage "create-db" (also part of "all"): stop if a connection to the
// configured DSN already succeeds, otherwise run createdb.
$bCreateDatabase = ($aCMDResult['create-db'] || $aCMDResult['all']);
if ($bCreateDatabase)
{
        $bDidSomething = true;
        echo "Create DB\n";

        // A connection that does NOT come back as a PEAR error means the
        // database is already there.
        $oDB =& DB::connect(CONST_Database_DSN, false);
        $bConnectFailed = PEAR::isError($oDB);
        if (!$bConnectFailed)
        {
                fail('database already exists');
        }

        passthru('createdb nominatim');
}
71
// Stage "setup-db": install plpgsql, the contrib and PostGIS scripts and the
// bundled reference data into the database.
// This stage used to be guarded by a second copy of the create-db test, so
// the declared --setup-db option never triggered anything. It now honours
// --setup-db; --create-db is kept in the condition so existing invocations
// that relied on create-db running both steps keep working.
if ($aCMDResult['setup-db'] || $aCMDResult['create-db'] || $aCMDResult['all'])
{
        echo "Setup DB\n";
        $bDidSomething = true;
        // TODO: path detection, detection memory, etc.

        $oDB =& getDB();
        passthru('createlang plpgsql nominatim');

        // Scripts are run one after another in this fixed order.
        $aSetupScripts = array(
                CONST_Path_Postgresql_Contrib.'/_int.sql',
                CONST_Path_Postgresql_Contrib.'/hstore.sql',
                CONST_Path_Postgresql_Postgis.'/postgis.sql',
                CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql',
                CONST_BasePath.'/data/country_name.sql',
                CONST_BasePath.'/data/country_naturalearthdata.sql',
                CONST_BasePath.'/data/country_osm_grid.sql',
                CONST_BasePath.'/data/gb_postcode.sql',
                CONST_BasePath.'/data/us_statecounty.sql',
                CONST_BasePath.'/data/us_state.sql',
                CONST_BasePath.'/data/us_postcode.sql',
                CONST_BasePath.'/data/worldboundaries.sql',
        );
        foreach($aSetupScripts as $sSetupScript)
        {
                pgsqlRunScriptFile($sSetupScript);
        }
}
93
// Stage "import-data": run the bundled osm2pgsql on the given OSM file and
// verify afterwards that the place table contains at least one row.
if ($aCMDResult['import-data'] || $aCMDResult['all'])
{
        echo "Import\n";
        $bDidSomething = true;

        if (!file_exists(CONST_BasePath.'/osm2pgsql/osm2pgsql')) fail("please download and build osm2pgsql");
        // Without an input file osm2pgsql would be started with nothing to
        // import, so report the real problem up front.
        if (!isset($aCMDResult['osm-file']) || !$aCMDResult['osm-file']) fail('osm-file is required to import data');
        // Quote the path so that spaces or shell metacharacters in it cannot
        // split or alter the command line.
        passthru(CONST_BasePath.'/osm2pgsql/osm2pgsql -lsc -O gazetteer -C 10000 --hstore -d nominatim '.escapeshellarg($aCMDResult['osm-file']));

        $oDB =& getDB();
        $x = $oDB->getRow('select * from place limit 1');
        if (!$x || PEAR::isError($x)) fail('No Data');
}
106
// Stage "create-functions": run sql/functions.sql with its {modulepath}
// placeholder replaced by the module directory. Requires module/nominatim.so
// to exist.
$bCreateFunctions = ($aCMDResult['create-functions'] || $aCMDResult['all']);
if ($bCreateFunctions)
{
        $bDidSomething = true;
        echo "Functions\n";

        $bModuleBuilt = file_exists(CONST_BasePath.'/module/nominatim.so');
        if (!$bModuleBuilt)
        {
                fail("nominatim module not built");
        }

        $sFunctionsSQL = file_get_contents(CONST_BasePath.'/sql/functions.sql');
        $sTemplate = str_replace('{modulepath}', CONST_BasePath.'/module', $sFunctionsSQL);
        pgsqlRunScript($sTemplate);
}
116
// Stage "create-tables": run sql/tables.sql, then apply sql/functions.sql
// once more with {modulepath} filled in.
$bCreateTables = ($aCMDResult['create-tables'] || $aCMDResult['all']);
if ($bCreateTables)
{
        $bDidSomething = true;
        echo "Tables\n";

        pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');

        // the functions script is run again after the tables script
        $sFunctionsSQL = file_get_contents(CONST_BasePath.'/sql/functions.sql');
        $sTemplate = str_replace('{modulepath}', CONST_BasePath.'/module', $sFunctionsSQL);
        pgsqlRunScript($sTemplate);
}
128
// Stage "create-partitions": expand sql/partitions.src.sql once per partition
// and run the result through psql.
if ($aCMDResult['create-partitions'] || $aCMDResult['all'])
{
        echo "Partitions\n";
        $bDidSomething = true;
        $oDB =& getDB();
        $sSQL = 'select partition from country_name order by country_code';
        $aPartitions = $oDB->getCol($sSQL);
        if (PEAR::isError($aPartitions))
        {
                fail($aPartitions->getMessage());
        }
        // Partition 0 is always generated in addition to those in country_name.
        $aPartitions[] = 0;
        // The query returns one row per country and 0 is appended
        // unconditionally, so the same partition can appear more than once.
        // Expand each partition only once (first occurrence wins), otherwise
        // the generated SQL repeats the same section for the same partition.
        $aPartitions = array_unique($aPartitions);

        // Every "-- start ... -- end" section of the template is repeated for
        // each partition with the -partition- placeholder substituted.
        $sTemplate = file_get_contents(CONST_BasePath.'/sql/partitions.src.sql');
        preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
        foreach($aMatches as $aMatch)
        {
                $sResult = '';
                foreach($aPartitions as $sPartitionName)
                {
                        $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
                }
                $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
        }

        pgsqlRunScript($sTemplate);
}
156
// Stage "load-data": empty the live tables, recreate the seq_place sequence
// and copy the rows of place into placex using $iInstances connections at
// the same time.
if ($aCMDResult['load-data'] || $aCMDResult['all'])
{
        echo "Load Data\n";
        $bDidSomething = true;

        $oDB =& getDB();
        // Reset statements, run in this order; one progress dot per statement.
        $aResetStatements = array(
                'TRUNCATE word',
                'TRUNCATE placex',
                'TRUNCATE place_addressline',
                'TRUNCATE place_boundingbox',
                'TRUNCATE location_area',
                'TRUNCATE search_name',
                'TRUNCATE search_name_blank',
                'DROP SEQUENCE seq_place',
                'CREATE SEQUENCE seq_place start 100000',
        );
        foreach($aResetStatements as $sResetSQL)
        {
                if (!pg_query($oDB->connection, $sResetSQL)) fail(pg_last_error($oDB->connection));
                echo '.';
        }

        // Each connection is sent the rows whose osm_id modulo the thread
        // count equals its index; the queries are sent asynchronously.
        $aDBInstances = array();
        for($i = 0; $i < $iInstances; $i++)
        {
                $aDBInstances[$i] =& getDB(true);
                $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
                $sSQL .= 'housenumber, street, isin, postcode, country_code, extratags, ';
                $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
                if ($aCMDResult['verbose']) echo "$sSQL\n";
                // Report the error of the connection the query was sent on;
                // this used to read the error of the unrelated $oDB connection.
                if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($aDBInstances[$i]->connection));
        }

        // Poll once a second until no connection reports busy.
        $bAnyBusy = true;
        while($bAnyBusy)
        {
                $bAnyBusy = false;
                for($i = 0; $i < $iInstances; $i++)
                {
                        if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
                }
                sleep(1);
                echo '.';
        }
        echo "\n";
}
205
// Stage "create-roads": call insertLocationRoad() for every highway row of
// placex with rank_search between 26 and 27, spread over $iInstances
// connections by osm_id modulo the thread count. Not triggered by "all".
if ($aCMDResult['create-roads'])
{
        $bDidSomething = true;

        $oDB =& getDB();
        $aDBInstances = array();
        for($i = 0; $i < $iInstances; $i++)
        {
                $aDBInstances[$i] =& getDB(true);
                // Errors are read from the connection that ran the statement;
                // both checks below used to read them from $oDB instead.
                if (!pg_query($aDBInstances[$i]->connection, 'set enable_bitmapscan = off')) fail(pg_last_error($aDBInstances[$i]->connection));
                $sSQL = 'select count(*) from (select insertLocationRoad(partition, place_id, country_code, geometry) from ';
                $sSQL .= 'placex where osm_id % '.$iInstances.' = '.$i.' and rank_search between 26 and 27 and class = \'highway\') as x ';
                if ($aCMDResult['verbose']) echo "$sSQL\n";
                if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($aDBInstances[$i]->connection));
        }

        // Poll once a second until no connection reports busy.
        $bAnyBusy = true;
        while($bAnyBusy)
        {
                $bAnyBusy = false;
                for($i = 0; $i < $iInstances; $i++)
                {
                        if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
                }
                sleep(1);
                echo '.';
        }
        echo "\n";
}
234
// Stage "import-tiger-data": send the lines of every data/tiger2009/*.sql
// file, one line per query, to whichever of the $iInstances connections is
// currently idle. Not triggered by "all".
if ($aCMDResult['import-tiger-data'])
{
        $bDidSomething = true;

        $aDBInstances = array();
        for($i = 0; $i < $iInstances; $i++)
        {
                $aDBInstances[$i] =& getDB(true);
        }

        // glob() returns false on error; treat that like "no files".
        $aTigerFiles = glob(CONST_BasePath.'/data/tiger2009/*.sql');
        if (!$aTigerFiles) $aTigerFiles = array();

        foreach($aTigerFiles as $sFile)
        {
                echo $sFile.': ';
                $hFile = fopen($sFile, "r");
                if (!$hFile) fail('unable to open '.$sFile);
                // NOTE(review): this first line is read but never sent, because
                // $sSQL is overwritten before the first pg_send_query() below.
                // Confirm that skipping line one of each file is intentional.
                $sSQL = fgets($hFile, 100000);
                $iLines = 0;

                while(true)
                {
                        for($i = 0; $i < $iInstances; $i++)
                        {
                                if (!pg_connection_busy($aDBInstances[$i]->connection))
                                {
                                        // Drain pending results before sending the next query.
                                        while(pg_get_result($aDBInstances[$i]->connection));
                                        $sSQL = fgets($hFile, 100000);
                                        if (!$sSQL) break 2;
                                        // $oDB is never assigned in this stage, so the error must be
                                        // read from the connection the query was sent on.
                                        if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($aDBInstances[$i]->connection));
                                        $iLines++;
                                        // one progress dot per 1000 lines sent
                                        if ($iLines == 1000)
                                        {
                                                echo ".";
                                                $iLines = 0;
                                        }
                                }
                        }
                        usleep(10);
                }

                fclose($hFile);

                // Wait for the queries still in flight before starting the next file.
                $bAnyBusy = true;
                while($bAnyBusy)
                {
                        $bAnyBusy = false;
                        for($i = 0; $i < $iInstances; $i++)
                        {
                                if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
                        }
                        usleep(10);
                }
                echo "\n";
        }
}
288
// Stage "calculate-postcodes": replace all placex rows of osm_type 'P' with
// one postcode row per (country_code, postcode) placed at the average
// centroid of the matching placex rows, plus one row per us_postcode entry.
if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
{
        // Mark the run as having done work; without this, running only
        // --calculate-postcodes did the work and then printed the usage text.
        $bDidSomething = true;

        $oDB =& getDB();
        if (!pg_query($oDB->connection, 'DELETE from placex where osm_type=\'P\'')) fail(pg_last_error($oDB->connection));
        $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
        $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,country_code,";
        $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select country_code,postcode,";
        $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
        $sSQL .= "from placex where postcode is not null group by country_code,postcode) as x";
        if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));

        // Rows from us_postcode are inserted with country code 'us'.
        $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
        $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',";
        $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode";
        if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
}
305
// Stage "osmosis-init": only runs when an osmosis-init-date is available.
// Creates the osmosis replication configuration under settings/ unless
// configuration.txt is already there, then downloads the state file for the
// given date into settings/state.txt.
$bOsmosisInit = ($aCMDResult['osmosis-init'] || $aCMDResult['all']);
if ($bOsmosisInit && isset($aCMDResult['osmosis-init-date']))
{
        $bDidSomething = true;

        $sOsmosisBin = CONST_BasePath.'/osmosis-0.38/bin/osmosis';
        $sSettingsDir = CONST_BasePath.'/settings';

        if (!file_exists($sOsmosisBin))
        {
                fail("please download osmosis");
        }

        if (!file_exists($sSettingsDir.'/configuration.txt'))
        {
                passthru($sOsmosisBin.' --read-replication-interval-init '.$sSettingsDir);
        }
        else
        {
                echo "settings/configuration.txt already exists\n";
        }

        $sDate = $aCMDResult['osmosis-init-date'];
        $sURL = 'http://toolserver.org/~mazder/replicate-sequences/?'.$sDate;
        echo "Getting state file: $sURL\n";
        $sStateFile = file_get_contents($sURL);

        // An empty response, or one longer than 1000 bytes, is rejected.
        $bStateUsable = ($sStateFile && strlen($sStateFile) <= 1000);
        if (!$bStateUsable)
        {
                fail("unable to obtain state file");
        }
        file_put_contents($sSettingsDir.'/state.txt', $sStateFile);
}
321
// Stage "index": run the bundled nominatim indexer with $iInstances threads,
// optionally passing the index-output file via -F.
if ($aCMDResult['index'] || $aCMDResult['all'])
{
        $bDidSomething = true;
        $sOutputFile = '';
        // Quote the user supplied path so that spaces or shell metacharacters
        // in it cannot split or alter the command line.
        if (isset($aCMDResult['index-output'])) $sOutputFile = ' -F '.escapeshellarg($aCMDResult['index-output']);
        passthru(CONST_BasePath.'/nominatim/nominatim -i -d nominatim -t '.$iInstances.$sOutputFile);
}

// No stage was selected on the command line: show the usage text.
if (!$bDidSomething)
{
        showUsage($aCMDOptions, true);
}
334
// Runs an SQL script file through the psql command line client against the
// database named in CONST_Database_DSN and echoes psql's standard output.
// psql's standard error is discarded.
//
// $sFilename  path of the script; fail() is called when it does not exist
//             or when psql cannot be started.
function pgsqlRunScriptFile($sFilename)
{
        if (!file_exists($sFilename)) fail('unable to find '.$sFilename);

        // Convert database DSN to psql parameters
        $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
        // Same port handling as pgsqlRunScript(), so that both helpers talk to
        // the same server: fall back to 5432 when the DSN carries no port.
        if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
        // The file name is quoted so that paths containing spaces or shell
        // metacharacters reach psql as a single argument.
        $sCMD = 'psql -p '.$aDSNInfo['port'].' -f '.escapeshellarg($sFilename).' '.$aDSNInfo['database'];

        // stdin and stdout are pipes, stderr is appended to /dev/null
        $aDescriptors = array(
                0 => array('pipe', 'r'),
                1 => array('pipe', 'w'),
                2 => array('file', '/dev/null', 'a')
        );
        $ahPipes = null;
        $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
        if (!is_resource($hProcess)) fail('unable to start pgsql');

        // psql reads the script from the file, so nothing is written to stdin.
        fclose($ahPipes[0]);

        // TODO: error checking
        while(!feof($ahPipes[1]))
        {
                echo fread($ahPipes[1], 4096);
        }
        fclose($ahPipes[1]);

        proc_close($hProcess);
}
363
// Pipes an SQL script held in a string into the psql command line client
// (database and port taken from CONST_Database_DSN) and echoes everything
// psql writes to standard output. psql's standard error is discarded.
//
// $sScript  the SQL text written to psql's stdin.
function pgsqlRunScript($sScript)
{
        // Build the psql command line from the DSN; 5432 is used when the DSN
        // carries no port.
        $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
        if (empty($aDSNInfo['port']))
        {
                $aDSNInfo['port'] = 5432;
        }
        $sCMD = 'psql -p '.$aDSNInfo['port'].' '.$aDSNInfo['database'];

        // stdin and stdout are pipes, stderr is appended to /dev/null
        $ahPipes = null;
        $hProcess = proc_open(
                $sCMD,
                array(
                        0 => array('pipe', 'r'),
                        1 => array('pipe', 'w'),
                        2 => array('file', '/dev/null', 'a')
                ),
                $ahPipes
        );
        if (!is_resource($hProcess))
        {
                fail('unable to start pgsql');
        }

        // Hand over the whole script, then close stdin.
        fwrite($ahPipes[0], $sScript);
        fclose($ahPipes[0]);

        // TODO: error checking
        // Relay psql's output in 4096 byte reads until end of file.
        while(true)
        {
                if (feof($ahPipes[1])) break;
                $sChunk = fread($ahPipes[1], 4096);
                echo $sChunk;
        }
        fclose($ahPipes[1]);

        proc_close($hProcess);
}