]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
Coding style adaptions and correcting small errors from merge.
[nominatim.git] / utils / setup.php
#!/usr/bin/php -Cq
<?php

// Bootstrap: pull in the site settings, then the shared command-line helpers.
// CONST_BasePath is not defined in this file; presumably it comes from the
// settings file loaded first.
require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
require_once(CONST_BasePath.'/lib/init-cmd.php');

// Raise PHP's memory limit for this script.
ini_set('memory_limit', '800M');

// Command-line definition handed to getCmdOpt(): the first entry is the
// usage text, every further entry describes one option.
// NOTE(review): the columns look like (long name, short name, min count,
// max count, min params, max params, value type, help text) - confirm
// against getCmdOpt() in the command-line library.
$aCMDOptions = array(
    'Create and setup nominatim search system',

    // general output control
    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),

    // options that take a value
    array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
    array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),

    // run every step (except those explicitly excluded in their help text)
    array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),

    // individual steps and their modifiers
    array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
    array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
    array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
    array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
    array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
    array('enable-diff-updates', '', 0, 1, 0, 0, 'bool', 'Turn on the code required to make diff updates work'),
    array('enable-debug-statements', '', 0, 1, 0, 0, 'bool', 'Include debug warning statements in pgsql commands'),
    array('ignore-errors', '', 0, 1, 0, 0, 'bool', 'Continue import even when errors in SQL are present (EXPERT)'),
    array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
    array('create-partition-tables', '', 0, 1, 0, 0, 'bool', 'Create required partition tables'),
    array('create-partition-functions', '', 0, 1, 0, 0, 'bool', 'Create required partition triggers'),
    array('no-partitions', '', 0, 1, 0, 0, 'bool', 'Do not partition search indices (speeds up import of single country extracts)'),
    array('import-wikipedia-articles', '', 0, 1, 0, 0, 'bool', 'Import wikipedia article dump'),
    array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
    array('disable-token-precalc', '', 0, 1, 0, 0, 'bool', 'Disable name precalculation (EXPERT)'),
    array('import-tiger-data', '', 0, 1, 0, 0, 'bool', "Import tiger data (not included in 'all')"),
    array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
    array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
    array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
    array('index-noanalyse', '', 0, 1, 0, 0, 'bool', 'Do not perform analyse operations during index (EXPERT)'),
    array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'),
    array('create-website', '', 0, 1, 1, 1, 'realpath', 'Create symlinks to setup web directory'),
    array('drop', '', 0, 1, 0, 0, 'bool', 'Drop tables needed for updates, making the database readonly (EXPERIMENTAL)'),
);

// Parse the process arguments; the results land in $aCMDResult.
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
44
// Set to true by every step that runs.
$bDidSomething = false;

// --import-data and --all both read the OSM file, so make sure --osm-file
// is given and usable before any step is started.
$bNeedsOsmFile = $aCMDResult['import-data'] || $aCMDResult['all'];
if ($bNeedsOsmFile)
{
    if (!isset($aCMDResult['osm-file'])) fail('missing --osm-file for data import');

    if (!file_exists($aCMDResult['osm-file'])) fail('the path supplied to --osm-file does not exist');

    if (!is_readable($aCMDResult['osm-file'])) fail('osm-file "'.$aCMDResult['osm-file'].'" not readable');
}
65
66
// Number of parallel workers: the --threads value if given, otherwise a
// pretty hard core default of (processors in the box - 1).
if (isset($aCMDResult['threads']))
{
    $iInstances = $aCMDResult['threads'];
}
else
{
    $iInstances = getProcessorCount() - 1;
}

// Keep the worker count within 1 .. number of processors.
if ($iInstances < 1)
{
    $iInstances = 1;
    echo "WARNING: resetting threads to $iInstances\n";
}
if ($iInstances > getProcessorCount())
{
    $iInstances = getProcessorCount();
    echo "WARNING: resetting threads to $iInstances\n";
}

// Assume we can steal all the cache memory in the box (unless told otherwise)
$iCacheMemory = isset($aCMDResult['osm2pgsql-cache'])
    ? $aCMDResult['osm2pgsql-cache']
    : getCacheMemoryMB();

// Split the DSN into its parts; fall back to port 5432 when the DSN has no port.
$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
if (empty($aDSNInfo['port'])) $aDSNInfo['port'] = 5432;
92
// Step --create-db: create the (so far non-existent) database with createdb.
if ($aCMDResult['create-db'] || $aCMDResult['all'])
{
    echo "Create DB\n";
    $bDidSomething = true;

    // A successful connection means the database is already there: refuse to go on.
    $oDB =& DB::connect(CONST_Database_DSN, false);
    if (!PEAR::isError($oDB))
    {
        fail('database already exists ('.CONST_Database_DSN.')');
    }

    // Port and database name come from the DSN; quote them so that unusual
    // characters cannot break or alter the shell command.
    $sCreateCmd = 'createdb -E UTF-8';
    $sCreateCmd .= ' -p '.escapeshellarg((string)$aDSNInfo['port']);
    $sCreateCmd .= ' '.escapeshellarg($aDSNInfo['database']);
    passthruCheckReturn($sCreateCmd);
}
104
// Step --setup-db: check server versions, install the required extensions
// and load the static country/postcode data into the blank database.
if ($aCMDResult['setup-db'] || $aCMDResult['all'])
{
    echo "Setup DB\n";
    $bDidSomething = true;
    // TODO: path detection, detection memory, etc.

    $oDB =& getDB();

    $fPostgresVersion = getPostgresVersion($oDB);
    echo 'Postgres version found: '.$fPostgresVersion."\n";

    if ($fPostgresVersion < 9.1)
    {
        fail("Minimum supported version of Postgresql is 9.1.");
    }

    pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS hstore');
    pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS postgis');

    // For extratags and namedetails the hstore_to_json converter is
    // needed which is only available from Postgresql 9.3+. For older
    // versions add a dummy function that returns nothing.
    $iNumFunc = $oDB->getOne("select count(*) from pg_proc where proname = 'hstore_to_json'");
    if (PEAR::isError($iNumFunc))
    {
        fail("Cannot query stored procedures.", $iNumFunc);
    }
    if ($iNumFunc == 0)
    {
        pgsqlRunScript("create function hstore_to_json(dummy hstore) returns text AS 'select null::text' language sql immutable");
        // Terminate the line so that the following output does not run into the warning.
        echo "WARNING: Postgresql is too old. extratags and namedetails API not available.\n";
    }

    $fPostgisVersion = getPostgisVersion($oDB);
    echo 'Postgis version found: '.$fPostgisVersion."\n";

    if ($fPostgisVersion < 2.1)
    {
        // Function was renamed in 2.1 and throws an annoying deprecation warning
        pgsqlRunScript('ALTER FUNCTION st_line_interpolate_point(geometry, double precision) RENAME TO ST_LineInterpolatePoint');
    }

    // Static data shipped with the source tree.
    pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
    pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturalearthdata.sql');
    pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql');
    pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_table.sql');

    // The UK postcode data is an optional, separately provided file.
    if (file_exists(CONST_BasePath.'/data/gb_postcode_data.sql.gz'))
    {
        pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_data.sql.gz');
    }
    else
    {
        echo "WARNING: external UK postcode table not found.\n";
    }
    if (CONST_Use_Extra_US_Postcodes)
    {
        pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
    }

    // Without partitioning every country is put into partition 0.
    if ($aCMDResult['no-partitions'])
    {
        pgsqlRunScript('update country_name set partition = 0');
    }

    // the following will be needed by create_functions later but
    // is only defined in the subsequently called create_tables.
    // Create dummies here that will be overwritten by the proper
    // versions in create-tables.
    pgsqlRunScript('CREATE TABLE place_boundingbox ()');
    pgsqlRunScript('create type wikipedia_article_match as ()');
}
176
// Step --import-data: run osm2pgsql on the OSM file and verify that the
// place table received data.
if ($aCMDResult['import-data'] || $aCMDResult['all'])
{
    echo "Import\n";
    $bDidSomething = true;

    $osm2pgsql = CONST_Osm2pgsql_Binary;
    if (!file_exists($osm2pgsql))
    {
        echo "Please download and build osm2pgsql.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
        fail("osm2pgsql not found in '$osm2pgsql'");
    }

    // Assemble the command line. File paths and the database name are
    // shell-quoted so that spaces or shell metacharacters in them do not
    // break the call.
    if (!is_null(CONST_Osm2pgsql_Flatnode_File))
    {
        $osm2pgsql .= ' --flat-nodes '.escapeshellarg(CONST_Osm2pgsql_Flatnode_File);
    }
    if (CONST_Tablespace_Osm2pgsql_Data)
    {
        $osm2pgsql .= ' --tablespace-slim-data '.CONST_Tablespace_Osm2pgsql_Data;
    }
    if (CONST_Tablespace_Osm2pgsql_Index)
    {
        $osm2pgsql .= ' --tablespace-slim-index '.CONST_Tablespace_Osm2pgsql_Index;
    }
    if (CONST_Tablespace_Place_Data)
    {
        $osm2pgsql .= ' --tablespace-main-data '.CONST_Tablespace_Place_Data;
    }
    if (CONST_Tablespace_Place_Index)
    {
        $osm2pgsql .= ' --tablespace-main-index '.CONST_Tablespace_Place_Index;
    }
    $osm2pgsql .= ' -lsc -O gazetteer --hstore --number-processes 1';
    $osm2pgsql .= ' -C '.$iCacheMemory;
    $osm2pgsql .= ' -P '.$aDSNInfo['port'];
    $osm2pgsql .= ' -d '.escapeshellarg($aDSNInfo['database']);
    $osm2pgsql .= ' '.escapeshellarg($aCMDResult['osm-file']);
    passthruCheckReturn($osm2pgsql);

    // Sanity check: the import must have produced at least one row in place.
    $oDB =& getDB();
    $aFirstPlace = $oDB->getRow('select * from place limit 1');
    if (PEAR::isError($aFirstPlace))
    {
        fail($aFirstPlace->getMessage());
    }
    if (!$aFirstPlace) fail('No Data');
}
214
// Step --create-functions: install the SQL functions. They need the
// compiled nominatim module, so stop early when it has not been built.
if ($aCMDResult['create-functions'] || $aCMDResult['all'])
{
    echo "Functions\n";
    $bDidSomething = true;

    $sModuleFile = CONST_InstallPath.'/module/nominatim.so';
    if (!file_exists($sModuleFile))
    {
        fail("nominatim module not built");
    }
    create_sql_functions($aCMDResult);
}
222
// Step --create-tables: fill in the placeholders of sql/tables.sql, run it,
// then install the SQL functions again.
if ($aCMDResult['create-tables'] || $aCMDResult['all'])
{
    $bDidSomething = true;

    echo "Tables\n";
    $sTemplate = file_get_contents(CONST_BasePath.'/sql/tables.sql');
    $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);

    // Template placeholder => configured tablespace, applied in this order.
    $aTablespaces = array(
        '{ts:address-data}' => CONST_Tablespace_Address_Data,
        '{ts:address-index}' => CONST_Tablespace_Address_Index,
        '{ts:search-data}' => CONST_Tablespace_Search_Data,
        '{ts:search-index}' => CONST_Tablespace_Search_Index,
        '{ts:aux-data}' => CONST_Tablespace_Aux_Data,
        '{ts:aux-index}' => CONST_Tablespace_Aux_Index,
    );
    foreach($aTablespaces as $sPlaceholder => $sTablespace)
    {
        $sTemplate = replace_tablespace($sPlaceholder, $sTablespace, $sTemplate);
    }
    pgsqlRunScript($sTemplate, false);

    // re-run the functions
    echo "Functions\n";
    create_sql_functions($aCMDResult);
}
248
// Step --create-partition-tables: expand sql/partition-tables.src.sql once
// per partition and run the result.
if ($aCMDResult['create-partition-tables'] || $aCMDResult['all'])
{
    echo "Partition Tables\n";
    $bDidSomething = true;

    // Collect the partition numbers in use; partition 0 is appended unless
    // --no-partitions was given.
    $oDB =& getDB();
    $sSQL = 'select distinct partition from country_name';
    $aPartitions = $oDB->getCol($sSQL);
    if (PEAR::isError($aPartitions))
    {
        fail($aPartitions->getMessage());
    }
    if (!$aCMDResult['no-partitions']) $aPartitions[] = 0;

    $sTemplate = file_get_contents(CONST_BasePath.'/sql/partition-tables.src.sql');

    // Template placeholder => configured tablespace, applied in this order.
    $aTablespaces = array(
        '{ts:address-data}' => CONST_Tablespace_Address_Data,
        '{ts:address-index}' => CONST_Tablespace_Address_Index,
        '{ts:search-data}' => CONST_Tablespace_Search_Data,
        '{ts:search-index}' => CONST_Tablespace_Search_Index,
        '{ts:aux-data}' => CONST_Tablespace_Aux_Data,
        '{ts:aux-index}' => CONST_Tablespace_Aux_Index,
    );
    foreach($aTablespaces as $sPlaceholder => $sTablespace)
    {
        $sTemplate = replace_tablespace($sPlaceholder, $sTablespace, $sTemplate);
    }

    // Every "-- start ... -- end" section is repeated for each partition,
    // with "-partition-" replaced by the partition number.
    preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
    foreach($aMatches as $aSection)
    {
        $sExpanded = '';
        foreach($aPartitions as $sPartition)
        {
            $sExpanded .= str_replace('-partition-', $sPartition, $aSection[1]);
        }
        $sTemplate = str_replace($aSection[0], $sExpanded, $sTemplate);
    }

    pgsqlRunScript($sTemplate);
}
288
289
// Step --create-partition-functions: expand sql/partition-functions.src.sql
// once per partition and run the result.
if ($aCMDResult['create-partition-functions'] || $aCMDResult['all'])
{
    echo "Partition Functions\n";
    $bDidSomething = true;

    // Collect the partition numbers in use; partition 0 is appended unless
    // --no-partitions was given.
    $oDB =& getDB();
    $sSQL = 'select distinct partition from country_name';
    $aPartitions = $oDB->getCol($sSQL);
    if (PEAR::isError($aPartitions))
    {
        fail($aPartitions->getMessage());
    }
    if (!$aCMDResult['no-partitions']) $aPartitions[] = 0;

    $sTemplate = file_get_contents(CONST_BasePath.'/sql/partition-functions.src.sql');

    // Every "-- start ... -- end" section is repeated for each partition,
    // with "-partition-" replaced by the partition number.
    preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
    foreach($aMatches as $aSection)
    {
        $sExpanded = '';
        foreach($aPartitions as $sPartition)
        {
            $sExpanded .= str_replace('-partition-', $sPartition, $aSection[1]);
        }
        $sTemplate = str_replace($aSection[0], $sExpanded, $sTemplate);
    }

    pgsqlRunScript($sTemplate);
}
317
// Step --import-wikipedia-articles: restore the optional wikipedia dumps.
// A missing dump is only reported as a warning.
if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all'])
{
    $bDidSomething = true;
    $sWikiArticlesFile = CONST_BasePath.'/data/wikipedia_article.sql.bin';
    $sWikiRedirectsFile = CONST_BasePath.'/data/wikipedia_redirect.sql.bin';

    // One entry per dump: file, progress message, warning when the file is absent.
    $aWikiDumps = array(
        array(
            $sWikiArticlesFile,
            "Importing wikipedia articles...",
            "WARNING: wikipedia article dump file not found - places will have default importance\n",
        ),
        array(
            $sWikiRedirectsFile,
            "Importing wikipedia redirects...",
            "WARNING: wikipedia redirect dump file not found - some place importance values may be missing\n",
        ),
    );

    foreach($aWikiDumps as $aDump)
    {
        if (!file_exists($aDump[0]))
        {
            echo $aDump[2];
            continue;
        }
        echo $aDump[1];
        pgsqlRunDropAndRestore($aDump[0]);
        echo "...done\n";
    }
}
344
345
// Step --load-data: empty the live tables, then copy the imported rows
// from place into placex and the osmline table using $iInstances parallel
// database connections.
if ($aCMDResult['load-data'] || $aCMDResult['all'])
{
    echo "Drop old Data\n";
    $bDidSomething = true;

    $oDB =& getDB();

    // Statements that reset the live tables and the place id sequence.
    // One progress dot is printed per statement.
    $aResetStatements = array(
        'TRUNCATE word',
        'TRUNCATE placex',
        'TRUNCATE location_property_osmline',
        'TRUNCATE place_addressline',
        'TRUNCATE place_boundingbox',
        'TRUNCATE location_area',
        'TRUNCATE search_name',
        'TRUNCATE search_name_blank',
        'DROP SEQUENCE seq_place',
        'CREATE SEQUENCE seq_place start 100000',
    );
    foreach($aResetStatements as $sStatement)
    {
        if (!pg_query($oDB->connection, $sStatement)) fail(pg_last_error($oDB->connection));
        echo '.';
    }

    // The per-partition road tables need to be emptied as well.
    $sSQL = 'select distinct partition from country_name';
    $aPartitions = $oDB->getCol($sSQL);
    if (PEAR::isError($aPartitions))
    {
        fail($aPartitions->getMessage());
    }
    if (!$aCMDResult['no-partitions']) $aPartitions[] = 0;
    foreach($aPartitions as $sPartition)
    {
        if (!pg_query($oDB->connection, 'TRUNCATE location_road_'.$sPartition)) fail(pg_last_error($oDB->connection));
        echo '.';
    }

    // used by getorcreate_word_id to ignore frequent partial words
    if (!pg_query($oDB->connection, 'CREATE OR REPLACE FUNCTION get_maxwordfreq() RETURNS integer AS $$ SELECT '.CONST_Max_Word_Frequency.' as maxwordfreq; $$ LANGUAGE SQL IMMUTABLE')) fail(pg_last_error($oDB->connection));
    echo ".\n";

    // pre-create the word list
    if (!$aCMDResult['disable-token-precalc'])
    {
        echo "Loading word list\n";
        pgsqlRunScriptFile(CONST_BasePath.'/data/words.sql');
    }

    echo "Load Data\n";
    $aDBInstances = array();
    $aQueriesPlacex = array();
    $aQueriesOsmline = array();
    // the query is divided into parcels, so that the work between the processes, i.e. the DBInstances, will be evenly distributed
    $iNumberOfParcels = 100;
    for($i = 0; $i < $iNumberOfParcels; $i++)
    {
        // Everything except interpolation ways goes to placex ...
        $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
        $sSQL .= 'housenumber, street, addr_place, isin, postcode, country_code, extratags, ';
        $sSQL .= 'geometry) select * from place where osm_id % '.$iNumberOfParcels.' = '.$i.' and not ';
        $sSQL .= '(class=\'place\' and type=\'houses\' and osm_type=\'W\' and ST_GeometryType(geometry) = \'ST_LineString\');';
        array_push($aQueriesPlacex, $sSQL);
        // ... and the interpolation ways are handed to insert_osmline.
        $sSQL = 'select insert_osmline (osm_id, housenumber, street, addr_place, postcode, country_code, ';
        $sSQL .= 'geometry) from place where osm_id % '.$iNumberOfParcels.' = '.$i.' and ';
        $sSQL .= 'class=\'place\' and type=\'houses\' and osm_type=\'W\' and ST_GeometryType(geometry) = \'ST_LineString\'';
        array_push($aQueriesOsmline, $sSQL);
    }

    // One separate database connection per worker.
    for($i = 0; $i < $iInstances; $i++)
    {
        $aDBInstances[$i] =& getDB(true);
    }

    // now execute the query blocks, in the first round for placex, then for osmline,
    // because insert_osmline depends on the placex table
    $aRounds = array(
        array('Inserting from place to placex.', $aQueriesPlacex),
        array('Inserting from place to osmline.', $aQueriesOsmline),
    );
    foreach($aRounds as $aRound)
    {
        echo $aRound[0];
        $aQueries = $aRound[1];

        // Keep handing queries to idle connections until the queue is empty
        // and no connection is busy any more.
        $bAnyBusy = true;
        while($bAnyBusy)
        {
            $bAnyBusy = false;

            for($i = 0; $i < $iInstances; $i++)
            {
                if (pg_connection_busy($aDBInstances[$i]->connection))
                {
                    $bAnyBusy = true;
                }
                else if (count($aQueries) > 0)
                {
                    $query = array_pop($aQueries);
                    if (!pg_send_query($aDBInstances[$i]->connection, $query))
                    {
                        // Report the error of the connection the query was sent
                        // on, not of the unrelated main connection.
                        fail(pg_last_error($aDBInstances[$i]->connection));
                    }
                    else
                    {
                        pg_get_result($aDBInstances[$i]->connection);
                        $bAnyBusy = true;
                    }
                }
            }
            sleep(1);
            echo '.';
        }
        echo "\n";
    }

    echo "Reanalysing database...\n";
    pgsqlRunScript('ANALYSE');
}
465
// Step --import-tiger-data (not part of --all): run the tiger start script,
// feed every line of the *.sql files in CONST_Tiger_Data_Path to the
// database over $iInstances parallel connections, then run the finish script.
if ($aCMDResult['import-tiger-data'])
{
    $bDidSomething = true;

    $sTemplate = file_get_contents(CONST_BasePath.'/sql/tiger_import_start.sql');
    $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);
    $sTemplate = replace_tablespace('{ts:aux-data}',
                                    CONST_Tablespace_Aux_Data, $sTemplate);
    $sTemplate = replace_tablespace('{ts:aux-index}',
                                    CONST_Tablespace_Aux_Index, $sTemplate);
    pgsqlRunScript($sTemplate, false);

    // One separate database connection per worker.
    $aDBInstances = array();
    for($i = 0; $i < $iInstances; $i++)
    {
        $aDBInstances[$i] =& getDB(true);
    }

    foreach(glob(CONST_Tiger_Data_Path.'/*.sql') as $sFile)
    {
        echo $sFile.': ';
        $hFile = fopen($sFile, "r");
        if (!$hFile)
        {
            // Without this check the read loop below would operate on an
            // invalid handle and the file would be skipped silently.
            fail("cannot open tiger data file '$sFile'");
        }
        // NOTE(review): this first line is read but never sent - it is
        // overwritten by the fgets() in the loop before any query goes out.
        // Presumably intentional (header line?); confirm against the
        // format of the tiger SQL files.
        $sSQL = fgets($hFile, 100000);
        $iLines = 0;

        // Hand one line at a time to whichever connection is idle.
        while(true)
        {
            for($i = 0; $i < $iInstances; $i++)
            {
                if (!pg_connection_busy($aDBInstances[$i]->connection))
                {
                    // Drain the results of the previous query on this connection.
                    while(pg_get_result($aDBInstances[$i]->connection));
                    $sSQL = fgets($hFile, 100000);
                    if (!$sSQL) break 2;
                    if (!pg_send_query($aDBInstances[$i]->connection, $sSQL))
                    {
                        // Use the worker connection for the error text; no main
                        // connection ($oDB) is opened in this step.
                        fail(pg_last_error($aDBInstances[$i]->connection));
                    }
                    $iLines++;
                    // One progress dot per 1000 lines sent.
                    if ($iLines == 1000)
                    {
                        echo ".";
                        $iLines = 0;
                    }
                }
            }
            usleep(10);
        }

        fclose($hFile);

        // Wait until every connection has finished before starting the next file.
        $bAnyBusy = true;
        while($bAnyBusy)
        {
            $bAnyBusy = false;
            for($i = 0; $i < $iInstances; $i++)
            {
                if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
            }
            usleep(10);
        }
        echo "\n";
    }

    echo "Creating indexes\n";
    $sTemplate = file_get_contents(CONST_BasePath.'/sql/tiger_import_finish.sql');
    $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);
    $sTemplate = replace_tablespace('{ts:aux-data}',
                                    CONST_Tablespace_Aux_Data, $sTemplate);
    $sTemplate = replace_tablespace('{ts:aux-index}',
                                    CONST_Tablespace_Aux_Index, $sTemplate);
    pgsqlRunScript($sTemplate, false);
}
536
// Step --calculate-postcodes: replace all artificial postcode places
// (osm_type 'P') in placex with freshly computed centroids.
if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
{
    $bDidSomething = true;
    $oDB =& getDB();

    // Remove the postcode places of an earlier run.
    if (!pg_query($oDB->connection, "DELETE from placex where osm_type='P'"))
    {
        fail(pg_last_error($oDB->connection));
    }

    // Both inserts below write the same columns.
    $sInsertInto = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) ";

    // One place per (country, postcode): the average of the centroids of
    // all placex rows carrying that postcode.
    $sSQL = $sInsertInto;
    $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,";
    $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,";
    $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
    $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x";
    if (!pg_query($oDB->connection, $sSQL))
    {
        fail(pg_last_error($oDB->connection));
    }

    // Optionally add the postcodes of the us_postcode table.
    if (CONST_Use_Extra_US_Postcodes)
    {
        $sSQL = $sInsertInto;
        $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',";
        $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode";
        if (!pg_query($oDB->connection, $sSQL))
        {
            fail(pg_last_error($oDB->connection));
        }
    }
}
557
// Prepare osmosis replication so that diff updates can be applied later:
//  1. create settings/configuration.txt (unless it already exists),
//  2. look up when the newest imported node was created,
//  3. walk the replication server's directory listings to find the state
//     file matching that date and store it as settings/state.txt,
//  4. record the listing date of that state file in import_status.
// Skipped when --all is combined with --drop: no use doing osmosis-init
// when dropping update tables.
if ($aCMDResult['osmosis-init'] || ($aCMDResult['all'] && !$aCMDResult['drop']))
{
    $bDidSomething = true;
    $oDB =& getDB();

    if (!file_exists(CONST_Osmosis_Binary))
    {
        echo "Please download osmosis.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
        // A missing osmosis only aborts an explicit --osmosis-init run.
        if (!$aCMDResult['all'])
        {
            fail("osmosis not found in '".CONST_Osmosis_Binary."'");
        }
    }
    else
    {
        if (file_exists(CONST_InstallPath.'/settings/configuration.txt'))
        {
            echo "settings/configuration.txt already exists\n";
        }
        else
        {
            passthru(CONST_Osmosis_Binary.' --read-replication-interval-init '.CONST_InstallPath.'/settings');
            // update osmosis configuration.txt with our settings
            passthru("sed -i 's!baseUrl=.*!baseUrl=".CONST_Replication_Url."!' ".CONST_InstallPath.'/settings/configuration.txt');
            passthru("sed -i 's:maxInterval = .*:maxInterval = ".CONST_Replication_MaxInterval.":' ".CONST_InstallPath.'/settings/configuration.txt');
        }

        // Find the last node in the DB
        $iLastOSMID = $oDB->getOne("select max(osm_id) from place where osm_type = 'N'");

        // Lookup the timestamp that node was created (less 3 hours for
        // margin for changesets to be closed). The value stays false when
        // any step of the lookup fails, so that a bogus date is never used
        // to select a state file.
        $iLastNodeTimestamp = false;
        if (!PEAR::isError($iLastOSMID) && $iLastOSMID)
        {
            $sLastNodeURL = 'http://www.openstreetmap.org/api/0.6/node/'.$iLastOSMID."/1";
            $sLastNodeXML = file_get_contents($sLastNodeURL);
            if ($sLastNodeXML !== false
                && preg_match('#timestamp="(([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})Z)"#', $sLastNodeXML, $aLastNodeDate))
            {
                $iNodeCreated = strtotime($aLastNodeDate[1]);
                if ($iNodeCreated !== false) $iLastNodeTimestamp = $iNodeCreated - (3*60*60);
            }
        }

        if ($iLastNodeTimestamp === false)
        {
            echo "Unable to determine the creation date of the last imported node.\n";
            // Same policy as for the other soft failures in this step:
            // only an explicit --osmosis-init run is aborted.
            if (!$aCMDResult['all'])
            {
                fail("Cannot determine the start date for replication.");
            }
        }
        else
        {
            // Search for the correct state file - uses file timestamps so need to sort by date descending
            $sRepURL = CONST_Replication_Url."/";
            $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1");
            // download.geofabrik.de:    <a href="000/">000/</a></td><td align="right">26-Feb-2013 11:53  </td>
            // planet.openstreetmap.org: <a href="273/">273/</a>                    2013-03-11 07:41    -
            preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a>\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', (string)$sRep, $aRepMatches, PREG_SET_ORDER);
            if ($aRepMatches)
            {
                // First directory level. Entries are listed newest first:
                // keep the last entry that is not older than the node date,
                // or the newest entry when all of them are older.
                $aPrevRepMatch = false;
                foreach($aRepMatches as $aRepMatch)
                {
                    if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
                    $aPrevRepMatch = $aRepMatch;
                }
                if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;

                // Second directory level.
                $sRepURL .= $aRepMatch[1];
                $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1");
                preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a>\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', (string)$sRep, $aRepMatches, PREG_SET_ORDER);
                // Without this check an empty listing would silently reuse
                // the entry chosen on the previous level.
                if (!$aRepMatches) fail("Cannot read state file directory '".$sRepURL."'.");
                $aPrevRepMatch = false;
                foreach($aRepMatches as $aRepMatch)
                {
                    if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
                    $aPrevRepMatch = $aRepMatch;
                }
                if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;

                // Third level: the state files themselves.
                $sRepURL .= $aRepMatch[1];
                $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1");
                preg_match_all('#<a href="[0-9]{3}.state.txt">([0-9]{3}).state.txt</a>\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', (string)$sRep, $aRepMatches, PREG_SET_ORDER);
                if (!$aRepMatches) fail("Cannot find any state file in '".$sRepURL."'.");
                $aPrevRepMatch = false;
                foreach($aRepMatches as $aRepMatch)
                {
                    if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
                    $aPrevRepMatch = $aRepMatch;
                }
                if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;

                $sRepURL .= $aRepMatch[1].'.state.txt';
                echo "Getting state file: $sRepURL\n";
                $sStateFile = file_get_contents($sRepURL);
                // Anything longer than 1000 bytes is not accepted as a state file.
                if (!$sStateFile || strlen($sStateFile) > 1000) fail("unable to obtain state file");
                file_put_contents(CONST_InstallPath.'/settings/state.txt', $sStateFile);
                echo "Updating DB status\n";
                if (!pg_query($oDB->connection, 'TRUNCATE import_status')) fail(pg_last_error($oDB->connection));
                // $aRepMatch[2] can only contain [-0-9a-zA-Z :] (see the
                // pattern above), so it is safe inside the quoted literal.
                $sSQL = "INSERT INTO import_status VALUES('".$aRepMatch[2]."')";
                if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
            }
            else
            {
                if (!$aCMDResult['all'])
                {
                    fail("Cannot read state file directory.");
                }
            }
        }
    }
}
651
// Index the imported data with the external nominatim indexer. The work is
// split into three rank ranges; between two ranges the database statistics
// are refreshed with ANALYSE unless --index-noanalyse was given.
if ($aCMDResult['index'] || $aCMDResult['all'])
{
    $bDidSomething = true;
    $sOutputFile = '';
    $sBaseCmd = CONST_InstallPath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$iInstances.$sOutputFile;

    // Indexer arguments for each rank range, in execution order.
    $aRankRanges = array(' -R 4', ' -r 5 -R 25', ' -r 26');
    foreach ($aRankRanges as $iStage => $sRankArgs)
    {
        // ANALYSE runs before every range except the first one.
        if ($iStage > 0 && !$aCMDResult['index-noanalyse'])
        {
            pgsqlRunScript('ANALYSE');
        }
        passthruCheckReturn($sBaseCmd.$sRankArgs);
    }
}
663
// Create the search indices from sql/indices.src.sql after filling in the
// configured tablespace for each index group.
if ($aCMDResult['create-search-indices'] || $aCMDResult['all'])
{
    echo "Search indices\n";
    $bDidSomething = true;

    $sTemplate = file_get_contents(CONST_BasePath.'/sql/indices.src.sql');

    // Placeholder in the SQL template => tablespace setting to apply.
    $aIndexTablespaces = array(
        '{ts:address-index}' => CONST_Tablespace_Address_Index,
        '{ts:search-index}' => CONST_Tablespace_Search_Index,
        '{ts:aux-index}' => CONST_Tablespace_Aux_Index
    );
    foreach ($aIndexTablespaces as $sPlaceholder => $sTablespace)
    {
        $sTemplate = replace_tablespace($sPlaceholder, $sTablespace, $sTemplate);
    }

    pgsqlRunScript($sTemplate);
}
679
// Populate an existing website directory with symlinks to the PHP entry
// points and the static assets, then probe the configured base URL.
if (isset($aCMDResult['create-website']))
{
    $bDidSomething = true;
    $sTargetDir = $aCMDResult['create-website'];
    if (!is_dir($sTargetDir))
    {
        echo "You must create the website directory before calling this function.\n";
        fail("Target directory does not exist.");
    }

    // Link name inside the target directory => file or directory it points to.
    // index.php is an alias for search.php.
    $aWebsiteLinks = array(
        'details.php' => CONST_InstallPath.'/website/details.php',
        'reverse.php' => CONST_InstallPath.'/website/reverse.php',
        'search.php' => CONST_InstallPath.'/website/search.php',
        'index.php' => CONST_InstallPath.'/website/search.php',
        'lookup.php' => CONST_InstallPath.'/website/lookup.php',
        'deletable.php' => CONST_InstallPath.'/website/deletable.php',
        'polygons.php' => CONST_InstallPath.'/website/polygons.php',
        'status.php' => CONST_InstallPath.'/website/status.php',
        'images' => CONST_BasePath.'/website/images',
        'js' => CONST_BasePath.'/website/js',
        'css' => CONST_BasePath.'/website/css'
    );
    foreach ($aWebsiteLinks as $sLinkName => $sLinkTarget)
    {
        // Failures are deliberately silenced by the @ operator.
        @symlink($sLinkTarget, $sTargetDir.'/'.$sLinkName);
    }
    echo "Symlinks created\n";

    // Fetch one of the linked files through the web server as a smoke test.
    $sTestFile = @file_get_contents(CONST_Website_BaseURL.'js/tiles.js');
    if (!$sTestFile)
    {
        echo "\nWARNING: Unable to access the website at ".CONST_Website_BaseURL."\n";
        echo "You may want to update settings/local.php with @define('CONST_Website_BaseURL', 'http://[HOST]/[PATH]/');\n";
    }
}
710
// Drop every table in the public schema that is not on the keep list and
// delete the osm2pgsql flatnode file.
if ($aCMDResult['drop'])
{
    // The implementation is potentially a bit dangerous because it uses
    // a positive selection of tables to keep, and deletes everything else.
    // Including any tables that the unsuspecting user might have manually
    // created. USE AT YOUR OWN PERIL.
    $bDidSomething = true;

    // tables we want to keep. everything else goes.
    // Entries are fnmatch() patterns.
    $aKeepTables = array(
       "*columns",
       "import_polygon_*",
       "import_status",
       "place_addressline",
       "location_property*",
       "placex",
       "search_name",
       "seq_*",
       "word",
       "query_log",
       "new_query_log",
       "gb_postcode",
       "spatial_ref_sys",
       "country_name",
       "place_classtype_*"
    );

    $oDB =& getDB();
    $aDropTables = array();
    $aHaveTables = $oDB->getCol("SELECT tablename FROM pg_tables WHERE schemaname='public'");
    if (PEAR::isError($aHaveTables))
    {
        // Report the error of the failed query itself.
        fail($aHaveTables->getMessage());
    }
    foreach($aHaveTables as $sTable)
    {
        $bFound = false;
        foreach ($aKeepTables as $sKeep)
        {
            if (fnmatch($sKeep, $sTable))
            {
                $bFound = true;
                break;
            }
        }
        if (!$bFound) array_push($aDropTables, $sTable);
    }

    foreach ($aDropTables as $sDrop)
    {
        if ($aCMDResult['verbose']) echo "dropping table $sDrop\n";
        // Quote the name as an SQL identifier so that mixed-case or
        // otherwise unusual table names are dropped as well.
        $sQuotedTable = '"'.str_replace('"', '""', $sDrop).'"';
        @pg_query($oDB->connection, "DROP TABLE $sQuotedTable CASCADE");
        // ignore warnings/errors as they might be caused by a table having
        // been deleted already by CASCADE
    }

    // Only try to remove the flatnode file when one is configured and it
    // actually exists; unlink() would otherwise raise a warning.
    if (!is_null(CONST_Osm2pgsql_Flatnode_File) && file_exists(CONST_Osm2pgsql_Flatnode_File))
    {
        if ($aCMDResult['verbose']) echo "deleting ".CONST_Osm2pgsql_Flatnode_File."\n";
        unlink(CONST_Osm2pgsql_Flatnode_File);
    }
}
773
// Report success when at least one step ran, otherwise show the usage text.
if ($bDidSomething)
{
    echo "Setup finished.\n";
}
else
{
    showUsage($aCMDOptions, true);
}
782
// Run an SQL script file through psql and echo psql's standard output.
// Files ending in .gz are decompressed with zcat and piped into psql,
// all other files are handed to psql with -f. The standard error output
// of both programs is discarded.
//
// $sFilename  Path of the script file.
//
// Calls fail() when the file does not exist, when a process cannot be
// started or when psql exits with a non-zero code.
function pgsqlRunScriptFile($sFilename)
{
    if (!file_exists($sFilename)) fail('unable to find '.$sFilename);

    // Convert database DSN to psql parameters
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
    // Every argument is shell-escaped so that names containing spaces or
    // shell metacharacters reach psql unchanged.
    $sCMD = 'psql -p '.escapeshellarg((string)$aDSNInfo['port']).' -d '.escapeshellarg((string)$aDSNInfo['database']);

    $ahGzipPipes = null;
    if (preg_match('/\\.gz$/', $sFilename))
    {
        $aDescriptors = array(
            0 => array('pipe', 'r'),
            1 => array('pipe', 'w'),
            2 => array('file', '/dev/null', 'a')
        );
        $hGzipProcess = proc_open('zcat '.escapeshellarg($sFilename), $aDescriptors, $ahGzipPipes);
        if (!is_resource($hGzipProcess)) fail('unable to start zcat');
        // psql reads its standard input directly from zcat's output.
        $aReadPipe = $ahGzipPipes[1];
        // zcat gets no input of its own.
        fclose($ahGzipPipes[0]);
    }
    else
    {
        $sCMD .= ' -f '.escapeshellarg($sFilename);
        $aReadPipe = array('pipe', 'r');
    }

    $aDescriptors = array(
        0 => $aReadPipe,
        1 => array('pipe', 'w'),
        2 => array('file', '/dev/null', 'a')
    );
    $ahPipes = null;
    $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
    if (!is_resource($hProcess)) fail('unable to start pgsql');

    // With -f nothing is ever written to psql's stdin pipe, so close it
    // instead of leaving it open. In the gzip case stdin is zcat's stream
    // and no such pipe exists.
    if (isset($ahPipes[0])) fclose($ahPipes[0]);

    // TODO: error checking
    while(!feof($ahPipes[1]))
    {
        echo fread($ahPipes[1], 4096);
    }
    fclose($ahPipes[1]);

    $iReturn = proc_close($hProcess);
    if ($iReturn > 0)
    {
        fail("pgsql returned with error code ($iReturn)");
    }
    if ($ahGzipPipes)
    {
        fclose($ahGzipPipes[1]);
        proc_close($hGzipProcess);
    }
}
840
// Send an SQL script to psql through its standard input. psql writes
// straight to this script's STDOUT and STDERR.
//
// $sScript  SQL text to execute.
// $bfatal   When true, psql is told to stop at the first SQL error (unless
//           --ignore-errors was given) and a non-zero exit code of psql
//           ends the setup via fail().
function pgsqlRunScript($sScript, $bfatal = true)
{
    global $aCMDResult;

    // Derive the psql connection parameters from the database DSN.
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (empty($aDSNInfo['port']))
    {
        $aDSNInfo['port'] = 5432;
    }

    $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'];
    if ($bfatal && !$aCMDResult['ignore-errors'])
    {
        $sCMD .= ' -v ON_ERROR_STOP=1';
    }

    $ahPipes = null;
    $hProcess = @proc_open(
        $sCMD,
        array(
            0 => array('pipe', 'r'),
            1 => STDOUT,
            2 => STDERR
        ),
        $ahPipes
    );
    if (!is_resource($hProcess)) fail('unable to start pgsql');

    // fwrite() may accept only part of the data, so keep writing the
    // remainder until everything is sent or a write makes no progress.
    $hStdin = $ahPipes[0];
    $sRemaining = $sScript;
    while (strlen($sRemaining))
    {
        $iWritten = fwrite($hStdin, $sRemaining);
        if ($iWritten <= 0) break;
        $sRemaining = substr($sRemaining, $iWritten);
    }
    fclose($hStdin);

    $iExitCode = proc_close($hProcess);
    if ($bfatal && $iExitCode > 0)
    {
        fail('pgsql returned with error code ('.$iExitCode.')');
    }
}
872
// Load a custom-format dump with pg_restore using the -a switch and echo
// pg_restore's standard output. Standard error is discarded and the exit
// code of pg_restore is not evaluated.
//
// $sDumpFile  Path of the dump file.
function pgsqlRunRestoreData($sDumpFile)
{
    // Derive the connection parameters from the database DSN.
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (empty($aDSNInfo['port']))
    {
        $aDSNInfo['port'] = 5432;
    }
    $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -Fc -a '.$sDumpFile;

    $ahPipes = null;
    $hProcess = proc_open(
        $sCMD,
        array(
            0 => array('pipe', 'r'),
            1 => array('pipe', 'w'),
            2 => array('file', '/dev/null', 'a')
        ),
        $ahPipes
    );
    if (!is_resource($hProcess)) fail('unable to start pg_restore');

    // Nothing is sent to pg_restore, so its stdin is closed right away.
    fclose($ahPipes[0]);

    // TODO: error checking
    $hStdout = $ahPipes[1];
    while (!feof($hStdout))
    {
        echo fread($hStdout, 4096);
    }
    fclose($hStdout);

    proc_close($hProcess);
}
900
// Load a custom-format dump with pg_restore using the --clean switch and
// echo pg_restore's standard output. Standard error is discarded and the
// exit code of pg_restore is not evaluated.
//
// $sDumpFile  Path of the dump file.
function pgsqlRunDropAndRestore($sDumpFile)
{
    // Derive the connection parameters from the database DSN.
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (empty($aDSNInfo['port']))
    {
        $aDSNInfo['port'] = 5432;
    }
    $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -Fc --clean '.$sDumpFile;

    $ahPipes = null;
    $hProcess = proc_open(
        $sCMD,
        array(
            0 => array('pipe', 'r'),
            1 => array('pipe', 'w'),
            2 => array('file', '/dev/null', 'a')
        ),
        $ahPipes
    );
    if (!is_resource($hProcess)) fail('unable to start pg_restore');

    // Nothing is sent to pg_restore, so its stdin is closed right away.
    fclose($ahPipes[0]);

    // TODO: error checking
    $hStdout = $ahPipes[1];
    while (!feof($hStdout))
    {
        echo fread($hStdout, 4096);
    }
    fclose($hStdout);

    proc_close($hProcess);
}
928
// Run an external command with passthru() and end the setup via fail()
// unless the command exits with status 0.
//
// $cmd  Complete shell command line.
function passthruCheckReturn($cmd)
{
    // Preset to an error value in case passthru() does not set a status.
    $iExitStatus = -1;
    passthru($cmd, $iExitStatus);
    if ($iExitStatus == 0)
    {
        return;
    }
    fail('Error executing external command: '.$cmd);
}
935
// Substitute a tablespace placeholder in an SQL text.
//
// $sTemplate    Placeholder to look for, e.g. '{ts:search-index}'.
// $sTablespace  Tablespace name; any falsy value removes the placeholder.
// $sSql         SQL text containing the placeholder.
//
// Returns the SQL text in which every occurrence of the placeholder is
// replaced by 'TABLESPACE "<name>"' or by the empty string.
function replace_tablespace($sTemplate, $sTablespace, $sSql)
{
    $sClause = $sTablespace ? 'TABLESPACE "'.$sTablespace.'"' : '';

    return str_replace($sTemplate, $sClause, $sSql);
}
946
// Install the SQL functions from sql/functions.sql. The template is
// adjusted with plain text substitutions that depend on the command line
// options and on the settings before it is passed to psql.
//
// $aCMDResult  Parsed command line options; 'enable-diff-updates' and
//              'enable-debug-statements' are read.
function create_sql_functions($aCMDResult)
{
    // Ordered list of (search, replacement) pairs.
    $aSubstitutions = array(
        array('{modulepath}', CONST_InstallPath.'/module')
    );
    if ($aCMDResult['enable-diff-updates'])
    {
        $aSubstitutions[] = array('RETURN NEW; -- %DIFFUPDATES%', '--');
    }
    if ($aCMDResult['enable-debug-statements'])
    {
        $aSubstitutions[] = array('--DEBUG:', '');
    }
    if (CONST_Limit_Reindexing)
    {
        $aSubstitutions[] = array('--LIMIT INDEXING:', '');
    }
    if (!CONST_Use_US_Tiger_Data)
    {
        $aSubstitutions[] = array('-- %NOTIGERDATA% ', '');
    }
    if (!CONST_Use_Aux_Location_data)
    {
        $aSubstitutions[] = array('-- %NOAUXDATA% ', '');
    }

    $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
    foreach ($aSubstitutions as $aPair)
    {
        $sTemplate = str_replace($aPair[0], $aPair[1], $sTemplate);
    }
    pgsqlRunScript($sTemplate);
}
974