]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
94c7edb5209d13fc3d6d7d1f7e6c365a0ddb0641
[nominatim.git] / utils / setup.php
1 #!/usr/bin/php -Cq
2 <?php
3
4         require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
5         require_once(CONST_BasePath.'/lib/init-cmd.php');
6         ini_set('memory_limit', '800M');
7
// Compact description of every command line switch:
//   long name, short flag, value type, help text.
// The order is kept as-is because the option list is handed to getCmdOpt()
// in exactly this sequence.
$aOptionSpecs = array(
    array('help', 'h', false, 'Show Help'),
    array('quiet', 'q', 'bool', 'Quiet output'),
    array('verbose', 'v', 'bool', 'Verbose output'),

    array('osm-file', '', 'realpath', 'File to import'),
    array('threads', '', 'int', 'Number of threads (where possible)'),

    array('all', '', 'bool', 'Do the complete process'),

    array('create-db', '', 'bool', 'Create nominatim db'),
    array('setup-db', '', 'bool', 'Build a blank nominatim db'),
    array('import-data', '', 'bool', 'Import a osm file'),
    array('osm2pgsql-cache', '', 'int', 'Cache size used by osm2pgsql'),
    array('create-functions', '', 'bool', 'Create functions'),
    array('enable-diff-updates', '', 'bool', 'Turn on the code required to make diff updates work'),
    array('enable-debug-statements', '', 'bool', 'Include debug warning statements in pgsql commands'),
    array('ignore-errors', '', 'bool', 'Continue import even when errors in SQL are present (EXPERT)'),
    array('create-tables', '', 'bool', 'Create main tables'),
    array('create-partition-tables', '', 'bool', 'Create required partition tables'),
    array('create-partition-functions', '', 'bool', 'Create required partition triggers'),
    array('no-partitions', '', 'bool', 'Do not partition search indices (speeds up import of single country extracts)'),
    array('import-wikipedia-articles', '', 'bool', 'Import wikipedia article dump'),
    array('load-data', '', 'bool', 'Copy data to live tables from import table'),
    array('disable-token-precalc', '', 'bool', 'Disable name precalculation (EXPERT)'),
    array('import-tiger-data', '', 'bool', "Import tiger data (not included in 'all')"),
    array('calculate-postcodes', '', 'bool', 'Calculate postcode centroids'),
    array('osmosis-init', '', 'bool', 'Generate default osmosis configuration'),
    array('index', '', 'bool', 'Index the data'),
    array('index-noanalyse', '', 'bool', 'Do not perform analyse operations during index (EXPERT)'),
    array('create-search-indices', '', 'bool', 'Create additional indices required for search and update'),
    array('drop', '', 'bool', 'Drop tables needed for updates, making the database readonly (EXPERIMENTAL)'),
);

// Expand the specs into the positional layout getCmdOpt() receives. The first
// element is the program description. In each option the two fixed numbers
// (0, 1) and the duplicated parameter count are passed through untouched
// (NOTE(review): their exact meaning is defined by getCmdOpt() in
// lib/init-cmd.php - confirm there). Switches without a value (type false or
// 'bool') get a count of 0, every other type a count of 1.
$aCMDOptions = array('Create and setup nominatim search system');
foreach ($aOptionSpecs as $aSpec)
{
    list($sName, $sShort, $mType, $sHelp) = $aSpec;
    $iParams = ($mType === false || $mType === 'bool') ? 0 : 1;
    $aCMDOptions[] = array($sName, $sShort, 0, 1, $iParams, $iParams, $mType, $sHelp);
}
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
43
// Set to true by every step that actually runs.
$bDidSomething = false;

// --all and --import-data both need a usable --osm-file: it has to be given,
// has to exist and has to be readable. Check this before any work is done.
$bImportRequested = $aCMDResult['import-data'] || $aCMDResult['all'];
if ($bImportRequested)
{
    if (!isset($aCMDResult['osm-file']))
    {
        fail('missing --osm-file for data import');
    }

    $sImportFile = $aCMDResult['osm-file'];

    if (!file_exists($sImportFile))
    {
        fail('the path supplied to --osm-file does not exist');
    }

    if (!is_readable($sImportFile))
    {
        fail('osm-file "'.$sImportFile.'" not readable');
    }
}
64
65
// Number of parallel workers: --threads if given, otherwise a fairly
// aggressive default of "all processors but one".
if (isset($aCMDResult['threads']))
{
    $iInstances = $aCMDResult['threads'];
}
else
{
    $iInstances = getProcessorCount() - 1;
}

// Clamp to at least one thread ...
if ($iInstances < 1)
{
    $iInstances = 1;
    echo 'WARNING: resetting threads to '.$iInstances."\n";
}
// ... and to at most the number of processors.
if ($iInstances > getProcessorCount())
{
    $iInstances = getProcessorCount();
    echo 'WARNING: resetting threads to '.$iInstances."\n";
}

// osm2pgsql cache size: unless told otherwise, take whatever
// getCacheMemoryMB() reports.
$iCacheMemory = isset($aCMDResult['osm2pgsql-cache'])
    ? $aCMDResult['osm2pgsql-cache']
    : getCacheMemoryMB();

// Connection details from the configured DSN; a missing or empty port falls
// back to 5432.
$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
if (empty($aDSNInfo['port']))
{
    $aDSNInfo['port'] = 5432;
}
91
// --create-db: create the (still empty) database with the createdb tool.
if ($aCMDResult['create-db'] || $aCMDResult['all'])
{
    echo "Create DB\n";
    $bDidSomething = true;

    // A connection that succeeds means the database is already there;
    // refuse to continue in that case.
    $oDB = DB::connect(CONST_Database_DSN, false);
    $bConnectFailed = PEAR::isError($oDB);
    if (!$bConnectFailed)
    {
        fail('database already exists ('.CONST_Database_DSN.')');
    }

    $sCreateDbCmd = 'createdb -E UTF-8 -p '.$aDSNInfo['port'].' '.$aDSNInfo['database'];
    passthruCheckReturn($sCreateDbCmd);
}
103
// --setup-db: prepare a blank database - check server versions, install the
// required extensions and load the static country/postcode data.
if ($aCMDResult['setup-db'] || $aCMDResult['all'])
{
    echo "Setup DB\n";
    $bDidSomething = true;
    // TODO: path detection, detection memory, etc.

    $oDB =& getDB();

    $fPostgresVersion = getPostgresVersion($oDB);
    echo 'Postgres version found: '.$fPostgresVersion."\n";

    if ($fPostgresVersion < 9.1)
    {
        fail("Minimum supported version of Postgresql is 9.1.");
    }

    pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS hstore');
    pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS postgis');

    // For extratags and namedetails the hstore_to_json converter is
    // needed which is only available from Postgresql 9.3+. For older
    // versions add a dummy function that returns nothing.
    $iNumFunc = chksql($oDB->getOne("select count(*) from pg_proc where proname = 'hstore_to_json'"));

    if ($iNumFunc == 0)
    {
        pgsqlRunScript("create function hstore_to_json(dummy hstore) returns text AS 'select null::text' language sql immutable");
        // Terminate the line, otherwise the Postgis version output below is
        // glued onto the end of this warning.
        echo "WARNING: Postgresql is too old. extratags and namedetails API not available.\n";
    }

    $fPostgisVersion = getPostgisVersion($oDB);
    echo 'Postgis version found: '.$fPostgisVersion."\n";

    if ($fPostgisVersion < 2.1)
    {
        // Function was renamed in 2.1 and throws an annoying deprecation warning
        pgsqlRunScript('ALTER FUNCTION st_line_interpolate_point(geometry, double precision) RENAME TO ST_LineInterpolatePoint');
    }

    // Static reference data shipped with the source tree.
    pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
    pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturalearthdata.sql');
    pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql');
    pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_table.sql');

    // The UK postcode data is optional; only warn when it is missing.
    if (file_exists(CONST_BasePath.'/data/gb_postcode_data.sql.gz'))
    {
        pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_data.sql.gz');
    }
    else
    {
        echo "WARNING: external UK postcode table not found.\n";
    }
    if (CONST_Use_Extra_US_Postcodes)
    {
        pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
    }

    // Without partitioning every country is put into partition 0.
    if ($aCMDResult['no-partitions'])
    {
        pgsqlRunScript('update country_name set partition = 0');
    }

    // the following will be needed by create_functions later but
    // is only defined in the subsequently called create_tables.
    // Create dummies here that will be overwritten by the proper
    // versions in create-tables.
    pgsqlRunScript('CREATE TABLE place_boundingbox ()');
    pgsqlRunScript('create type wikipedia_article_match as ()');
}
172
// --import-data: run osm2pgsql (gazetteer output) on the file given with
// --osm-file and verify that the place table received data.
if ($aCMDResult['import-data'] || $aCMDResult['all'])
{
    echo "Import\n";
    $bDidSomething = true;

    $osm2pgsql = CONST_Osm2pgsql_Binary;
    if (!file_exists($osm2pgsql))
    {
        echo "Please download and build osm2pgsql.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
        fail("osm2pgsql not found in '$osm2pgsql'");
    }

    // Optional settings taken from the configuration constants.
    if (!is_null(CONST_Osm2pgsql_Flatnode_File))
    {
        $osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
    }
    if (CONST_Tablespace_Osm2pgsql_Data)
        $osm2pgsql .= ' --tablespace-slim-data '.CONST_Tablespace_Osm2pgsql_Data;
    if (CONST_Tablespace_Osm2pgsql_Index)
        $osm2pgsql .= ' --tablespace-slim-index '.CONST_Tablespace_Osm2pgsql_Index;
    if (CONST_Tablespace_Place_Data)
        $osm2pgsql .= ' --tablespace-main-data '.CONST_Tablespace_Place_Data;
    if (CONST_Tablespace_Place_Index)
        $osm2pgsql .= ' --tablespace-main-index '.CONST_Tablespace_Place_Index;
    $osm2pgsql .= ' -lsc -O gazetteer --hstore --number-processes 1';
    $osm2pgsql .= ' -C '.$iCacheMemory;
    $osm2pgsql .= ' -P '.$aDSNInfo['port'];
    // The input file comes straight from the command line; quote it so that
    // paths containing spaces or shell metacharacters reach osm2pgsql as one
    // single argument.
    $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.escapeshellarg($aCMDResult['osm-file']);
    passthruCheckReturn($osm2pgsql);

    // An import that leaves the place table empty is treated as a failure.
    $oDB =& getDB();
    if (!chksql($oDB->getRow('select * from place limit 1')))
    {
        fail('No Data');
    }
}
209
// --create-functions: (re)install the SQL functions. The compiled nominatim
// module has to be present in the install directory.
if ($aCMDResult['create-functions'] || $aCMDResult['all'])
{
    echo "Functions\n";
    $bDidSomething = true;

    $sModuleFile = CONST_InstallPath.'/module/nominatim.so';
    if (!file_exists($sModuleFile))
    {
        fail("nominatim module not built");
    }

    create_sql_functions($aCMDResult);
}
217
// --create-tables: run sql/tables.sql after filling in the web user and the
// tablespace placeholders, then install the SQL functions again.
if ($aCMDResult['create-tables'] || $aCMDResult['all'])
{
    $bDidSomething = true;

    echo "Tables\n";
    $sTemplate = file_get_contents(CONST_BasePath.'/sql/tables.sql');
    $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);

    // Placeholder => configured tablespace, substituted in this order.
    $aTablespaces = array(
        '{ts:address-data}' => CONST_Tablespace_Address_Data,
        '{ts:address-index}' => CONST_Tablespace_Address_Index,
        '{ts:search-data}' => CONST_Tablespace_Search_Data,
        '{ts:search-index}' => CONST_Tablespace_Search_Index,
        '{ts:aux-data}' => CONST_Tablespace_Aux_Data,
        '{ts:aux-index}' => CONST_Tablespace_Aux_Index,
    );
    foreach ($aTablespaces as $sPlaceholder => $sTablespace)
    {
        $sTemplate = replace_tablespace($sPlaceholder, $sTablespace, $sTemplate);
    }
    pgsqlRunScript($sTemplate, false);

    // re-run the functions
    echo "Functions\n";
    create_sql_functions($aCMDResult);
}
243
// --create-partition-tables: fill the tablespace placeholders in
// sql/partition-tables.src.sql and hand the result to
// pgsqlRunPartitionScript().
if ($aCMDResult['create-partition-tables'] || $aCMDResult['all'])
{
    echo "Partition Tables\n";
    $bDidSomething = true;

    $sTemplate = file_get_contents(CONST_BasePath.'/sql/partition-tables.src.sql');

    // Placeholder => configured tablespace, substituted in this order.
    $aTablespaces = array(
        '{ts:address-data}' => CONST_Tablespace_Address_Data,
        '{ts:address-index}' => CONST_Tablespace_Address_Index,
        '{ts:search-data}' => CONST_Tablespace_Search_Data,
        '{ts:search-index}' => CONST_Tablespace_Search_Index,
        '{ts:aux-data}' => CONST_Tablespace_Aux_Data,
        '{ts:aux-index}' => CONST_Tablespace_Aux_Index,
    );
    foreach ($aTablespaces as $sPlaceholder => $sTablespace)
    {
        $sTemplate = replace_tablespace($sPlaceholder, $sTablespace, $sTemplate);
    }

    pgsqlRunPartitionScript($sTemplate);
}
265
266
// --create-partition-functions: run sql/partition-functions.src.sql through
// pgsqlRunPartitionScript(); this template needs no substitutions here.
if ($aCMDResult['create-partition-functions'] || $aCMDResult['all'])
{
    echo "Partition Functions\n";
    $bDidSomething = true;

    $sTemplate = file_get_contents(CONST_BasePath.'/sql/partition-functions.src.sql');
    pgsqlRunPartitionScript($sTemplate);
}
276
// --import-wikipedia-articles: restore the optional wikipedia article and
// redirect dumps. A missing dump is not an error, it only produces a warning.
if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all'])
{
    $bDidSomething = true;
    $sWikiArticlesFile = CONST_BasePath.'/data/wikipedia_article.sql.bin';
    $sWikiRedirectsFile = CONST_BasePath.'/data/wikipedia_redirect.sql.bin';

    // One entry per dump: file, progress message, warning when it is absent.
    $aWikiDumps = array(
        array(
            $sWikiArticlesFile,
            'Importing wikipedia articles...',
            "WARNING: wikipedia article dump file not found - places will have default importance\n",
        ),
        array(
            $sWikiRedirectsFile,
            'Importing wikipedia redirects...',
            "WARNING: wikipedia redirect dump file not found - some place importance values may be missing\n",
        ),
    );

    foreach ($aWikiDumps as $aWikiDump)
    {
        list($sDumpFile, $sProgressMsg, $sMissingMsg) = $aWikiDump;

        if (!file_exists($sDumpFile))
        {
            echo $sMissingMsg;
            continue;
        }

        echo $sProgressMsg;
        pgsqlRunDropAndRestore($sDumpFile);
        echo "...done\n";
    }
}
303
304
// --load-data: empty the live tables and copy the imported rows from the
// place table into placex / the interpolation table, using several database
// connections in parallel.
if ($aCMDResult['load-data'] || $aCMDResult['all'])
{
    echo "Drop old Data\n";
    $bDidSomething = true;

    $oDB =& getDB();

    // Reset the live tables and restart the place id sequence. Each
    // statement must succeed; a progress dot is printed after every one.
    $aResetStatements = array(
        'TRUNCATE word',
        'TRUNCATE placex',
        'TRUNCATE location_property_osmline',
        'TRUNCATE place_addressline',
        'TRUNCATE place_boundingbox',
        'TRUNCATE location_area',
        'TRUNCATE search_name',
        'TRUNCATE search_name_blank',
        'DROP SEQUENCE seq_place',
        'CREATE SEQUENCE seq_place start 100000',
    );
    foreach ($aResetStatements as $sResetSQL)
    {
        if (!pg_query($oDB->connection, $sResetSQL)) fail(pg_last_error($oDB->connection));
        echo '.';
    }

    // Empty the per-partition road tables; partition 0 is added explicitly
    // unless partitioning is switched off.
    $sSQL = 'select distinct partition from country_name';
    $aPartitions = chksql($oDB->getCol($sSQL));
    if (!$aCMDResult['no-partitions']) $aPartitions[] = 0;
    foreach($aPartitions as $sPartition)
    {
        if (!pg_query($oDB->connection, 'TRUNCATE location_road_'.$sPartition)) fail(pg_last_error($oDB->connection));
        echo '.';
    }

    // used by getorcreate_word_id to ignore frequent partial words
    if (!pg_query($oDB->connection, 'CREATE OR REPLACE FUNCTION get_maxwordfreq() RETURNS integer AS $$ SELECT '.CONST_Max_Word_Frequency.' as maxwordfreq; $$ LANGUAGE SQL IMMUTABLE')) fail(pg_last_error($oDB->connection));
    echo ".\n";

    // pre-create the word list
    if (!$aCMDResult['disable-token-precalc'])
    {
        echo "Loading word list\n";
        pgsqlRunScriptFile(CONST_BasePath.'/data/words.sql');
    }

    echo "Load Data\n";
    $aDBInstances = array();
    // One connection is reserved for the interpolation lines below, the
    // remaining ones (at least one) share the placex inserts by osm_id.
    $iLoadThreads = max(1, $iInstances - 1);
    for($i = 0; $i < $iLoadThreads; $i++)
    {
        $aDBInstances[$i] =& getDB(true);
        $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
        $sSQL .= 'housenumber, street, addr_place, isin, postcode, country_code, extratags, ';
        $sSQL .= 'geometry) select * from place where osm_id % '.$iLoadThreads.' = '.$i;
        $sSQL .= " and not (class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString')";
        if ($aCMDResult['verbose']) echo "$sSQL\n";
        // Report the error of the connection the query was sent on, not of
        // the unrelated main connection.
        if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($aDBInstances[$i]->connection));
    }
    // last thread for interpolation lines
    $aDBInstances[$iLoadThreads] =& getDB(true);
    $sSQL = 'select insert_osmline (osm_id, housenumber, street, addr_place, postcode, country_code, ';
    $sSQL .= 'geometry) from place where ';
    $sSQL .= "class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString'";
    if ($aCMDResult['verbose']) echo "$sSQL\n";
    // Address the dedicated connection by its index instead of relying on
    // the value the loop counter happens to have after the loop.
    if (!pg_send_query($aDBInstances[$iLoadThreads]->connection, $sSQL)) fail(pg_last_error($aDBInstances[$iLoadThreads]->connection));

    // Poll once per second until none of the connections is busy any more.
    // NOTE(review): the results of the asynchronous statements are never
    // fetched here, so an SQL error inside one of them is not reported.
    $bAnyBusy = true;
    while($bAnyBusy)
    {
        $bAnyBusy = false;
        for($i = 0; $i <= $iLoadThreads; $i++)
        {
            if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
        }
        sleep(1);
        echo '.';
    }
    echo "\n";
    echo "Reanalysing database...\n";
    pgsqlRunScript('ANALYSE');
}
388
// --import-tiger-data: feed every *.sql file below CONST_Tiger_Data_Path line
// by line to a pool of database connections. Not part of --all.
if ($aCMDResult['import-tiger-data'])
{
    $bDidSomething = true;

    // Preparation script with web user and tablespaces filled in.
    $sTemplate = file_get_contents(CONST_BasePath.'/sql/tiger_import_start.sql');
    $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);
    $sTemplate = replace_tablespace('{ts:aux-data}',
                                    CONST_Tablespace_Aux_Data, $sTemplate);
    $sTemplate = replace_tablespace('{ts:aux-index}',
                                    CONST_Tablespace_Aux_Index, $sTemplate);
    pgsqlRunScript($sTemplate, false);

    // One separate connection per worker.
    $aDBInstances = array();
    for($i = 0; $i < $iInstances; $i++)
    {
        $aDBInstances[$i] =& getDB(true);
    }

    foreach(glob(CONST_Tiger_Data_Path.'/*.sql') as $sFile)
    {
        echo $sFile.': ';
        $hFile = fopen($sFile, "r");
        if ($hFile === false)
        {
            // Nothing can be read from this file; carry on with the next one.
            echo "cannot open file, skipping\n";
            continue;
        }
        // NOTE(review): this line is overwritten below before anything is
        // sent, i.e. the first line of every file is skipped - presumably a
        // header line; confirm against the tiger data format.
        $sSQL = fgets($hFile, 100000);
        $iLines = 0;

        // Hand the next line to whichever connection is idle until the file
        // is exhausted.
        while(true)
        {
            for($i = 0; $i < $iInstances; $i++)
            {
                if (!pg_connection_busy($aDBInstances[$i]->connection))
                {
                    // Drain pending results so the connection accepts a new query.
                    while(pg_get_result($aDBInstances[$i]->connection));
                    $sSQL = fgets($hFile, 100000);
                    if (!$sSQL) break 2;
                    // $oDB is not defined when only --import-tiger-data is
                    // run; report the error of the connection that was used.
                    if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($aDBInstances[$i]->connection));
                    $iLines++;
                    // One progress dot per 1000 statements.
                    if ($iLines == 1000)
                    {
                        echo ".";
                        $iLines = 0;
                    }
                }
            }
            usleep(10);
        }

        fclose($hFile);

        // Wait for all connections to finish before starting the next file.
        $bAnyBusy = true;
        while($bAnyBusy)
        {
            $bAnyBusy = false;
            for($i = 0; $i < $iInstances; $i++)
            {
                if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
            }
            usleep(10);
        }
        echo "\n";
    }

    echo "Creating indexes\n";
    $sTemplate = file_get_contents(CONST_BasePath.'/sql/tiger_import_finish.sql');
    $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);
    $sTemplate = replace_tablespace('{ts:aux-data}',
                                    CONST_Tablespace_Aux_Data, $sTemplate);
    $sTemplate = replace_tablespace('{ts:aux-index}',
                                    CONST_Tablespace_Aux_Index, $sTemplate);
    pgsqlRunScript($sTemplate, false);
}
459
// --calculate-postcodes: replace all artificial postcode rows (osm_type 'P')
// in placex with freshly computed ones.
if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
{
    $bDidSomething = true;
    $oDB =& getDB();

    // Remove the postcode rows of a previous run.
    $sSQL = "DELETE from placex where osm_type='P'";
    if (!pg_query($oDB->connection, $sSQL))
    {
        fail(pg_last_error($oDB->connection));
    }

    // Both inserts below fill the same placex columns.
    $sInsertHead = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) ";

    // One point per country/postcode pair, placed at the average of the
    // centroids of all places carrying that postcode.
    $sSQL = $sInsertHead
        ."select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,"
        ."ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,"
        ."avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y "
        ."from placex where postcode is not null group by calculated_country_code,postcode) as x";
    if (!pg_query($oDB->connection, $sSQL))
    {
        fail(pg_last_error($oDB->connection));
    }

    // Optionally add the entries of the us_postcode table as well.
    if (CONST_Use_Extra_US_Postcodes)
    {
        $sSQL = $sInsertHead
            ."select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',"
            ."ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode";
        if (!pg_query($oDB->connection, $sSQL))
        {
            fail(pg_last_error($oDB->connection));
        }
    }
}
480
// --osmosis-init: create the osmosis replication configuration and pick the
// replication state file matching the imported data. When running as part of
// --all, problems in this step are not fatal.
if ($aCMDResult['osmosis-init'] || ($aCMDResult['all'] && !$aCMDResult['drop'])) // no use doing osmosis-init when dropping update tables
{
    $bDidSomething = true;
    $oDB =& getDB();

    if (!file_exists(CONST_Osmosis_Binary))
    {
        echo "Please download osmosis.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
        if (!$aCMDResult['all'])
        {
            fail("osmosis not found in '".CONST_Osmosis_Binary."'");
        }
    }
    else
    {
        if (file_exists(CONST_InstallPath.'/settings/configuration.txt'))
        {
            echo "settings/configuration.txt already exists\n";
        }
        else
        {
            passthru(CONST_Osmosis_Binary.' --read-replication-interval-init '.CONST_InstallPath.'/settings');
            // update osmosis configuration.txt with our settings
            passthru("sed -i 's!baseUrl=.*!baseUrl=".CONST_Replication_Url."!' ".CONST_InstallPath.'/settings/configuration.txt');
            passthru("sed -i 's:maxInterval = .*:maxInterval = ".CONST_Replication_MaxInterval.":' ".CONST_InstallPath.'/settings/configuration.txt');
        }

        // Find the last node in the DB
        $iLastOSMID = chksql($oDB->getOne("select max(osm_id) from place where osm_type = 'N'"));

        // Lookup the timestamp that node was created (less 3 hours for margin for changesets to be closed)
        $sLastNodeURL = 'http://www.openstreetmap.org/api/0.6/node/'.$iLastOSMID."/1";
        $sLastNodeXML = file_get_contents($sLastNodeURL);

        // The download or the timestamp extraction can fail (empty place
        // table, network trouble, unexpected answer). Without a timestamp
        // the search below would silently end up on the oldest state file,
        // so treat that case explicitly.
        $iLastNodeTimestamp = false;
        if ($sLastNodeXML !== false
            && preg_match('#timestamp="(([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})Z)"#', $sLastNodeXML, $aLastNodeDate))
        {
            $iLastNodeTime = strtotime($aLastNodeDate[1]);
            if ($iLastNodeTime !== false)
            {
                $iLastNodeTimestamp = $iLastNodeTime - (3*60*60);
            }
        }

        if ($iLastNodeTimestamp === false)
        {
            echo "WARNING: cannot determine the creation date of the last imported node, replication state not initialised\n";
            if (!$aCMDResult['all'])
            {
                fail("Cannot determine date of last imported node.");
            }
        }
        else
        {
            // Search for the correct state file - uses file timestamps so need to sort by date descending
            $sRepURL = CONST_Replication_Url."/";
            $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1");
            // download.geofabrik.de:    <a href="000/">000/</a></td><td align="right">26-Feb-2013 11:53  </td>
            // planet.openstreetmap.org: <a href="273/">273/</a>                    2013-03-11 07:41    -
            preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a>\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', $sRep, $aRepMatches, PREG_SET_ORDER);
            if ($aRepMatches)
            {
                // First directory level: take the last entry that is not
                // older than the node timestamp (entries are listed newest
                // first); if already the first entry is older, that one is
                // used.
                $aPrevRepMatch = false;
                foreach($aRepMatches as $aRepMatch)
                {
                    if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
                    $aPrevRepMatch = $aRepMatch;
                }
                if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;

                // Second directory level, same selection.
                $sRepURL .= $aRepMatch[1];
                $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1");
                preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a>\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', $sRep, $aRepMatches, PREG_SET_ORDER);
                $aPrevRepMatch = false;
                foreach($aRepMatches as $aRepMatch)
                {
                    if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
                    $aPrevRepMatch = $aRepMatch;
                }
                if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;

                // Third level: the NNN.state.txt files themselves.
                $sRepURL .= $aRepMatch[1];
                $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1");
                preg_match_all('#<a href="[0-9]{3}.state.txt">([0-9]{3}).state.txt</a>\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', $sRep, $aRepMatches, PREG_SET_ORDER);
                $aPrevRepMatch = false;
                foreach($aRepMatches as $aRepMatch)
                {
                    if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
                    $aPrevRepMatch = $aRepMatch;
                }
                if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;

                $sRepURL .= $aRepMatch[1].'.state.txt';
                echo "Getting state file: $sRepURL\n";
                $sStateFile = file_get_contents($sRepURL);
                // Anything empty or longer than 1000 bytes is not accepted
                // as a state file.
                if (!$sStateFile || strlen($sStateFile) > 1000) fail("unable to obtain state file");
                file_put_contents(CONST_InstallPath.'/settings/state.txt', $sStateFile);
                echo "Updating DB status\n";
                // Record the listing date of the chosen state file as the
                // import status.
                pg_query($oDB->connection, 'TRUNCATE import_status');
                $sSQL = "INSERT INTO import_status VALUES('".$aRepMatch[2]."')";
                pg_query($oDB->connection, $sSQL);
            }
            else
            {
                if (!$aCMDResult['all'])
                {
                    fail("Cannot read state file directory.");
                }
            }
        }
    }
}
574
// Indexing step: runs the external nominatim indexer three times, each time
// with a different set of -r/-R arguments, and runs ANALYSE between the
// passes unless 'index-noanalyse' was given.
// NOTE(review): -r/-R are presumably the minimum/maximum rank handled by a
// pass - confirm against the indexer's own documentation.
if ($aCMDResult['index'] || $aCMDResult['all'])
{
    $bDidSomething = true;
    $sOutputFile = '';
    $sBaseCmd = CONST_InstallPath.'/nominatim/nominatim -i'
        .' -d '.$aDSNInfo['database']
        .' -P '.$aDSNInfo['port']
        .' -t '.$iInstances
        .$sOutputFile;

    $aRankArguments = array(' -R 4', ' -r 5 -R 25', ' -r 26');
    foreach ($aRankArguments as $iPass => $sRankArgument)
    {
        // Refresh the planner statistics before every pass except the first.
        if ($iPass > 0 && !$aCMDResult['index-noanalyse'])
        {
            pgsqlRunScript('ANALYSE');
        }
        passthruCheckReturn($sBaseCmd.$sRankArgument);
    }
}
586
// Search index step: loads the index template, substitutes the three
// tablespace placeholders and runs the resulting SQL through psql.
if ($aCMDResult['create-search-indices'] || $aCMDResult['all'])
{
    echo "Search indices\n";
    $bDidSomething = true;

    $sTemplate = file_get_contents(CONST_BasePath.'/sql/indices.src.sql');

    // placeholder in the template => configured tablespace
    $aIndexTablespaces = array(
        '{ts:address-index}' => CONST_Tablespace_Address_Index,
        '{ts:search-index}' => CONST_Tablespace_Search_Index,
        '{ts:aux-index}' => CONST_Tablespace_Aux_Index
    );
    foreach ($aIndexTablespaces as $sPlaceholder => $sTablespace)
    {
        $sTemplate = replace_tablespace($sPlaceholder, $sTablespace, $sTemplate);
    }

    pgsqlRunScript($sTemplate);
}
602
// Drop step: removes every table of the public schema that does not match
// the keep list below, then deletes the osm2pgsql flatnode file if one is
// configured.
if ($aCMDResult['drop'])
{
    // The implementation is potentially a bit dangerous because it uses
    // a positive selection of tables to keep, and deletes everything else.
    // Including any tables that the unsuspecting user might have manually
    // created. USE AT YOUR OWN PERIL.
    $bDidSomething = true;

    // tables we want to keep. everything else goes.
    // Entries are wildcard patterns that are matched with fnmatch().
    $aKeepTables = array(
       "*columns",
       "import_polygon_*",
       "import_status",
       "place_addressline",
       "location_property*",
       "placex",
       "search_name",
       "seq_*",
       "word",
       "query_log",
       "new_query_log",
       "gb_postcode",
       "spatial_ref_sys",
       "country_name",
       "place_classtype_*"
    );

    $oDB =& getDB();
    $aDropTables = array();
    $aHaveTables = chksql($oDB->getCol("SELECT tablename FROM pg_tables WHERE schemaname='public'"));

    // Collect every existing table that matches none of the keep patterns.
    foreach($aHaveTables as $sTable)
    {
        $bFound = false;
        foreach ($aKeepTables as $sKeep)
        {
            if (fnmatch($sKeep, $sTable))
            {
                $bFound = true;
                break;
            }
        }
        if (!$bFound) array_push($aDropTables, $sTable);
    }

    foreach ($aDropTables as $sDrop)
    {
        if ($aCMDResult['verbose']) echo "dropping table $sDrop\n";
        // pg_tables reports the exact stored name. Quote it (doubling any
        // embedded double quote) so that mixed-case or otherwise unusual
        // names are not case-folded or misparsed by the server.
        $sQuotedDrop = '"'.str_replace('"', '""', $sDrop).'"';
        @pg_query($oDB->connection, "DROP TABLE $sQuotedDrop CASCADE");
        // ignore warnings/errors as they might be caused by a table having
        // been deleted already by CASCADE
    }

    // Only try to delete the flatnode file when it is really there, so that
    // running the drop step a second time does not raise an unlink() warning.
    if (!is_null(CONST_Osm2pgsql_Flatnode_File) && file_exists(CONST_Osm2pgsql_Flatnode_File))
    {
        if ($aCMDResult['verbose']) echo "deleting ".CONST_Osm2pgsql_Flatnode_File."\n";
        unlink(CONST_Osm2pgsql_Flatnode_File);
    }
}
662
// End of the run: confirm completion when at least one step was executed,
// otherwise print the usage text.
if ($bDidSomething)
{
    echo "Setup finished.\n";
}
else
{
    showUsage($aCMDOptions, true);
}
671
// Runs an SQL file through psql and echoes psql's standard output.
//
// Files whose name ends in ".gz" are decompressed with zcat and streamed
// into psql's standard input; all other files are handed to psql with -f.
// The error output of both child processes is sent to /dev/null.
//
// $sFilename  path of the SQL file; fail() is called when it does not exist.
//
// Calls fail() when a child process cannot be started or when psql
// returns a positive exit code.
function pgsqlRunScriptFile($sFilename)
{
    if (!file_exists($sFilename)) fail('unable to find '.$sFilename);

    // Convert database DSN to psql parameters
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
    $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'];

    $ahGzipPipes = null;
    if (preg_match('/\\.gz$/', $sFilename))
    {
        $aDescriptors = array(
            0 => array('pipe', 'r'),
            1 => array('pipe', 'w'),
            2 => array('file', '/dev/null', 'a')
        );
        // Escape the path so that blanks or shell metacharacters in it
        // cannot break or alter the command line.
        $hGzipProcess = proc_open('zcat '.escapeshellarg($sFilename), $aDescriptors, $ahGzipPipes);
        if (!is_resource($hGzipProcess)) fail('unable to start zcat');
        // psql reads directly from the output of zcat.
        $aReadPipe = $ahGzipPipes[1];
        fclose($ahGzipPipes[0]);
    }
    else
    {
        $sCMD .= ' -f '.escapeshellarg($sFilename);
        $aReadPipe = array('pipe', 'r');
    }

    $aDescriptors = array(
        0 => $aReadPipe,
        1 => array('pipe', 'w'),
        2 => array('file', '/dev/null', 'a')
    );
    $ahPipes = null;
    $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
    if (!is_resource($hProcess)) fail('unable to start pgsql');

    // In the plain-file case a stdin pipe was created although nothing is
    // ever written to it. Close it right away instead of leaving it open.
    // (In the gzip case stdin is the zcat pipe and no entry 0 exists here.)
    if (isset($ahPipes[0])) fclose($ahPipes[0]);

    // TODO: error checking
    while(!feof($ahPipes[1]))
    {
        echo fread($ahPipes[1], 4096);
    }
    fclose($ahPipes[1]);

    $iReturn = proc_close($hProcess);
    if ($iReturn > 0)
    {
        fail("pgsql returned with error code ($iReturn)");
    }
    if ($ahGzipPipes)
    {
        fclose($ahGzipPipes[1]);
        proc_close($hGzipProcess);
    }
}
729
// Feeds an SQL script to psql through its standard input. The output and
// error streams of psql are connected to those of this script.
//
// $sScript  SQL text to execute.
// $bfatal   when true, psql is started with ON_ERROR_STOP=1 (unless the
//           'ignore-errors' option is set) and a positive exit code of
//           psql ends in a call to fail().
function pgsqlRunScript($sScript, $bfatal = true)
{
    global $aCMDResult;

    // Build the psql command line from the database DSN.
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (empty($aDSNInfo['port'])) $aDSNInfo['port'] = 5432;

    $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'];
    if ($bfatal && !$aCMDResult['ignore-errors'])
    {
        $sCMD .= ' -v ON_ERROR_STOP=1';
    }

    $ahPipes = null;
    $hProcess = @proc_open(
        $sCMD,
        array(
            0 => array('pipe', 'r'),
            1 => STDOUT,
            2 => STDERR
        ),
        $ahPipes
    );
    if (!is_resource($hProcess)) fail('unable to start pgsql');

    // fwrite() may accept only part of the data, so keep sending the
    // remainder until everything is written or a write makes no progress.
    for ($sPending = $sScript; strlen($sPending) > 0; $sPending = substr($sPending, $iWritten))
    {
        $iWritten = fwrite($ahPipes[0], $sPending);
        if ($iWritten <= 0) break;
    }
    fclose($ahPipes[0]);

    $iReturn = proc_close($hProcess);
    if ($iReturn > 0 && $bfatal)
    {
        fail("pgsql returned with error code ($iReturn)");
    }
}
761
// Expands the partition sections of an SQL template and runs the result.
//
// A section starts with a line beginning "-- start" and ends at the next
// line beginning "-- end". The text between the two markers is repeated
// once for every partition, with '-partition-' replaced by the partition
// value, and the repetitions replace the whole section in the template.
//
// The partitions are the distinct values of country_name.partition, plus
// partition 0 unless the 'no-partitions' option was given.
//
// $sTemplate  SQL template text.
function pgsqlRunPartitionScript($sTemplate)
{
    // The command line options live in the global scope. Without this
    // declaration the lookup below would read an undefined local variable,
    // the 'no-partitions' option would be ignored and partition 0 would
    // always be appended.
    global $aCMDResult;
    $oDB =& getDB();

    $sSQL = 'select distinct partition from country_name';
    $aPartitions = chksql($oDB->getCol($sSQL));
    if (empty($aCMDResult['no-partitions'])) $aPartitions[] = 0;

    // m: ^ matches at every line start; s: . also matches newlines.
    preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
    foreach($aMatches as $aMatch)
    {
        $sResult = '';
        foreach($aPartitions as $sPartitionName)
        {
            $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
        }
        $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
    }

    pgsqlRunScript($sTemplate);
}
783
// Runs pg_restore with "-Fc -a" on the given dump file against the
// configured database and echoes the standard output of pg_restore.
// Error output goes to /dev/null and the exit status of pg_restore is
// not evaluated.
//
// $sDumpFile  path of the dump file, appended to the command line as is.
function pgsqlRunRestoreData($sDumpFile)
{
    // Build the pg_restore command line from the database DSN.
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (empty($aDSNInfo['port'])) $aDSNInfo['port'] = 5432;

    $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -Fc -a '.$sDumpFile;

    $ahPipes = null;
    $hProcess = proc_open(
        $sCMD,
        array(
            0 => array('pipe', 'r'),
            1 => array('pipe', 'w'),
            2 => array('file', '/dev/null', 'a')
        ),
        $ahPipes
    );
    if (!is_resource($hProcess)) fail('unable to start pg_restore');

    // Nothing is sent to pg_restore, so its input is closed immediately.
    fclose($ahPipes[0]);

    // TODO: error checking
    $hOutput = $ahPipes[1];
    while (!feof($hOutput))
    {
        echo fread($hOutput, 4096);
    }
    fclose($hOutput);

    proc_close($hProcess);
}
811
// Runs pg_restore with "-Fc --clean" on the given dump file against the
// configured database and echoes the standard output of pg_restore.
// Error output goes to /dev/null and the exit status of pg_restore is
// not evaluated.
//
// $sDumpFile  path of the dump file, appended to the command line as is.
function pgsqlRunDropAndRestore($sDumpFile)
{
    // Build the pg_restore command line from the database DSN.
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (empty($aDSNInfo['port'])) $aDSNInfo['port'] = 5432;

    $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -Fc --clean '.$sDumpFile;

    $ahPipes = null;
    $hProcess = proc_open(
        $sCMD,
        array(
            0 => array('pipe', 'r'),
            1 => array('pipe', 'w'),
            2 => array('file', '/dev/null', 'a')
        ),
        $ahPipes
    );
    if (!is_resource($hProcess)) fail('unable to start pg_restore');

    // Nothing is sent to pg_restore, so its input is closed immediately.
    fclose($ahPipes[0]);

    // TODO: error checking
    $hOutput = $ahPipes[1];
    while (!feof($hOutput))
    {
        echo fread($hOutput, 4096);
    }
    fclose($hOutput);

    proc_close($hProcess);
}
839
// Executes an external command with passthru(), so that its output is
// passed on directly, and calls fail() when the command does not finish
// with exit status 0.
//
// $cmd  complete command line to execute.
function passthruCheckReturn($cmd)
{
    // Start with a non-zero status in case passthru() does not set one.
    $iExitStatus = -1;
    passthru($cmd, $iExitStatus);
    if ($iExitStatus == 0) return;

    fail('Error executing external command: '.$cmd);
}
846
// Substitutes a tablespace placeholder in a piece of SQL.
//
// $sTemplate    placeholder text to look for.
// $sTablespace  tablespace name; any value PHP treats as false (such as
//               '', '0', false or null) removes the placeholder instead.
// $sSql         SQL text containing the placeholder.
//
// Returns $sSql with every occurrence of the placeholder replaced by
// 'TABLESPACE "<name>"' or by the empty string.
function replace_tablespace($sTemplate, $sTablespace, $sSql)
{
    $sClause = $sTablespace ? 'TABLESPACE "'.$sTablespace.'"' : '';

    return str_replace($sTemplate, $sClause, $sSql);
}
857
// Prepares sql/functions.sql and runs it through psql.
//
// The '{modulepath}' placeholder is replaced by the module directory below
// the install path. After that a number of marker strings are replaced,
// each one only when its option or configuration constant calls for it.
//
// $aCMDResult  parsed command line options; 'enable-diff-updates' and
//              'enable-debug-statements' are read.
function create_sql_functions($aCMDResult)
{
    $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
    $sTemplate = str_replace('{modulepath}', CONST_InstallPath.'/module', $sTemplate);

    // One row per optional edit: (apply?, marker text, replacement text).
    // The rows are applied from top to bottom.
    $aOptionalEdits = array(
        array($aCMDResult['enable-diff-updates'], 'RETURN NEW; -- %DIFFUPDATES%', '--'),
        array($aCMDResult['enable-debug-statements'], '--DEBUG:', ''),
        array(CONST_Limit_Reindexing, '--LIMIT INDEXING:', ''),
        array(!CONST_Use_US_Tiger_Data, '-- %NOTIGERDATA% ', ''),
        array(!CONST_Use_Aux_Location_data, '-- %NOAUXDATA% ', '')
    );
    foreach ($aOptionalEdits as $aEdit)
    {
        list($bApply, $sMarker, $sReplacement) = $aEdit;
        if ($bApply)
        {
            $sTemplate = str_replace($sMarker, $sReplacement, $sTemplate);
        }
    }

    pgsqlRunScript($sTemplate);
}
885