]> git.openstreetmap.org Git - nominatim.git/blob - lib-php/setup/SetupClass.php
Merge pull request #2192 from lonvia/database-versioning
[nominatim.git] / lib-php / setup / SetupClass.php
1 <?php
2
3 namespace Nominatim\Setup;
4
5 require_once(CONST_LibDir.'/Shell.php');
6
7 class SetupFunctions
8 {
9     protected $iInstances;
10     protected $aDSNInfo;
11     protected $bQuiet;
12     protected $bVerbose;
13     protected $sIgnoreErrors;
14     protected $bEnableDiffUpdates;
15     protected $bEnableDebugStatements;
16     protected $bDrop;
17     protected $oDB = null;
18     protected $oNominatimCmd;
19
20     public function __construct(array $aCMDResult)
21     {
22         // by default, use all but one processor, but never more than 15.
23         $this->iInstances = isset($aCMDResult['threads'])
24             ? $aCMDResult['threads']
25             : (min(16, getProcessorCount()) - 1);
26
27         if ($this->iInstances < 1) {
28             $this->iInstances = 1;
29             warn('resetting threads to '.$this->iInstances);
30         }
31
32         // parse database string
33         $this->aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
34         if (!isset($this->aDSNInfo['port'])) {
35             $this->aDSNInfo['port'] = 5432;
36         }
37
38         // setting member variables based on command line options stored in $aCMDResult
39         $this->bQuiet = isset($aCMDResult['quiet']) && $aCMDResult['quiet'];
40         $this->bVerbose = $aCMDResult['verbose'];
41
42         //setting default values which are not set by the update.php array
43         if (isset($aCMDResult['ignore-errors'])) {
44             $this->sIgnoreErrors = $aCMDResult['ignore-errors'];
45         } else {
46             $this->sIgnoreErrors = false;
47         }
48         if (isset($aCMDResult['enable-debug-statements'])) {
49             $this->bEnableDebugStatements = $aCMDResult['enable-debug-statements'];
50         } else {
51             $this->bEnableDebugStatements = false;
52         }
53         if (isset($aCMDResult['enable-diff-updates'])) {
54             $this->bEnableDiffUpdates = $aCMDResult['enable-diff-updates'];
55         } else {
56             $this->bEnableDiffUpdates = false;
57         }
58
59         $this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop'];
60
61         $this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
62         if ($this->bQuiet) {
63             $this->oNominatimCmd->addParams('--quiet');
64         }
65         if ($this->bVerbose) {
66             $this->oNominatimCmd->addParams('--verbose');
67         }
68     }
69
70     public function createFunctions()
71     {
72         info('Create Functions');
73
74         // Try accessing the C module, so we know early if something is wrong
75         $this->checkModulePresence(); // raises exception on failure
76
77         $this->createSqlFunctions();
78     }
79
80     public function createTables($bReverseOnly = false)
81     {
82         info('Create Tables');
83
84         $sTemplate = file_get_contents(CONST_SqlDir.'/tables.sql');
85         $sTemplate = $this->replaceSqlPatterns($sTemplate);
86
87         $this->pgsqlRunScript($sTemplate, false);
88
89         if ($bReverseOnly) {
90             $this->dropTable('search_name');
91         }
92
93         (clone($this->oNominatimCmd))->addParams('refresh', '--address-levels')->run();
94     }
95
96     public function createTableTriggers()
97     {
98         info('Create Tables');
99
100         $sTemplate = file_get_contents(CONST_SqlDir.'/table-triggers.sql');
101         $sTemplate = $this->replaceSqlPatterns($sTemplate);
102
103         $this->pgsqlRunScript($sTemplate, false);
104     }
105
106     public function createPartitionTables()
107     {
108         info('Create Partition Tables');
109
110         $sTemplate = file_get_contents(CONST_SqlDir.'/partition-tables.src.sql');
111         $sTemplate = $this->replaceSqlPatterns($sTemplate);
112
113         $this->pgsqlRunPartitionScript($sTemplate);
114     }
115
116     public function importTigerData($sTigerPath)
117     {
118         info('Import Tiger data');
119
120         $aFilenames = glob($sTigerPath.'/*.sql');
121         info('Found '.count($aFilenames).' SQL files in path '.$sTigerPath);
122         if (empty($aFilenames)) {
123             warn('Tiger data import selected but no files found in path '.$sTigerPath);
124             return;
125         }
126         $sTemplate = file_get_contents(CONST_SqlDir.'/tiger_import_start.sql');
127         $sTemplate = $this->replaceSqlPatterns($sTemplate);
128
129         $this->pgsqlRunScript($sTemplate, false);
130
131         $aDBInstances = array();
132         for ($i = 0; $i < $this->iInstances; $i++) {
133             // https://secure.php.net/manual/en/function.pg-connect.php
134             $DSN = getSetting('DATABASE_DSN');
135             $DSN = preg_replace('/^pgsql:/', '', $DSN);
136             $DSN = preg_replace('/;/', ' ', $DSN);
137             $aDBInstances[$i] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW | PGSQL_CONNECT_ASYNC);
138             pg_ping($aDBInstances[$i]);
139         }
140
141         foreach ($aFilenames as $sFile) {
142             echo $sFile.': ';
143             $hFile = fopen($sFile, 'r');
144             $sSQL = fgets($hFile, 100000);
145             $iLines = 0;
146             while (true) {
147                 for ($i = 0; $i < $this->iInstances; $i++) {
148                     if (!pg_connection_busy($aDBInstances[$i])) {
149                         while (pg_get_result($aDBInstances[$i]));
150                         $sSQL = fgets($hFile, 100000);
151                         if (!$sSQL) break 2;
152                         if (!pg_send_query($aDBInstances[$i], $sSQL)) fail(pg_last_error($aDBInstances[$i]));
153                         $iLines++;
154                         if ($iLines == 1000) {
155                             echo '.';
156                             $iLines = 0;
157                         }
158                     }
159                 }
160                 usleep(10);
161             }
162             fclose($hFile);
163
164             $bAnyBusy = true;
165             while ($bAnyBusy) {
166                 $bAnyBusy = false;
167                 for ($i = 0; $i < $this->iInstances; $i++) {
168                     if (pg_connection_busy($aDBInstances[$i])) $bAnyBusy = true;
169                 }
170                 usleep(10);
171             }
172             echo "\n";
173         }
174
175         for ($i = 0; $i < $this->iInstances; $i++) {
176             pg_close($aDBInstances[$i]);
177         }
178
179         info('Creating indexes on Tiger data');
180         $sTemplate = file_get_contents(CONST_SqlDir.'/tiger_import_finish.sql');
181         $sTemplate = $this->replaceSqlPatterns($sTemplate);
182
183         $this->pgsqlRunScript($sTemplate, false);
184     }
185
186     public function calculatePostcodes($bCMDResultAll)
187     {
188         info('Calculate Postcodes');
189         $this->pgsqlRunScriptFile(CONST_SqlDir.'/postcode_tables.sql');
190
191         $sPostcodeFilename = CONST_InstallDir.'/gb_postcode_data.sql.gz';
192         if (file_exists($sPostcodeFilename)) {
193             $this->pgsqlRunScriptFile($sPostcodeFilename);
194         } else {
195             warn('optional external GB postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
196         }
197
198         $sPostcodeFilename = CONST_InstallDir.'/us_postcode_data.sql.gz';
199         if (file_exists($sPostcodeFilename)) {
200             $this->pgsqlRunScriptFile($sPostcodeFilename);
201         } else {
202             warn('optional external US postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
203         }
204
205
206         $this->db()->exec('TRUNCATE location_postcode');
207
208         $sSQL  = 'INSERT INTO location_postcode';
209         $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
210         $sSQL .= "SELECT nextval('seq_place'), 1, country_code,";
211         $sSQL .= "       upper(trim (both ' ' from address->'postcode')) as pc,";
212         $sSQL .= '       ST_Centroid(ST_Collect(ST_Centroid(geometry)))';
213         $sSQL .= '  FROM placex';
214         $sSQL .= " WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'";
215         $sSQL .= '       AND geometry IS NOT null';
216         $sSQL .= ' GROUP BY country_code, pc';
217         $this->db()->exec($sSQL);
218
219         // only add postcodes that are not yet available in OSM
220         $sSQL  = 'INSERT INTO location_postcode';
221         $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
222         $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,";
223         $sSQL .= '       ST_SetSRID(ST_Point(x,y),4326)';
224         $sSQL .= '  FROM us_postcode WHERE postcode NOT IN';
225         $sSQL .= '        (SELECT postcode FROM location_postcode';
226         $sSQL .= "          WHERE country_code = 'us')";
227         $this->db()->exec($sSQL);
228
229         // add missing postcodes for GB (if available)
230         $sSQL  = 'INSERT INTO location_postcode';
231         $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
232         $sSQL .= "SELECT nextval('seq_place'), 1, 'gb', postcode, geometry";
233         $sSQL .= '  FROM gb_postcode WHERE postcode NOT IN';
234         $sSQL .= '           (SELECT postcode FROM location_postcode';
235         $sSQL .= "             WHERE country_code = 'gb')";
236         $this->db()->exec($sSQL);
237
238         if (!$bCMDResultAll) {
239             $sSQL = "DELETE FROM word WHERE class='place' and type='postcode'";
240             $sSQL .= 'and word NOT IN (SELECT postcode FROM location_postcode)';
241             $this->db()->exec($sSQL);
242         }
243
244         $sSQL = 'SELECT count(getorcreate_postcode_id(v)) FROM ';
245         $sSQL .= '(SELECT distinct(postcode) as v FROM location_postcode) p';
246         $this->db()->exec($sSQL);
247     }
248
249     public function createSearchIndices()
250     {
251         info('Create Search indices');
252
253         $sSQL = 'SELECT relname FROM pg_class, pg_index ';
254         $sSQL .= 'WHERE pg_index.indisvalid = false AND pg_index.indexrelid = pg_class.oid';
255         $aInvalidIndices = $this->db()->getCol($sSQL);
256
257         foreach ($aInvalidIndices as $sIndexName) {
258             info("Cleaning up invalid index $sIndexName");
259             $this->db()->exec("DROP INDEX $sIndexName;");
260         }
261
262         $sTemplate = file_get_contents(CONST_SqlDir.'/indices.src.sql');
263         if (!$this->bDrop) {
264             $sTemplate .= file_get_contents(CONST_SqlDir.'/indices_updates.src.sql');
265         }
266         if (!$this->dbReverseOnly()) {
267             $sTemplate .= file_get_contents(CONST_SqlDir.'/indices_search.src.sql');
268         }
269         $sTemplate = $this->replaceSqlPatterns($sTemplate);
270
271         $this->pgsqlRunScript($sTemplate);
272     }
273
274     public function createCountryNames()
275     {
276         info('Create search index for default country names');
277
278         $this->pgsqlRunScript("select getorcreate_country(make_standard_name('uk'), 'gb')");
279         $this->pgsqlRunScript("select getorcreate_country(make_standard_name('united states'), 'us')");
280         $this->pgsqlRunScript('select count(*) from (select getorcreate_country(make_standard_name(country_code), country_code) from country_name where country_code is not null) as x');
281         $this->pgsqlRunScript("select count(*) from (select getorcreate_country(make_standard_name(name->'name'), country_code) from country_name where name ? 'name') as x");
282         $sSQL = 'select count(*) from (select getorcreate_country(make_standard_name(v),'
283             .'country_code) from (select country_code, skeys(name) as k, svals(name) as v from country_name) x where k ';
284         $sLanguages = getSetting('LANGUAGES');
285         if ($sLanguages) {
286             $sSQL .= 'in ';
287             $sDelim = '(';
288             foreach (explode(',', $sLanguages) as $sLang) {
289                 $sSQL .= $sDelim."'name:$sLang'";
290                 $sDelim = ',';
291             }
292             $sSQL .= ')';
293         } else {
294             // all include all simple name tags
295             $sSQL .= "like 'name:%'";
296         }
297         $sSQL .= ') v';
298         $this->pgsqlRunScript($sSQL);
299     }
300
301     /**
302      * Return the connection to the database.
303      *
304      * @return Database object.
305      *
306      * Creates a new connection if none exists yet. Otherwise reuses the
307      * already established connection.
308      */
309     private function db()
310     {
311         if (is_null($this->oDB)) {
312             $this->oDB = new \Nominatim\DB();
313             $this->oDB->connect();
314         }
315
316         return $this->oDB;
317     }
318
319     private function pgsqlRunScript($sScript, $bfatal = true)
320     {
321         runSQLScript(
322             $sScript,
323             $bfatal,
324             $this->bVerbose,
325             $this->sIgnoreErrors
326         );
327     }
328
329     private function createSqlFunctions()
330     {
331         $oCmd = (clone($this->oNominatimCmd))
332                 ->addParams('refresh', '--functions');
333
334         if (!$this->bEnableDiffUpdates) {
335             $oCmd->addParams('--no-diff-updates');
336         }
337
338         if ($this->bEnableDebugStatements) {
339             $oCmd->addParams('--enable-debug-statements');
340         }
341
342         $oCmd->run(!$this->sIgnoreErrors);
343     }
344
345     private function pgsqlRunPartitionScript($sTemplate)
346     {
347         $sSQL = 'select distinct partition from country_name order by partition';
348         $aPartitions = $this->db()->getCol($sSQL);
349         if ($aPartitions[0] != 0) $aPartitions[] = 0;
350
351         preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
352         foreach ($aMatches as $aMatch) {
353             $sResult = '';
354             foreach ($aPartitions as $sPartitionName) {
355                 $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
356             }
357             $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
358         }
359
360         $this->pgsqlRunScript($sTemplate);
361     }
362
363     private function pgsqlRunScriptFile($sFilename)
364     {
365         if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
366
367         $oCmd = (new \Nominatim\Shell('psql'))
368                 ->addParams('--port', $this->aDSNInfo['port'])
369                 ->addParams('--dbname', $this->aDSNInfo['database']);
370
371         if (!$this->bVerbose) {
372             $oCmd->addParams('--quiet');
373         }
374         if (isset($this->aDSNInfo['hostspec'])) {
375             $oCmd->addParams('--host', $this->aDSNInfo['hostspec']);
376         }
377         if (isset($this->aDSNInfo['username'])) {
378             $oCmd->addParams('--username', $this->aDSNInfo['username']);
379         }
380         if (isset($this->aDSNInfo['password'])) {
381             $oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
382         }
383         $ahGzipPipes = null;
384         if (preg_match('/\\.gz$/', $sFilename)) {
385             $aDescriptors = array(
386                              0 => array('pipe', 'r'),
387                              1 => array('pipe', 'w'),
388                              2 => array('file', '/dev/null', 'a')
389                             );
390             $oZcatCmd = new \Nominatim\Shell('zcat', $sFilename);
391
392             $hGzipProcess = proc_open($oZcatCmd->escapedCmd(), $aDescriptors, $ahGzipPipes);
393             if (!is_resource($hGzipProcess)) fail('unable to start zcat');
394             $aReadPipe = $ahGzipPipes[1];
395             fclose($ahGzipPipes[0]);
396         } else {
397             $oCmd->addParams('--file', $sFilename);
398             $aReadPipe = array('pipe', 'r');
399         }
400         $aDescriptors = array(
401                          0 => $aReadPipe,
402                          1 => array('pipe', 'w'),
403                          2 => array('file', '/dev/null', 'a')
404                         );
405         $ahPipes = null;
406
407         $hProcess = proc_open($oCmd->escapedCmd(), $aDescriptors, $ahPipes, null, $oCmd->aEnv);
408         if (!is_resource($hProcess)) fail('unable to start pgsql');
409         // TODO: error checking
410         while (!feof($ahPipes[1])) {
411             echo fread($ahPipes[1], 4096);
412         }
413         fclose($ahPipes[1]);
414         $iReturn = proc_close($hProcess);
415         if ($iReturn > 0) {
416             fail("pgsql returned with error code ($iReturn)");
417         }
418         if ($ahGzipPipes) {
419             fclose($ahGzipPipes[1]);
420             proc_close($hGzipProcess);
421         }
422     }
423
424     private function replaceSqlPatterns($sSql)
425     {
426         $sSql = str_replace('{www-user}', getSetting('DATABASE_WEBUSER'), $sSql);
427
428         $aPatterns = array(
429                       '{ts:address-data}' => getSetting('TABLESPACE_ADDRESS_DATA'),
430                       '{ts:address-index}' => getSetting('TABLESPACE_ADDRESS_INDEX'),
431                       '{ts:search-data}' => getSetting('TABLESPACE_SEARCH_DATA'),
432                       '{ts:search-index}' =>  getSetting('TABLESPACE_SEARCH_INDEX'),
433                       '{ts:aux-data}' =>  getSetting('TABLESPACE_AUX_DATA'),
434                       '{ts:aux-index}' =>  getSetting('TABLESPACE_AUX_INDEX')
435         );
436
437         foreach ($aPatterns as $sPattern => $sTablespace) {
438             if ($sTablespace) {
439                 $sSql = str_replace($sPattern, 'TABLESPACE "'.$sTablespace.'"', $sSql);
440             } else {
441                 $sSql = str_replace($sPattern, '', $sSql);
442             }
443         }
444
445         return $sSql;
446     }
447
448     /**
449      * Drop table with the given name if it exists.
450      *
451      * @param string $sName Name of table to remove.
452      *
453      * @return null
454      */
455     private function dropTable($sName)
456     {
457         if ($this->bVerbose) echo "Dropping table $sName\n";
458         $this->db()->deleteTable($sName);
459     }
460
461     /**
462      * Check if the database is in reverse-only mode.
463      *
464      * @return True if there is no search_name table and infrastructure.
465      */
466     private function dbReverseOnly()
467     {
468         return !($this->db()->tableExists('search_name'));
469     }
470
471     /**
472      * Try accessing the C module, so we know early if something is wrong.
473      *
474      * Raises Nominatim\DatabaseError on failure
475      */
476     private function checkModulePresence()
477     {
478         $sModulePath = getSetting('DATABASE_MODULE_PATH', CONST_InstallDir.'/module');
479         $sSQL = "CREATE FUNCTION nominatim_test_import_func(text) RETURNS text AS '";
480         $sSQL .= $sModulePath . "/nominatim.so', 'transliteration' LANGUAGE c IMMUTABLE STRICT";
481         $sSQL .= ';DROP FUNCTION nominatim_test_import_func(text);';
482
483         $oDB = new \Nominatim\DB();
484         $oDB->connect();
485         $oDB->exec($sSQL, null, 'Database server failed to load '.$sModulePath.'/nominatim.so module');
486     }
487 }