git.openstreetmap.org Git - nominatim.git/blob - lib-php/setup/SetupClass.php
move table creation to jinja-based preprocessing
[nominatim.git] / lib-php / setup / SetupClass.php
1 <?php
2
3 namespace Nominatim\Setup;
4
5 require_once(CONST_LibDir.'/Shell.php');
6
7 class SetupFunctions
8 {
    // Number of parallel database connections opened by importTigerData().
    // Taken from the 'threads' option or derived from the processor count.
    protected $iInstances;
    // Result of \Nominatim\DB::parseDSN() for the DATABASE_DSN setting;
    // 'port' is filled in with 5432 when the DSN does not name one.
    protected $aDSNInfo;
    // True when the 'quiet' option was given; adds --quiet to the nominatim tool.
    protected $bQuiet;
    // Value of the 'verbose' option; adds --verbose to the nominatim tool and
    // controls the verbosity of the SQL runners.
    protected $bVerbose;
    // Value of the 'ignore-errors' option (false when absent). Forwarded to
    // runSQLScript() and negated for the nominatim tool's run().
    protected $sIgnoreErrors;
    // Value of the 'enable-diff-updates' option (false when absent); when
    // falsy, the function refresh is run with --no-diff-updates.
    protected $bEnableDiffUpdates;
    // Value of the 'enable-debug-statements' option (false when absent).
    protected $bEnableDebugStatements;
    // True when the 'drop' option was given; createSearchIndices() then skips
    // indices_updates.src.sql.
    protected $bDrop;
    // Database connection, created lazily by db().
    protected $oDB = null;
    // \Nominatim\Shell wrapper around the NOMINATIM_TOOL setting; cloned for
    // each invocation of the tool.
    protected $oNominatimCmd;
19
20     public function __construct(array $aCMDResult)
21     {
22         // by default, use all but one processor, but never more than 15.
23         $this->iInstances = isset($aCMDResult['threads'])
24             ? $aCMDResult['threads']
25             : (min(16, getProcessorCount()) - 1);
26
27         if ($this->iInstances < 1) {
28             $this->iInstances = 1;
29             warn('resetting threads to '.$this->iInstances);
30         }
31
32         // parse database string
33         $this->aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
34         if (!isset($this->aDSNInfo['port'])) {
35             $this->aDSNInfo['port'] = 5432;
36         }
37
38         // setting member variables based on command line options stored in $aCMDResult
39         $this->bQuiet = isset($aCMDResult['quiet']) && $aCMDResult['quiet'];
40         $this->bVerbose = $aCMDResult['verbose'];
41
42         //setting default values which are not set by the update.php array
43         if (isset($aCMDResult['ignore-errors'])) {
44             $this->sIgnoreErrors = $aCMDResult['ignore-errors'];
45         } else {
46             $this->sIgnoreErrors = false;
47         }
48         if (isset($aCMDResult['enable-debug-statements'])) {
49             $this->bEnableDebugStatements = $aCMDResult['enable-debug-statements'];
50         } else {
51             $this->bEnableDebugStatements = false;
52         }
53         if (isset($aCMDResult['enable-diff-updates'])) {
54             $this->bEnableDiffUpdates = $aCMDResult['enable-diff-updates'];
55         } else {
56             $this->bEnableDiffUpdates = false;
57         }
58
59         $this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop'];
60
61         $this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
62         if ($this->bQuiet) {
63             $this->oNominatimCmd->addParams('--quiet');
64         }
65         if ($this->bVerbose) {
66             $this->oNominatimCmd->addParams('--verbose');
67         }
68     }
69
70     public function createFunctions()
71     {
72         info('Create Functions');
73
74         // Try accessing the C module, so we know early if something is wrong
75         $this->checkModulePresence(); // raises exception on failure
76
77         $this->createSqlFunctions();
78     }
79
80     public function importTigerData($sTigerPath)
81     {
82         info('Import Tiger data');
83
84         $aFilenames = glob($sTigerPath.'/*.sql');
85         info('Found '.count($aFilenames).' SQL files in path '.$sTigerPath);
86         if (empty($aFilenames)) {
87             warn('Tiger data import selected but no files found in path '.$sTigerPath);
88             return;
89         }
90         $sTemplate = file_get_contents(CONST_SqlDir.'/tiger_import_start.sql');
91         $sTemplate = $this->replaceSqlPatterns($sTemplate);
92
93         $this->pgsqlRunScript($sTemplate, false);
94
95         $aDBInstances = array();
96         for ($i = 0; $i < $this->iInstances; $i++) {
97             // https://secure.php.net/manual/en/function.pg-connect.php
98             $DSN = getSetting('DATABASE_DSN');
99             $DSN = preg_replace('/^pgsql:/', '', $DSN);
100             $DSN = preg_replace('/;/', ' ', $DSN);
101             $aDBInstances[$i] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW | PGSQL_CONNECT_ASYNC);
102             pg_ping($aDBInstances[$i]);
103         }
104
105         foreach ($aFilenames as $sFile) {
106             echo $sFile.': ';
107             $hFile = fopen($sFile, 'r');
108             $sSQL = fgets($hFile, 100000);
109             $iLines = 0;
110             while (true) {
111                 for ($i = 0; $i < $this->iInstances; $i++) {
112                     if (!pg_connection_busy($aDBInstances[$i])) {
113                         while (pg_get_result($aDBInstances[$i]));
114                         $sSQL = fgets($hFile, 100000);
115                         if (!$sSQL) break 2;
116                         if (!pg_send_query($aDBInstances[$i], $sSQL)) fail(pg_last_error($aDBInstances[$i]));
117                         $iLines++;
118                         if ($iLines == 1000) {
119                             echo '.';
120                             $iLines = 0;
121                         }
122                     }
123                 }
124                 usleep(10);
125             }
126             fclose($hFile);
127
128             $bAnyBusy = true;
129             while ($bAnyBusy) {
130                 $bAnyBusy = false;
131                 for ($i = 0; $i < $this->iInstances; $i++) {
132                     if (pg_connection_busy($aDBInstances[$i])) $bAnyBusy = true;
133                 }
134                 usleep(10);
135             }
136             echo "\n";
137         }
138
139         for ($i = 0; $i < $this->iInstances; $i++) {
140             pg_close($aDBInstances[$i]);
141         }
142
143         info('Creating indexes on Tiger data');
144         $sTemplate = file_get_contents(CONST_SqlDir.'/tiger_import_finish.sql');
145         $sTemplate = $this->replaceSqlPatterns($sTemplate);
146
147         $this->pgsqlRunScript($sTemplate, false);
148     }
149
150     public function calculatePostcodes($bCMDResultAll)
151     {
152         info('Calculate Postcodes');
153         $this->pgsqlRunScriptFile(CONST_SqlDir.'/postcode_tables.sql');
154
155         $sPostcodeFilename = CONST_InstallDir.'/gb_postcode_data.sql.gz';
156         if (file_exists($sPostcodeFilename)) {
157             $this->pgsqlRunScriptFile($sPostcodeFilename);
158         } else {
159             warn('optional external GB postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
160         }
161
162         $sPostcodeFilename = CONST_InstallDir.'/us_postcode_data.sql.gz';
163         if (file_exists($sPostcodeFilename)) {
164             $this->pgsqlRunScriptFile($sPostcodeFilename);
165         } else {
166             warn('optional external US postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
167         }
168
169
170         $this->db()->exec('TRUNCATE location_postcode');
171
172         $sSQL  = 'INSERT INTO location_postcode';
173         $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
174         $sSQL .= "SELECT nextval('seq_place'), 1, country_code,";
175         $sSQL .= "       upper(trim (both ' ' from address->'postcode')) as pc,";
176         $sSQL .= '       ST_Centroid(ST_Collect(ST_Centroid(geometry)))';
177         $sSQL .= '  FROM placex';
178         $sSQL .= " WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'";
179         $sSQL .= '       AND geometry IS NOT null';
180         $sSQL .= ' GROUP BY country_code, pc';
181         $this->db()->exec($sSQL);
182
183         // only add postcodes that are not yet available in OSM
184         $sSQL  = 'INSERT INTO location_postcode';
185         $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
186         $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,";
187         $sSQL .= '       ST_SetSRID(ST_Point(x,y),4326)';
188         $sSQL .= '  FROM us_postcode WHERE postcode NOT IN';
189         $sSQL .= '        (SELECT postcode FROM location_postcode';
190         $sSQL .= "          WHERE country_code = 'us')";
191         $this->db()->exec($sSQL);
192
193         // add missing postcodes for GB (if available)
194         $sSQL  = 'INSERT INTO location_postcode';
195         $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
196         $sSQL .= "SELECT nextval('seq_place'), 1, 'gb', postcode, geometry";
197         $sSQL .= '  FROM gb_postcode WHERE postcode NOT IN';
198         $sSQL .= '           (SELECT postcode FROM location_postcode';
199         $sSQL .= "             WHERE country_code = 'gb')";
200         $this->db()->exec($sSQL);
201
202         if (!$bCMDResultAll) {
203             $sSQL = "DELETE FROM word WHERE class='place' and type='postcode'";
204             $sSQL .= 'and word NOT IN (SELECT postcode FROM location_postcode)';
205             $this->db()->exec($sSQL);
206         }
207
208         $sSQL = 'SELECT count(getorcreate_postcode_id(v)) FROM ';
209         $sSQL .= '(SELECT distinct(postcode) as v FROM location_postcode) p';
210         $this->db()->exec($sSQL);
211     }
212
213     public function createSearchIndices()
214     {
215         info('Create Search indices');
216
217         $sSQL = 'SELECT relname FROM pg_class, pg_index ';
218         $sSQL .= 'WHERE pg_index.indisvalid = false AND pg_index.indexrelid = pg_class.oid';
219         $aInvalidIndices = $this->db()->getCol($sSQL);
220
221         foreach ($aInvalidIndices as $sIndexName) {
222             info("Cleaning up invalid index $sIndexName");
223             $this->db()->exec("DROP INDEX $sIndexName;");
224         }
225
226         $sTemplate = file_get_contents(CONST_SqlDir.'/indices.src.sql');
227         if (!$this->bDrop) {
228             $sTemplate .= file_get_contents(CONST_SqlDir.'/indices_updates.src.sql');
229         }
230         if (!$this->dbReverseOnly()) {
231             $sTemplate .= file_get_contents(CONST_SqlDir.'/indices_search.src.sql');
232         }
233         $sTemplate = $this->replaceSqlPatterns($sTemplate);
234
235         $this->pgsqlRunScript($sTemplate);
236     }
237
238     public function createCountryNames()
239     {
240         info('Create search index for default country names');
241
242         $this->pgsqlRunScript("select getorcreate_country(make_standard_name('uk'), 'gb')");
243         $this->pgsqlRunScript("select getorcreate_country(make_standard_name('united states'), 'us')");
244         $this->pgsqlRunScript('select count(*) from (select getorcreate_country(make_standard_name(country_code), country_code) from country_name where country_code is not null) as x');
245         $this->pgsqlRunScript("select count(*) from (select getorcreate_country(make_standard_name(name->'name'), country_code) from country_name where name ? 'name') as x");
246         $sSQL = 'select count(*) from (select getorcreate_country(make_standard_name(v),'
247             .'country_code) from (select country_code, skeys(name) as k, svals(name) as v from country_name) x where k ';
248         $sLanguages = getSetting('LANGUAGES');
249         if ($sLanguages) {
250             $sSQL .= 'in ';
251             $sDelim = '(';
252             foreach (explode(',', $sLanguages) as $sLang) {
253                 $sSQL .= $sDelim."'name:$sLang'";
254                 $sDelim = ',';
255             }
256             $sSQL .= ')';
257         } else {
258             // all include all simple name tags
259             $sSQL .= "like 'name:%'";
260         }
261         $sSQL .= ') v';
262         $this->pgsqlRunScript($sSQL);
263     }
264
265     /**
266      * Return the connection to the database.
267      *
268      * @return Database object.
269      *
270      * Creates a new connection if none exists yet. Otherwise reuses the
271      * already established connection.
272      */
273     private function db()
274     {
275         if (is_null($this->oDB)) {
276             $this->oDB = new \Nominatim\DB();
277             $this->oDB->connect();
278         }
279
280         return $this->oDB;
281     }
282
283     private function pgsqlRunScript($sScript, $bfatal = true)
284     {
285         runSQLScript(
286             $sScript,
287             $bfatal,
288             $this->bVerbose,
289             $this->sIgnoreErrors
290         );
291     }
292
293     private function createSqlFunctions()
294     {
295         $oCmd = (clone($this->oNominatimCmd))
296                 ->addParams('refresh', '--functions');
297
298         if (!$this->bEnableDiffUpdates) {
299             $oCmd->addParams('--no-diff-updates');
300         }
301
302         if ($this->bEnableDebugStatements) {
303             $oCmd->addParams('--enable-debug-statements');
304         }
305
306         $oCmd->run(!$this->sIgnoreErrors);
307     }
308
309     private function pgsqlRunScriptFile($sFilename)
310     {
311         if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
312
313         $oCmd = (new \Nominatim\Shell('psql'))
314                 ->addParams('--port', $this->aDSNInfo['port'])
315                 ->addParams('--dbname', $this->aDSNInfo['database']);
316
317         if (!$this->bVerbose) {
318             $oCmd->addParams('--quiet');
319         }
320         if (isset($this->aDSNInfo['hostspec'])) {
321             $oCmd->addParams('--host', $this->aDSNInfo['hostspec']);
322         }
323         if (isset($this->aDSNInfo['username'])) {
324             $oCmd->addParams('--username', $this->aDSNInfo['username']);
325         }
326         if (isset($this->aDSNInfo['password'])) {
327             $oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
328         }
329         $ahGzipPipes = null;
330         if (preg_match('/\\.gz$/', $sFilename)) {
331             $aDescriptors = array(
332                              0 => array('pipe', 'r'),
333                              1 => array('pipe', 'w'),
334                              2 => array('file', '/dev/null', 'a')
335                             );
336             $oZcatCmd = new \Nominatim\Shell('zcat', $sFilename);
337
338             $hGzipProcess = proc_open($oZcatCmd->escapedCmd(), $aDescriptors, $ahGzipPipes);
339             if (!is_resource($hGzipProcess)) fail('unable to start zcat');
340             $aReadPipe = $ahGzipPipes[1];
341             fclose($ahGzipPipes[0]);
342         } else {
343             $oCmd->addParams('--file', $sFilename);
344             $aReadPipe = array('pipe', 'r');
345         }
346         $aDescriptors = array(
347                          0 => $aReadPipe,
348                          1 => array('pipe', 'w'),
349                          2 => array('file', '/dev/null', 'a')
350                         );
351         $ahPipes = null;
352
353         $hProcess = proc_open($oCmd->escapedCmd(), $aDescriptors, $ahPipes, null, $oCmd->aEnv);
354         if (!is_resource($hProcess)) fail('unable to start pgsql');
355         // TODO: error checking
356         while (!feof($ahPipes[1])) {
357             echo fread($ahPipes[1], 4096);
358         }
359         fclose($ahPipes[1]);
360         $iReturn = proc_close($hProcess);
361         if ($iReturn > 0) {
362             fail("pgsql returned with error code ($iReturn)");
363         }
364         if ($ahGzipPipes) {
365             fclose($ahGzipPipes[1]);
366             proc_close($hGzipProcess);
367         }
368     }
369
370     private function replaceSqlPatterns($sSql)
371     {
372         $sSql = str_replace('{www-user}', getSetting('DATABASE_WEBUSER'), $sSql);
373
374         $aPatterns = array(
375                       '{ts:address-data}' => getSetting('TABLESPACE_ADDRESS_DATA'),
376                       '{ts:address-index}' => getSetting('TABLESPACE_ADDRESS_INDEX'),
377                       '{ts:search-data}' => getSetting('TABLESPACE_SEARCH_DATA'),
378                       '{ts:search-index}' =>  getSetting('TABLESPACE_SEARCH_INDEX'),
379                       '{ts:aux-data}' =>  getSetting('TABLESPACE_AUX_DATA'),
380                       '{ts:aux-index}' =>  getSetting('TABLESPACE_AUX_INDEX')
381         );
382
383         foreach ($aPatterns as $sPattern => $sTablespace) {
384             if ($sTablespace) {
385                 $sSql = str_replace($sPattern, 'TABLESPACE "'.$sTablespace.'"', $sSql);
386             } else {
387                 $sSql = str_replace($sPattern, '', $sSql);
388             }
389         }
390
391         return $sSql;
392     }
393
394     /**
395      * Drop table with the given name if it exists.
396      *
397      * @param string $sName Name of table to remove.
398      *
399      * @return null
400      */
401     private function dropTable($sName)
402     {
403         if ($this->bVerbose) echo "Dropping table $sName\n";
404         $this->db()->deleteTable($sName);
405     }
406
407     /**
408      * Check if the database is in reverse-only mode.
409      *
410      * @return True if there is no search_name table and infrastructure.
411      */
412     private function dbReverseOnly()
413     {
414         return !($this->db()->tableExists('search_name'));
415     }
416
417     /**
418      * Try accessing the C module, so we know early if something is wrong.
419      *
420      * Raises Nominatim\DatabaseError on failure
421      */
422     private function checkModulePresence()
423     {
424         $sModulePath = getSetting('DATABASE_MODULE_PATH', CONST_InstallDir.'/module');
425         $sSQL = "CREATE FUNCTION nominatim_test_import_func(text) RETURNS text AS '";
426         $sSQL .= $sModulePath . "/nominatim.so', 'transliteration' LANGUAGE c IMMUTABLE STRICT";
427         $sSQL .= ';DROP FUNCTION nominatim_test_import_func(text);';
428
429         $oDB = new \Nominatim\DB();
430         $oDB->connect();
431         $oDB->exec($sSQL, null, 'Database server failed to load '.$sModulePath.'/nominatim.so module');
432     }
433 }