if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) {
$bDidSomething = true;
- $oSetup->importWikipediaArticles();
+ (clone($oNominatimCmd))->addParams('refresh', '--wiki-data')->run();
}
if ($aCMDResult['load-data'] || $aCMDResult['all']) {
if ($aCMDResult['drop']) {
$bDidSomething = true;
- $oSetup->drop($aCMDResult);
+ (clone($oNominatimCmd))->addParams('freeze')->run(true);
}
if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) {
if ($aCMDResult['setup-website'] || $aCMDResult['all']) {
$bDidSomething = true;
- $oSetup->setupWebsite();
+ (clone($oNominatimCmd))->addParams('refresh', '--website')->run(true);
}
// ******************************************************
}
if ($aResult['recompute-importance']) {
- echo "Updating importance values for database.\n";
- $oDB = new Nominatim\DB();
- $oDB->connect();
-
- $sSQL = 'ALTER TABLE placex DISABLE TRIGGER ALL;';
- $sSQL .= 'UPDATE placex SET (wikipedia, importance) =';
- $sSQL .= ' (SELECT wikipedia, importance';
- $sSQL .= ' FROM compute_importance(extratags, country_code, osm_type, osm_id));';
- $sSQL .= 'UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance';
- $sSQL .= ' FROM placex d';
- $sSQL .= ' WHERE s.place_id = d.linked_place_id and d.wikipedia is not null';
- $sSQL .= ' and (s.wikipedia is null or s.importance < d.importance);';
- $sSQL .= 'ALTER TABLE placex ENABLE TRIGGER ALL;';
- $oDB->exec($sSQL);
+ (clone($oNominatimCmd))->addParams('refresh', '--importance')->run(true);
}
if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
class SetupFunctions
{
- protected $iCacheMemory;
protected $iInstances;
protected $aDSNInfo;
protected $bQuiet;
warn('resetting threads to '.$this->iInstances);
}
- if (isset($aCMDResult['osm2pgsql-cache'])) {
- $this->iCacheMemory = $aCMDResult['osm2pgsql-cache'];
- } elseif (getSetting('FLATNODE_FILE')) {
- // When flatnode files are enabled then disable cache per default.
- $this->iCacheMemory = 0;
- } else {
- // Otherwise: Assume we can steal all the cache memory in the box.
- $this->iCacheMemory = getCacheMemoryMB();
- }
-
// parse database string
$this->aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
if (!isset($this->aDSNInfo['port'])) {
if ($this->bVerbose) {
$this->oNominatimCmd->addParams('--verbose');
}
+ $this->oNominatimCmd->addParams('--threads', $this->iInstances);
}
public function createFunctions()
$this->createSqlFunctions(); // also create partition functions
}
- public function importWikipediaArticles()
- {
- $sWikiArticlePath = getSetting('WIKIPEDIA_DATA_PATH', CONST_InstallDir);
- $sWikiArticlesFile = $sWikiArticlePath.'/wikimedia-importance.sql.gz';
- if (file_exists($sWikiArticlesFile)) {
- info('Importing wikipedia articles and redirects');
- $this->dropTable('wikipedia_article');
- $this->dropTable('wikipedia_redirect');
- $this->pgsqlRunScriptFile($sWikiArticlesFile);
- } else {
- warn('wikipedia importance dump file not found - places will have default importance');
- }
- }
-
public function loadData($bDisableTokenPrecalc)
{
info('Drop old Data');
$this->pgsqlRunScript($sSQL);
}
- public function drop()
- {
- (clone($this->oNominatimCmd))->addParams('freeze')->run();
- }
-
- /**
- * Setup the directory for the API scripts.
- *
- * @return null
- */
- public function setupWebsite()
- {
- (clone($this->oNominatimCmd))->addParams('refresh', '--website')->run();
- }
-
/**
* Return the connection to the database.
*
return $this->oDB;
}
- private function removeFlatnodeFile()
- {
- $sFName = getSetting('FLATNODE_FILE');
- if ($sFName && file_exists($sFName)) {
- if ($this->bVerbose) echo 'Deleting '.$sFName."\n";
- unlink($sFName);
- }
- }
-
private function pgsqlRunScript($sScript, $bfatal = true)
{
runSQLScript(
$oCmd->addParams('--enable-debug-statements');
}
- $oCmd->run();
+ $oCmd->run(!$this->sIgnoreErrors);
}
private function pgsqlRunPartitionScript($sTemplate)
from pathlib import Path
from ..db.connection import connect
-from ..tools.exec_utils import run_legacy_script
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
args.diffs, args.enable_debug_statements)
if args.wiki_data:
- run_legacy_script('setup.php', '--import-wikipedia-articles',
- nominatim_env=args, throw_on_fail=True)
+ data_path = Path(args.config.WIKIPEDIA_DATA_PATH
+ or args.project_dir)
+        LOG.warning('Importing wikipedia article importance from %s', data_path)
+ if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
+ data_path) > 0:
+ LOG.fatal('FATAL: Wikipedia importance dump file not found')
+ return 1
+
# Attention: importance MUST come after wiki data import.
if args.importance:
- run_legacy_script('update.php', '--recompute-importance',
- nominatim_env=args, throw_on_fail=True)
+ LOG.warning('Update importance values for database')
+ with connect(args.config.get_libpq_dsn()) as conn:
+ refresh.recompute_importance(conn)
+
if args.website:
webdir = args.project_dir / 'website'
LOG.warning('Setting up website directory at %s', webdir)
return len(chunk)
-def execute_file(dsn, fname, ignore_errors=False):
+def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None):
""" Read an SQL file and run its contents against the given database
- using psql.
+ using psql. Use `pre_code` and `post_code` to run extra commands
+ before or after executing the file. The commands are run within the
+ same session, so they may be used to wrap the file execution in a
+ transaction.
"""
cmd = ['psql']
if not ignore_errors:
if not LOG.isEnabledFor(logging.INFO):
proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
+ if pre_code:
+ proc.stdin.write((pre_code + ';').encode('utf-8'))
+
if fname.suffix == '.gz':
with gzip.open(str(fname), 'rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
with fname.open('rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
+ if remain == 0 and post_code:
+ proc.stdin.write((';' + post_code).encode('utf-8'))
+
proc.stdin.close()
ret = proc.wait()
)
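A minimal usage sketch for the new parameters (connection string and file name
are placeholders): opening a transaction in `pre_code` and committing it in
`post_code` wraps the whole file in a single transaction, which is the pattern
the wikipedia import below relies on.

    from pathlib import Path
    from nominatim.db.utils import execute_file

    # placeholder DSN and file; pre_code/post_code run in the same psql session
    execute_file('dbname=nominatim', Path('data.sql.gz'),
                 pre_code='BEGIN', post_code='COMMIT')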
+def import_wikipedia_articles(dsn, data_path, ignore_errors=False):
+ """ Replaces the wikipedia importance tables with new data.
+        The import is run in a single transaction so that the new data
+        is replaced seamlessly.
+
+ Returns 0 if all was well and 1 if the importance file could not
+ be found. Throws an exception if there was an error reading the file.
+ """
+ datafile = data_path / 'wikimedia-importance.sql.gz'
+
+ if not datafile.exists():
+ return 1
+
+ pre_code = """BEGIN;
+ DROP TABLE IF EXISTS "wikipedia_article";
+ DROP TABLE IF EXISTS "wikipedia_redirect"
+ """
+ post_code = "COMMIT"
+ execute_file(dsn, datafile, ignore_errors=ignore_errors,
+ pre_code=pre_code, post_code=post_code)
+
+ return 0
+
+
+def recompute_importance(conn):
+ """ Recompute wikipedia links and importance for all entries in placex.
+        This is a long-running operation that must not be executed in
+        parallel with updates.
+ """
+ with conn.cursor() as cur:
+ cur.execute('ALTER TABLE placex DISABLE TRIGGER ALL')
+ cur.execute("""
+ UPDATE placex SET (wikipedia, importance) =
+ (SELECT wikipedia, importance
+ FROM compute_importance(extratags, country_code, osm_type, osm_id))
+ """)
+ cur.execute("""
+ UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
+ FROM placex d
+ WHERE s.place_id = d.linked_place_id and d.wikipedia is not null
+ and (s.wikipedia is null or s.importance < d.importance);
+ """)
+
+ cur.execute('ALTER TABLE placex ENABLE TRIGGER ALL')
+ conn.commit()
+
+
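For reference, a minimal sketch of driving both refresh steps from library code,
mirroring the CLI handler above; the DSN and data directory are placeholders.

    from pathlib import Path
    from nominatim.db.connection import connect
    from nominatim.tools import refresh

    dsn = 'dbname=nominatim'   # placeholder connection string
    data_path = Path('.')      # directory holding wikimedia-importance.sql.gz

    # importance is computed from the wikipedia tables, so import those first
    if refresh.import_wikipedia_articles(dsn, data_path) == 0:
        with connect(dsn) as conn:
            refresh.recompute_importance(conn)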
def setup_website(basedir, phplib_dir, config):
""" Create the website script stubs.
"""
conn.close()
+
+@pytest.fixture
+def dsn(temp_db):
+ return 'dbname=' + temp_db
+
+
@pytest.fixture
def temp_db_with_extensions(temp_db):
conn = psycopg2.connect(database=temp_db)
conn.close()
+@pytest.fixture
+def table_factory(temp_db_cursor):
+ def mk_table(name, definition='id INT'):
+ temp_db_cursor.execute('CREATE TABLE {} ({})'.format(name, definition))
+
+ return mk_table
+
+
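A short sketch of how the fixture is meant to be used (test name and columns are
invented for illustration): the optional second argument overrides the default
`id INT` column definition.

    def test_table_factory_example(table_factory, temp_db_cursor):
        table_factory('example_places', 'id INT, name TEXT')
        temp_db_cursor.execute("INSERT INTO example_places VALUES (1, 'spot')")
        assert temp_db_cursor.rowcount == 1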
@pytest.fixture
def def_config():
return Configuration(None, SRC_DIR.resolve() / 'settings')
assert rank_mock.called == do_ranks
-@pytest.mark.parametrize("command,params", [
- ('wiki-data', ('setup.php', '--import-wikipedia-articles')),
- ('importance', ('update.php', '--recompute-importance')),
- ])
-def test_refresh_legacy_command(mock_func_factory, temp_db, command, params):
- mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')
-
- assert 0 == call_nominatim('refresh', '--' + command)
-
- assert mock_run_legacy.called == 1
- assert len(mock_run_legacy.last_args) >= len(params)
- assert mock_run_legacy.last_args[:len(params)] == params
-
@pytest.mark.parametrize("command,func", [
('postcodes', 'update_postcodes'),
('word-counts', 'recompute_word_counts'),
('address-levels', 'load_address_levels_from_file'),
('functions', 'create_functions'),
+ ('wiki-data', 'import_wikipedia_articles'),
+ ('importance', 'recompute_importance'),
('website', 'setup_website'),
])
def test_refresh_command(mock_func_factory, temp_db, command, func):
assert func_mock.called == 1
-def test_refresh_importance_computed_after_wiki_import(mock_func_factory, temp_db):
- mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')
+def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
+ calls = []
+ monkeypatch.setattr(nominatim.tools.refresh, 'import_wikipedia_articles',
+ lambda *args, **kwargs: calls.append('import') or 0)
+ monkeypatch.setattr(nominatim.tools.refresh, 'recompute_importance',
+ lambda *args, **kwargs: calls.append('update'))
assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')
- assert mock_run_legacy.called == 2
- assert mock_run_legacy.last_args == ('update.php', '--recompute-importance')
+ assert calls == ['import', 'update']
def test_serve_command(mock_func_factory):
yield conn
-def test_connection_table_exists(db, temp_db_cursor):
+def test_connection_table_exists(db, table_factory):
assert db.table_exists('foobar') == False
- temp_db_cursor.execute('CREATE TABLE foobar (id INT)')
+ table_factory('foobar')
assert db.table_exists('foobar') == True
assert db.index_exists('some_index', table='bar') == False
-def test_drop_table_existing(db, temp_db_cursor):
- temp_db_cursor.execute('CREATE TABLE dummy (id INT)')
-
+def test_drop_table_existing(db, table_factory):
+ table_factory('dummy')
assert db.table_exists('dummy')
+
db.drop_table('dummy')
assert not db.table_exists('dummy')
assert ver[0] >= 2
-def test_cursor_scalar(db, temp_db_cursor):
- temp_db_cursor.execute('CREATE TABLE dummy (id INT)')
+def test_cursor_scalar(db, table_factory):
+ table_factory('dummy')
with db.cursor() as cur:
assert cur.scalar('SELECT count(*) FROM dummy') == 0
import nominatim.db.utils as db_utils
from nominatim.errors import UsageError
-@pytest.fixture
-def dsn(temp_db):
- return 'dbname=' + temp_db
-
def test_execute_file_success(dsn, temp_db_cursor, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);')
tmpfile.write_text('CREATE STABLE test (id INT)')
db_utils.execute_file(dsn, tmpfile, ignore_errors=True)
+
+
+def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor):
+ tmpfile = tmp_path / 'test.sql'
+ tmpfile.write_text('INSERT INTO test VALUES(4)')
+
+ db_utils.execute_file(dsn, tmpfile, pre_code='CREATE TABLE test (id INT)')
+
+ temp_db_cursor.execute('SELECT * FROM test')
+
+ assert temp_db_cursor.rowcount == 1
+ assert temp_db_cursor.fetchone()[0] == 4
+
+
+def test_execute_file_with_post_code(dsn, tmp_path, temp_db_cursor):
+ tmpfile = tmp_path / 'test.sql'
+ tmpfile.write_text('CREATE TABLE test (id INT)')
+
+ db_utils.execute_file(dsn, tmpfile, post_code='INSERT INTO test VALUES(23)')
+
+ temp_db_cursor.execute('SELECT * FROM test')
+
+ assert temp_db_cursor.rowcount == 1
+ assert temp_db_cursor.fetchone()[0] == 23
Tests for function for importing address ranks.
"""
import json
-import pytest
from pathlib import Path
+import pytest
+
from nominatim.tools.refresh import load_address_levels, load_address_levels_from_file
def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):