@staticmethod
def run(args):
from ..indexer.indexer import Indexer
+ from ..tokenizer import factory as tokenizer_factory
- indexer = Indexer(args.config.get_libpq_dsn(),
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+
+ indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or psutil.cpu_count() or 1)
if not args.no_boundaries:
def _update(args):
from ..tools import replication
from ..indexer.indexer import Indexer
+ from ..tokenizer import factory as tokenizer_factory
params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
params.update(base_url=args.config.REPLICATION_URL,
raise UsageError("Bad argument '--no-index'.")
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+
while True:
with connect(args.config.get_libpq_dsn()) as conn:
start = dt.datetime.now(dt.timezone.utc)
if state is not replication.UpdateState.NO_CHANGES and args.do_index:
index_start = dt.datetime.now(dt.timezone.utc)
- indexer = Indexer(args.config.get_libpq_dsn(),
+ indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or 1)
indexer.index_boundaries(0, 30)
indexer.index_by_rank(0, 30)
with connect(args.config.get_libpq_dsn()) as conn:
SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
LOG.warning('Indexing places')
- indexer = Indexer(args.config.get_libpq_dsn(),
+ indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or psutil.cpu_count() or 1)
indexer.index_full(analyse=not args.index_noanalyse)
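All three call sites above follow the same pattern: resolve the tokenizer for the database once, then hand it to every Indexer they construct. A condensed sketch of that flow (index_everything is a hypothetical wrapper, not part of the patch):

    from nominatim.indexer.indexer import Indexer
    from nominatim.tokenizer import factory as tokenizer_factory

    def index_everything(config, threads):
        # Look up the tokenizer that was configured for this database
        # and share the instance across all indexing calls.
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
        indexer = Indexer(config.get_libpq_dsn(), tokenizer, threads)
        indexer.index_boundaries(0, 30)
        indexer.index_by_rank(0, 30)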
""" Main indexing routine.
"""
- def __init__(self, dsn, num_threads):
+ def __init__(self, dsn, tokenizer, num_threads):
self.dsn = dsn
+ self.tokenizer = tokenizer
self.num_threads = num_threads
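The Indexer merely stores the tokenizer here; the interface it depends on is deliberately small. Going by the tests in this patch, a stand-in needs little more than the following (a sketch; any per-place analysis hooks the indexer may call while processing rows are not visible in this excerpt):

    class MinimalTokenizer:
        """ Smallest tokenizer stand-in consistent with this patch.
        """

        def update_sql_functions(self, *args):
            # Would (re-)create the tokenizer-specific SQL functions;
            # exercised by the 'refresh --functions' test further down.
            pass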
When importing
Then search_name contains
| object | nameaddress_vector |
- | N1 | Rose, Street, Walltown |
+ | N1 | #Rose Street, Walltown |
When searching for "23 Rose Street, Walltown"
Then results contain
| osm_type | osm_id | name |
When importing
Then search_name contains
| object | name_vector | nameaddress_vector |
- | N1 | #Green Moss | Rose, Street, Walltown |
+ | N1 | #Green Moss | #Rose Street, Walltown |
When searching for "Green Moss, Rose Street, Walltown"
Then results contain
| osm_type | osm_id | name |
When importing
Then search_name contains
| object | name_vector | nameaddress_vector |
- | N1 | foo | the road |
+ | N1 | foo | #the road |
Scenario: Some addr: tags are added to address
Given the scene roads-with-pois
# Call directly as the refresh function does not include postcodes.
indexer.LOG.setLevel(logging.ERROR)
- indexer.Indexer(context.nominatim.get_libpq_dsn(), 1).index_full(analyse=False)
+ indexer.Indexer(context.nominatim.get_libpq_dsn(), tokenizer, 1).index_full(analyse=False)
check_database_integrity(context)
if exclude:
assert not present, "Found term for {}/{}: {}".format(row['object'], name, wid[1])
else:
- assert present, "Missing term for {}/{}: {}".fromat(row['object'], name, wid[1])
+ assert present, "Missing term for {}/{}: {}".format(row['object'], name, wid[1])
elif name != 'object':
assert db_row.contains(name, value), db_row.assert_msg(name, value)
+import importlib
import itertools
import sys
from pathlib import Path
from nominatim.config import Configuration
from nominatim.db import connection
from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.db import properties
+
+import dummy_tokenizer
class _TestingCursor(psycopg2.extras.DictCursor):
""" Extension to the DictCursor class that provides execution
sql=tmp_path, data=SRC_DIR / 'data')
return SQLPreprocessor(temp_db_conn, cfg)
+
+
+@pytest.fixture
+def tokenizer_mock(monkeypatch, property_table, temp_db_conn):
+ """ Sets up the configuration so that the test dummy tokenizer will be
+ loaded.
+ """
+ monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')
+
+ def _import_dummy(module, *args, **kwargs):
+ return dummy_tokenizer
+
+ monkeypatch.setattr(importlib, "import_module", _import_dummy)
+ properties.set_property(temp_db_conn, 'tokenizer', 'dummy')
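The fixture patches importlib.import_module so that the factory receives the local dummy_tokenizer test module instead of a real implementation. Judging from the init_state assertions in the factory tests at the end of this section, that module plausibly looks like the sketch below; the module-level create() hook and the init_* method names are assumptions about the factory contract:

    # dummy_tokenizer.py (sketch of the test double)
    class DummyTokenizer:
        def __init__(self, dsn, data_dir):
            self.dsn = dsn
            self.data_dir = data_dir
            self.init_state = None

        def init_new_db(self, config):
            # assumed to be called by factory.create_tokenizer()
            self.init_state = "new"

        def init_from_project(self):
            # assumed to be called by factory.get_tokenizer_for_db()
            self.init_state = "loaded"

    def create(dsn, data_dir):
        # assumed module-level hook through which the factory
        # instantiates the tokenizer
        return DummyTokenizer(dsn, data_dir)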
return get_mock
+@pytest.fixture
+def tokenizer_mock(monkeypatch):
+ class DummyTokenizer:
+ def __init__(self, *args, **kwargs):
+ self.update_sql_functions_called = False
+
+ def update_sql_functions(self, *args):
+ self.update_sql_functions_called = True
+
+ tok = DummyTokenizer()
+    monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db',
+ lambda *args: tok)
+
+ return tok
+
+
def test_cli_help(capsys):
""" Running nominatim tool without arguments prints help.
"""
(['--boundaries-only'], 1, 0),
(['--no-boundaries'], 0, 1),
(['--boundaries-only', '--no-boundaries'], 0, 0)])
-def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks):
+def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock,
+ params, do_bnds, do_ranks):
temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries')
rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank')
assert func_mock.called == 1
-def test_refresh_create_functions(mock_func_factory, monkeypatch, temp_db):
- class DummyTokenizer:
- def update_sql_functions(self, *args):
- self.called = True
-
+def test_refresh_create_functions(mock_func_factory, temp_db, tokenizer_mock):
func_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
- tok = DummyTokenizer()
- monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db' ,
- lambda *args: tok)
-
assert 0 == call_nominatim('refresh', '--functions')
assert func_mock.called == 1
- assert hasattr(tok, 'called')
+ assert tokenizer_mock.update_sql_functions_called
def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
cli_args=['replication'] + list(args))
@pytest.fixture
-def index_mock(monkeypatch):
+def index_mock(monkeypatch, tokenizer_mock):
mock = MockParamCapture()
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', mock)
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', mock)
@pytest.fixture
-def update_mock(mock_func_factory, init_status):
+def update_mock(mock_func_factory, init_status, tokenizer_mock):
return mock_func_factory(nominatim.tools.replication, 'update')
@pytest.mark.parametrize("params,func", [
import pytest
from nominatim.indexer import indexer
+from nominatim.tokenizer import factory
class IndexerTestDB:
yield IndexerTestDB(temp_db_conn)
+@pytest.fixture
+def test_tokenizer(tokenizer_mock, def_config, tmp_path):
+ def_config.project_dir = tmp_path
+ return factory.create_tokenizer(def_config)
+
+
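Unlike the CLI tests, which replace get_tokenizer_for_db() outright, this fixture runs the real factory against the dummy module, so the tokenizer directory is created under tmp_path just as it would be for a production tokenizer. A hypothetical sanity check, mirroring the factory tests at the end of this section:

    def test_tokenizer_fixture_state(test_tokenizer):
        # factory.create_tokenizer() initialises a fresh tokenizer,
        # so the dummy is expected to record the "new" state.
        assert test_tokenizer.init_state == "new"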
@pytest.mark.parametrize("threads", [1, 15])
-def test_index_all_by_rank(test_db, threads):
+def test_index_all_by_rank(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
assert 31 == test_db.placex_unindexed()
assert 1 == test_db.osmline_unindexed()
- idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_by_rank(0, 30)
assert 0 == test_db.placex_unindexed()
@pytest.mark.parametrize("threads", [1, 15])
-def test_index_partial_without_30(test_db, threads):
+def test_index_partial_without_30(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
assert 31 == test_db.placex_unindexed()
assert 1 == test_db.osmline_unindexed()
- idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest',
+ test_tokenizer, threads)
idx.index_by_rank(4, 15)
assert 19 == test_db.placex_unindexed()
@pytest.mark.parametrize("threads", [1, 15])
-def test_index_partial_with_30(test_db, threads):
+def test_index_partial_with_30(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
assert 31 == test_db.placex_unindexed()
assert 1 == test_db.osmline_unindexed()
- idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_by_rank(28, 30)
assert 27 == test_db.placex_unindexed()
WHERE indexed_status = 0 AND rank_address between 1 and 27""")
@pytest.mark.parametrize("threads", [1, 15])
-def test_index_boundaries(test_db, threads):
+def test_index_boundaries(test_db, threads, test_tokenizer):
for rank in range(4, 10):
test_db.add_admin(rank_address=rank, rank_search=rank)
for rank in range(31):
assert 37 == test_db.placex_unindexed()
assert 1 == test_db.osmline_unindexed()
- idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_boundaries(0, 30)
assert 31 == test_db.placex_unindexed()
@pytest.mark.parametrize("threads", [1, 15])
-def test_index_postcodes(test_db, threads):
+def test_index_postcodes(test_db, threads, test_tokenizer):
for postcode in range(1000):
test_db.add_postcode('de', postcode)
for postcode in range(32000, 33000):
test_db.add_postcode('us', postcode)
- idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_postcodes()
assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
@pytest.mark.parametrize("analyse", [True, False])
-def test_index_full(test_db, analyse):
+def test_index_full(test_db, analyse, test_tokenizer):
for rank in range(4, 10):
test_db.add_admin(rank_address=rank, rank_search=rank)
for rank in range(31):
for postcode in range(1000):
test_db.add_postcode('de', postcode)
- idx = indexer.Indexer('dbname=test_nominatim_python_unittest', 4)
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4)
idx.index_full(analyse=analyse)
assert 0 == test_db.placex_unindexed()
@pytest.mark.parametrize("threads", [1, 15])
-def test_index_reopen_connection(test_db, threads, monkeypatch):
+def test_index_reopen_connection(test_db, threads, monkeypatch, test_tokenizer):
monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15)
for _ in range(1000):
test_db.add_place(rank_address=30, rank_search=30)
- idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
+ idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
idx.index_by_rank(28, 30)
assert 0 == test_db.placex_unindexed()
from nominatim.db import properties
from nominatim.tokenizer import factory
from nominatim.errors import UsageError
-import dummy_tokenizer
+from dummy_tokenizer import DummyTokenizer
@pytest.fixture
def test_config(def_config, tmp_path):
return def_config
-@pytest.fixture
-def tokenizer_import(monkeypatch):
- monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')
-
- def _import_dummy(module, *args, **kwargs):
- return dummy_tokenizer
-
- monkeypatch.setattr(importlib, "import_module", _import_dummy)
-
-
def test_setup_dummy_tokenizer(temp_db_conn, test_config,
- tokenizer_import, property_table):
+ tokenizer_mock, property_table):
tokenizer = factory.create_tokenizer(test_config)
- assert isinstance(tokenizer, dummy_tokenizer.DummyTokenizer)
+ assert isinstance(tokenizer, DummyTokenizer)
assert tokenizer.init_state == "new"
assert (test_config.project_dir / 'tokenizer').is_dir()
assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy'
-def test_setup_tokenizer_dir_exists(test_config, tokenizer_import, property_table):
+def test_setup_tokenizer_dir_exists(test_config, tokenizer_mock, property_table):
(test_config.project_dir / 'tokenizer').mkdir()
tokenizer = factory.create_tokenizer(test_config)
- assert isinstance(tokenizer, dummy_tokenizer.DummyTokenizer)
+ assert isinstance(tokenizer, DummyTokenizer)
assert tokenizer.init_state == "new"
-def test_setup_tokenizer_dir_failure(test_config, tokenizer_import, property_table):
+def test_setup_tokenizer_dir_failure(test_config, tokenizer_mock, property_table):
(test_config.project_dir / 'tokenizer').write_text("foo")
with pytest.raises(UsageError):
factory.create_tokenizer(test_config)
def test_load_tokenizer(temp_db_conn, test_config,
- tokenizer_import, property_table):
+ tokenizer_mock, property_table):
factory.create_tokenizer(test_config)
tokenizer = factory.get_tokenizer_for_db(test_config)
- assert isinstance(tokenizer, dummy_tokenizer.DummyTokenizer)
+ assert isinstance(tokenizer, DummyTokenizer)
assert tokenizer.init_state == "loaded"
-def test_load_no_tokenizer_dir(test_config, tokenizer_import, property_table):
+def test_load_no_tokenizer_dir(test_config, tokenizer_mock, property_table):
factory.create_tokenizer(test_config)
test_config.project_dir = test_config.project_dir / 'foo'
factory.get_tokenizer_for_db(test_config)
-def test_load_missing_propoerty(temp_db_cursor, test_config, tokenizer_import, property_table):
+def test_load_missing_property(temp_db_cursor, test_config, tokenizer_mock, property_table):
factory.create_tokenizer(test_config)
temp_db_cursor.execute("TRUNCATE TABLE nominatim_properties")