help='Import special phrases from the OSM wiki to the database.')
group.add_argument('--import-from-csv', metavar='FILE',
help='Import special phrases from a CSV file.')
+ group.add_argument('--no-replace', action='store_true',
+ help='Keep the old phrases and only add the new ones.')
@staticmethod
def run(args):
from ..tokenizer import factory as tokenizer_factory
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+ should_replace = not args.no_replace
with connect(args.config.get_libpq_dsn()) as db_connection:
SPImporter(
args.config, args.phplib_dir, db_connection, loader
- ).import_phrases(tokenizer)
+ ).import_phrases(tokenizer, should_replace)
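For reviewers, a quick illustration of the flag semantics; this is a minimal, self-contained sketch using a bare argparse parser, not the project's actual command wiring:

```python
import argparse

# Hypothetical stand-in for the special-phrases subcommand parser.
parser = argparse.ArgumentParser()
parser.add_argument('--no-replace', action='store_true',
                    help='Keep the old phrases and only add the new ones.')

# Default run: flag absent, so should_replace ends up True (full replacement).
assert not parser.parse_args([]).no_replace
# With --no-replace: existing phrases are kept (should_replace is False).
assert parser.parse_args(['--no-replace']).no_replace
```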
# WHERE word_id is null and type = 'postcode'""")
- def update_special_phrases(self, phrases):
+ def update_special_phrases(self, phrases, should_replace):
""" Replace the search index for special phrases with the new phrases.
"""
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
columns=['word', 'word_token', 'class', 'type',
'operator', 'search_name_count'])
- if to_delete:
+ if to_delete and should_replace:
psycopg2.extras.execute_values(
cur,
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
FROM location_postcode) x""")
- def update_special_phrases(self, phrases):
+ def update_special_phrases(self, phrases, should_replace):
""" Replace the search index for special phrases with the new phrases.
"""
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
FROM (VALUES %s) as v(name, class, type, op))""",
to_add)
- if to_delete:
+ if to_delete and should_replace:
psycopg2.extras.execute_values(
cur,
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
class SPImporter():
# pylint: disable-msg=too-many-instance-attributes
"""
- Class handling the process of special phrases importations into the database.
- Take a sp loader which load the phrases from an external source.
+ Class handling the process of special phrases importation into the database.
+ Takes an SP loader which loads the phrases from an external source.
"""
#special phrases class/type on the wiki.
self.table_phrases_to_delete = set()
- def import_phrases(self, tokenizer):
+ def import_phrases(self, tokenizer, should_replace):
"""
- Iterate through all specified languages and
- extract corresponding special phrases from the wiki.
+ Iterate through all SpecialPhrases extracted from the
+ loader and import them into the database.
+
+ If should_replace is set to True, only the loaded phrases
+ will be kept in the database. All other phrases already
+ in the database will be removed.
"""
LOG.warning('Special phrases importation starting')
self._fetch_existing_place_classtype_tables()
class_type_pairs.update(result)
self._create_place_classtype_table_and_indexes(class_type_pairs)
- self._remove_non_existent_tables_from_db()
+ if should_replace:
+ self._remove_non_existent_tables_from_db()
self.db_connection.commit()
with tokenizer.name_analyzer() as analyzer:
- analyzer.update_special_phrases(self.word_phrases)
+ analyzer.update_special_phrases(self.word_phrases, should_replace)
LOG.warning('Import done.')
self.statistics_handler.notify_import_done()
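The same rule applies to the generated place_classtype tables: `_remove_non_existent_tables_from_db()` only runs on a full replacement. A hedged sketch of that decision, with made-up table names:

```python
# Hypothetical illustration: obsolete place_classtype tables are only
# scheduled for removal when should_replace is True.
def tables_to_drop(existing_tables, wanted_tables, should_replace):
    if not should_replace:
        return set()
    return existing_tables - wanted_tables

existing = {'place_classtype_amenity_prison', 'place_classtype_wrong_type'}
wanted = {'place_classtype_amenity_prison'}
assert tables_to_drop(existing, wanted, should_replace=False) == set()
assert tables_to_drop(existing, wanted, True) == {'place_classtype_wrong_type'}
```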
def add_postcodes_from_db(self):
pass
- def update_special_phrases(self, phrases):
+ def update_special_phrases(self, phrases, should_replace):
self.analyser_cache['special_phrases'] = phrases
def add_country_names(self, code, names):
assert bnd_mock.called == do_bnds
assert rank_mock.called == do_ranks
-def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock):
+@pytest.mark.parametrize("no_replace", [True, False])
+def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
- call_nominatim('special-phrases', '--import-from-wiki')
+ if no_replace:
+ call_nominatim('special-phrases', '--import-from-wiki', '--no-replace')
+ else:
+ call_nominatim('special-phrases', '--import-from-wiki')
assert func.called == 1
-def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock):
+@pytest.mark.parametrize("no_replace", [True, False])
+def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
- testdata = Path('__file__') / '..' / '..' / 'testdb'
+ testdata = SRC_DIR / 'test' / 'testdb'
csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
- call_nominatim('special-phrases', '--import-from-csv', csv_path)
+
+ if no_replace:
+ call_nominatim('special-phrases', '--import-from-csv', csv_path, '--no-replace')
+ else:
+ call_nominatim('special-phrases', '--import-from-csv', csv_path)
assert func.called == 1
("König bei", "amenity", "royal", "near"),
("Könige", "amenity", "royal", "-"),
("strasse", "highway", "primary", "in")
- ])
+ ], True)
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
FROM word WHERE class != 'place'""") \
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
- analyzer.update_special_phrases([])
+ analyzer.update_special_phrases([], True)
assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,
+ make_standard_name):
+ temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+ VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
+ (' bar', 'bar', 'highway', 'road', null)""")
+
+ assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+
+ analyzer.update_special_phrases([], False)
+
+ assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+
+
def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
make_standard_name):
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
('prison', 'amenity', 'prison', 'in'),
('bar', 'highway', 'road', '-'),
('garden', 'leisure', 'garden', 'near')
- ])
+ ], True)
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
FROM word WHERE class != 'place'""") \
("König bei", "amenity", "royal", "near"),
("Könige", "amenity", "royal", "-"),
("street", "highway", "primary", "in")
- ])
+ ], True)
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
FROM word WHERE class != 'place'""") \
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
with analyzer() as a:
- a.update_special_phrases([])
+ a.update_special_phrases([], True)
assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor):
+ temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+ VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
+ (' BAR', 'bar', 'highway', 'road', null)""")
+
+ assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+
+ with analyzer() as a:
+ a.update_special_phrases([], False)
+
+ assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+
+
def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
('prison', 'amenity', 'prison', 'in'),
('bar', 'highway', 'road', '-'),
('garden', 'leisure', 'garden', 'near')
- ])
+ ], True)
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
FROM word WHERE class != 'place'""") \
tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep'
)
+@pytest.mark.parametrize("should_replace", [True, False])
def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
- placex_table, tokenizer_mock):
+ placex_table, tokenizer_mock, should_replace):
"""
Check that the main import_phrases() method is well executed.
It should create the place_classtype table, the place_id and centroid indexes,
CREATE TABLE place_classtype_wrongclass_wrongtype();""")
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
- mock_get_wiki_content)
+ mock_get_wiki_content)
tokenizer = tokenizer_mock()
- sp_importer.import_phrases(tokenizer)
+ sp_importer.import_phrases(tokenizer, should_replace)
assert len(tokenizer.analyser_cache['special_phrases']) == 18
assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter')
- assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
+ if should_replace:
+ assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
# Format (query, should_return_something_bool) used to easily execute all asserts
queries_tests = set()
WHERE table_schema='public'
AND table_name = 'place_classtype_wrongclass_wrongtype';
"""
- queries_tests.add((query_wrong_table, False))
+ if should_replace:
+ queries_tests.add((query_wrong_table, False))
with temp_db_conn.cursor() as temp_db_cursor:
for query in queries_tests:
else:
assert not temp_db_cursor.fetchone()
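For readers unfamiliar with the (query, should_return_something_bool) convention used above, here is a self-contained reproduction against an in-memory SQLite database (the table names are invented for the example):

```python
import sqlite3

# Hypothetical, minimal reproduction of the assertion pattern: each entry
# pairs a query with whether it is expected to return a row.
conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE place_classtype_kept (place_id INTEGER)')

queries_tests = {
    ("SELECT name FROM sqlite_master WHERE name = 'place_classtype_kept'", True),
    ("SELECT name FROM sqlite_master WHERE name = 'place_classtype_gone'", False),
}

for query, should_return_something in queries_tests:
    row = conn.execute(query).fetchone()
    if should_return_something:
        assert row
    else:
        assert not row
```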
-def mock_get_wiki_content(lang):
+def mock_get_wiki_content(self, lang):
"""
Mock the _get_wiki_content() method to return
static xml test file content.
phrases = sp_csv_loader.parse_csv()
assert check_phrases_content(phrases)
-
def test_next(sp_csv_loader):
"""
Test objects returned from the next() method.
mock_get_wiki_content)
return loader
-def mock_get_wiki_content(lang):
+def mock_get_wiki_content(self, lang):
"""
Mock the _get_wiki_content() method to return
static xml test file content.