## Removing large deleted objects
+Command: `nominatim admin --clean-deleted <PostgreSQL Time Interval>`
+
Nominatim refuses to delete very large areas because often these deletions are
accidental and are reverted within hours. Instead the deletions are logged in
the `import_polygon_delete` table and left to the administrator to clean up.
-There is currently no command to do that. You can use the following SQL
-query to force a deletion on all objects that have been deleted more than
-a certain timespan ago (here: 1 month):
+To run this command you will need to pass a PostgreSQL time interval. For example, to
+delete any objects that have been deleted more than a month ago, you would run:
+`nominatim admin --clean-deleted '1 month'`
-```sql
-SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
-WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
- and age(p.indexed_date) > '1 month'::interval
-```
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-
-CREATE OR REPLACE FUNCTION flush_deleted_places()
- RETURNS INTEGER
- AS $$
-BEGIN
- -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
- INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
- SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
-
- -- delete from place table
- ALTER TABLE place DISABLE TRIGGER place_before_delete;
- DELETE FROM place USING place_to_be_deleted
- WHERE place.osm_type = place_to_be_deleted.osm_type
- and place.osm_id = place_to_be_deleted.osm_id
- and place.class = place_to_be_deleted.class
- and place.type = place_to_be_deleted.type
- and not deferred;
- ALTER TABLE place ENABLE TRIGGER place_before_delete;
-
- -- Mark for delete in the placex table
- UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
- WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
- and placex.osm_id = place_to_be_deleted.osm_id
- and placex.class = place_to_be_deleted.class
- and placex.type = place_to_be_deleted.type
- and not deferred;
- UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
- WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
- and placex.osm_id = place_to_be_deleted.osm_id
- and placex.class = place_to_be_deleted.class
- and placex.type = place_to_be_deleted.type
- and not deferred;
- UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
- WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
- and placex.osm_id = place_to_be_deleted.osm_id
- and placex.class = place_to_be_deleted.class
- and placex.type = place_to_be_deleted.type
- and not deferred;
-
- -- Mark for delete in interpolations
- UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
- WHERE place_to_be_deleted.osm_type = 'W'
- and place_to_be_deleted.class = 'place'
- and place_to_be_deleted.type = 'houses'
- and location_property_osmline.osm_id = place_to_be_deleted.osm_id
- and not deferred;
-
- -- Clear todo list.
- TRUNCATE TABLE place_to_be_deleted;
-
- RETURN NULL;
-END;
-$$ LANGUAGE plpgsql;
-
END;
$$
LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION flush_deleted_places()
+  RETURNS INTEGER
+  AS $$
+BEGIN
+  -- Deferred entries are large polygons whose removal can have a massive
+  -- effect on the system. They are only logged in import_polygon_delete and
+  -- need manual intervention to be let through.
+  INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
+    SELECT d.osm_type, d.osm_id, d.class, d.type
+      FROM place_to_be_deleted AS d
+     WHERE d.deferred;
+
+  -- Remove all non-deferred entries from the place table while its
+  -- before-delete trigger is switched off.
+  ALTER TABLE place DISABLE TRIGGER place_before_delete;
+  DELETE FROM place AS pl
+    USING place_to_be_deleted AS d
+    WHERE NOT d.deferred
+      AND pl.osm_type = d.osm_type
+      AND pl.osm_id = d.osm_id
+      AND pl.class = d.class
+      AND pl.type = d.type;
+  ALTER TABLE place ENABLE TRIGGER place_before_delete;
+
+  -- Mark the matching placex rows for deletion (indexed_status 100).
+  -- There is one statement per OSM type ('N', 'W', 'R'), each with the type
+  -- given as a literal on both sides of the join.
+  UPDATE placex AS px SET indexed_status = 100
+    FROM place_to_be_deleted AS d
+    WHERE NOT d.deferred
+      AND px.osm_type = 'N' AND d.osm_type = 'N'
+      AND px.osm_id = d.osm_id
+      AND px.class = d.class
+      AND px.type = d.type;
+  UPDATE placex AS px SET indexed_status = 100
+    FROM place_to_be_deleted AS d
+    WHERE NOT d.deferred
+      AND px.osm_type = 'W' AND d.osm_type = 'W'
+      AND px.osm_id = d.osm_id
+      AND px.class = d.class
+      AND px.type = d.type;
+  UPDATE placex AS px SET indexed_status = 100
+    FROM place_to_be_deleted AS d
+    WHERE NOT d.deferred
+      AND px.osm_type = 'R' AND d.osm_type = 'R'
+      AND px.osm_id = d.osm_id
+      AND px.class = d.class
+      AND px.type = d.type;
+
+  -- Mark the matching interpolation lines for deletion as well.
+  UPDATE location_property_osmline AS ol SET indexed_status = 100
+    FROM place_to_be_deleted AS d
+    WHERE NOT d.deferred
+      AND d.osm_type = 'W'
+      AND d.class = 'place'
+      AND d.type = 'houses'
+      AND ol.osm_id = d.osm_id;
+
+  -- Everything on the todo list has been handled, so empty it.
+  TRUNCATE TABLE place_to_be_deleted;
+
+  RETURN NULL;
+END;
+$$ LANGUAGE plpgsql;
return REVERSE_MAX_RANKS[max(0, min(18, zoom))]
-FEATURE_TYPE_TO_RANK: Dict[Optional[str], Any] = {
+FEATURE_TYPE_TO_RANK: Dict[Optional[str], Tuple[int, int]] = {
'country': (4, 4),
'state': (8, 8),
'city': (14, 16),
help='Print performance analysis of the indexing process')
objs.add_argument('--collect-os-info', action="store_true",
help="Generate a report about the host system information")
+ objs.add_argument('--clean-deleted', action='store', metavar='AGE',
+ help='Clean up deleted relations')
group = parser.add_argument_group('Arguments for cache warming')
group.add_argument('--search-only', action='store_const', dest='target',
const='search',
mgroup.add_argument('--place-id', type=int,
help='Analyse indexing of the given Nominatim object')
+
def run(self, args: NominatimArgs) -> int:
+ # pylint: disable=too-many-return-statements
if args.warm:
return self._warm(args)
collect_os_info.report_system_information(args.config)
return 0
+ if args.clean_deleted:
+ LOG.warning('Cleaning up deleted relations')
+ from ..tools import admin
+ admin.clean_deleted_relations(args.config, age=args.clean_deleted)
+ return 0
+
return 1
check_database: bool
migrate: bool
collect_os_info: bool
+ clean_deleted: str
analyse_indexing: bool
target: Optional[str]
osm_id: Optional[str]
pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
- self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?(.*)\\s*')
+ self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?({pc_pattern})\\s*')
self.pattern = re.compile(pc_pattern)
self.output = config.get('output', r'\g<0>')
import logging
from psycopg2.extras import Json, register_hstore
+from psycopg2 import DataError
from nominatim.config import Configuration
from nominatim.db.connection import connect, Cursor
for msg in conn.notices:
print(msg)
+
+
+def clean_deleted_relations(config: Configuration, age: str) -> None:
+ """ Clean deleted relations older than a given age
+
+ Calls the SQL function `place_force_delete()` for every `placex` row
+ that matches an entry of `import_polygon_delete` (same `osm_type` and
+ `osm_id`) and whose `age(indexed_date)` is greater than `age`.
+
+ `config` supplies the database connection string. `age` is handed
+ to PostgreSQL as a bound parameter and cast to an interval there
+ (for example '1 month').
+
+ Raises a UsageError when the query fails with a psycopg2 DataError;
+ this is reported as an invalid interval format.
+ """
+ with connect(config.get_libpq_dsn()) as conn:
+ with conn.cursor() as cur:
+ # `age` is passed as a query parameter, never interpolated into the SQL.
+ try:
+ cur.execute("""SELECT place_force_delete(p.place_id)
+ FROM import_polygon_delete d, placex p
+ WHERE p.osm_type = d.osm_type AND p.osm_id = d.osm_id
+ AND age(p.indexed_date) > %s::interval""",
+ (age, ))
+ # Turn the database error into a user-facing error, keeping the cause.
+ except DataError as exc:
+ raise UsageError('Invalid PostgreSQL time interval format') from exc
+ conn.commit()
assert mock.called == 1
+def test_admin_clean_deleted_relations(cli_call, mock_func_factory):
+ mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')
+
+ assert cli_call('admin', '--clean-deleted', '1 month') == 0
+ assert mock.called == 1
+
+def test_admin_clean_deleted_relations_no_age(cli_call, mock_func_factory):
+ mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')
+
+ assert cli_call('admin', '--clean-deleted') == 1
+
class TestCliAdminWithDb:
@pytest.fixture(autouse=True)
assert sanitize(country='se', postcode=postcode) == []
+@pytest.mark.parametrize("postcode", ('AD123', '123', 'AD 123', 'AD-123'))
+def test_postcode_andorra_pass(sanitize, postcode):
+ """ Andorra: every listed spelling is normalised to 'AD123'.
+ """
+ assert sanitize(country='ad', postcode=postcode) == [('postcode', 'AD123')]
+
+
+@pytest.mark.parametrize("postcode", ('AD1234', 'AD AD123', 'XX123'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_andorra_fail(sanitize, postcode):
+ """ Andorra: the listed malformed values give an empty result when
+ the sanitizer runs with convert_to_address=False.
+ """
+ assert sanitize(country='ad', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('AI-2640', '2640', 'AI 2640'))
+def test_postcode_anguilla_pass(sanitize, postcode):
+ """ Anguilla: every listed spelling is normalised to 'AI-2640'.
+ """
+ assert sanitize(country='ai', postcode=postcode) == [('postcode', 'AI-2640')]
+
+
+@pytest.mark.parametrize("postcode", ('AI-2000', 'AI US-2640', 'AI AI-2640'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_anguilla_fail(sanitize, postcode):
+ """ Anguilla: the listed malformed values give an empty result when
+ the sanitizer runs with convert_to_address=False.
+ """
+ assert sanitize(country='ai', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('BN1111', 'BN 1111', 'BN BN1111', 'BN BN 1111'))
+def test_postcode_brunei_pass(sanitize, postcode):
+ """ Brunei: every listed spelling, including those with a repeated
+ 'BN' prefix, is normalised to 'BN1111'.
+ """
+ assert sanitize(country='bn', postcode=postcode) == [('postcode', 'BN1111')]
+
+
+@pytest.mark.parametrize("postcode", ('BN-1111', 'BNN1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_brunei_fail(sanitize, postcode):
+ """ Brunei: the listed malformed values give an empty result when
+ the sanitizer runs with convert_to_address=False.
+ """
+ assert sanitize(country='bn', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('IM1 1AA', 'IM11AA', 'IM IM11AA'))
+def test_postcode_isle_of_man_pass(sanitize, postcode):
+ """ Isle of Man: every listed spelling is normalised to 'IM1 1AA'.
+ """
+ assert sanitize(country='im', postcode=postcode) == [('postcode', 'IM1 1AA')]
+
+
+@pytest.mark.parametrize("postcode", ('IZ1 1AA', 'IM1 AA'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_isle_of_man_fail(sanitize, postcode):
+ """ Isle of Man: the listed malformed values give an empty result
+ when the sanitizer runs with convert_to_address=False.
+ """
+ assert sanitize(country='im', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('JE5 0LA', 'JE50LA', 'JE JE50LA', 'je JE5 0LA'))
+def test_postcode_jersey_pass(sanitize, postcode):
+ """ Jersey: every listed spelling, including a lower-case 'je'
+ prefix, is normalised to 'JE5 0LA'.
+ """
+ assert sanitize(country='je', postcode=postcode) == [('postcode', 'JE5 0LA')]
+
+
+@pytest.mark.parametrize("postcode", ('gb JE5 0LA', 'IM50LA', 'IM5 012'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_jersey_fail(sanitize, postcode):
+ """ Jersey: the listed malformed values give an empty result when
+ the sanitizer runs with convert_to_address=False.
+ """
+ assert sanitize(country='je', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('KY1-1234', '1-1234', 'KY 1-1234'))
+def test_postcode_cayman_islands_pass(sanitize, postcode):
+ """ Cayman Islands: every listed spelling is normalised to 'KY1-1234'.
+ """
+ assert sanitize(country='ky', postcode=postcode) == [('postcode', 'KY1-1234')]
+
+
+@pytest.mark.parametrize("postcode", ('KY-1234', 'KZ1-1234', 'KY1 1234', 'KY1-123', 'KY KY1-1234'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_cayman_islands_fail(sanitize, postcode):
+ """ Cayman Islands: the listed malformed values give an empty result
+ when the sanitizer runs with convert_to_address=False.
+ """
+ assert sanitize(country='ky', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('LC11 222', '11 222', '11222', 'LC 11 222'))
+def test_postcode_saint_lucia_pass(sanitize, postcode):
+ """ Saint Lucia: every listed spelling is normalised to 'LC11 222'.
+ """
+ assert sanitize(country='lc', postcode=postcode) == [('postcode', 'LC11 222')]
+
+
+@pytest.mark.parametrize("postcode", ('11 2222', 'LC LC11 222'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_saint_lucia_fail(sanitize, postcode):
+ """ Saint Lucia: the listed malformed values give an empty result
+ when the sanitizer runs with convert_to_address=False.
+ """
+ assert sanitize(country='lc', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('LV-1111', '1111', 'LV 1111', 'LV1111',))
+def test_postcode_latvia_pass(sanitize, postcode):
+ """ Latvia: every listed spelling is normalised to 'LV-1111'.
+ """
+ assert sanitize(country='lv', postcode=postcode) == [('postcode', 'LV-1111')]
+
+
+@pytest.mark.parametrize("postcode", ('111', '11111', 'LV LV-1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_latvia_fail(sanitize, postcode):
+ """ Latvia: the listed malformed values give an empty result when
+ the sanitizer runs with convert_to_address=False.
+ """
+ assert sanitize(country='lv', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('MD-1111', '1111', 'MD 1111', 'MD1111'))
+def test_postcode_moldova_pass(sanitize, postcode):
+ """ Moldova: every listed spelling is normalised to 'MD-1111'.
+ """
+ assert sanitize(country='md', postcode=postcode) == [('postcode', 'MD-1111')]
+
+
+@pytest.mark.parametrize("postcode", ("MD MD-1111", "MD MD1111", "MD MD 1111"))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_moldova_fail(sanitize, postcode):
+ """ Moldova: values with a repeated 'MD' prefix give an empty result
+ when the sanitizer runs with convert_to_address=False.
+ """
+ assert sanitize(country='md', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('VLT 1117', 'GDJ 1234', 'BZN 2222'))
+def test_postcode_malta_pass(sanitize, postcode):
+ """ Malta: the listed well-formed postcodes are returned unchanged.
+ """
+ assert sanitize(country='mt', postcode=postcode) == [('postcode', postcode)]
+
+
+@pytest.mark.parametrize("postcode", ('MTF 1111', 'MT MTF 1111', 'MTF1111', 'MT MTF1111'))
+def test_postcode_malta_mtarfa_pass(sanitize, postcode):
+ """ Malta: postcodes starting with 'MTF', with or without an extra
+ 'MT' prefix, are normalised to 'MTF 1111'.
+ """
+ assert sanitize(country='mt', postcode=postcode) == [('postcode', 'MTF 1111')]
+
+
+@pytest.mark.parametrize("postcode", ('1111', 'MTMT 1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_malta_fail(sanitize, postcode):
+ """ Malta: the listed malformed values give an empty result when
+ the sanitizer runs with convert_to_address=False.
+ """
+ assert sanitize(country='mt', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('VC1111', '1111', 'VC-1111', 'VC 1111'))
+def test_postcode_saint_vincent_pass(sanitize, postcode):
+ """ Saint Vincent: every listed spelling is normalised to 'VC1111'.
+ """
+ assert sanitize(country='vc', postcode=postcode) == [('postcode', 'VC1111')]
+
+
+@pytest.mark.parametrize("postcode", ('VC11', 'VC VC1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_saint_vincent_fail(sanitize, postcode):
+ """ Saint Vincent: the listed malformed values give an empty result
+ when the sanitizer runs with convert_to_address=False.
+ """
+ assert sanitize(country='vc', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('VG1111', '1111', 'VG 1111', 'VG-1111'))
+def test_postcode_virgin_islands_pass(sanitize, postcode):
+ """ Virgin Islands ('vg'): every listed spelling is normalised to
+ 'VG1111'.
+ """
+ assert sanitize(country='vg', postcode=postcode) == [('postcode', 'VG1111')]
+
+
+@pytest.mark.parametrize("postcode", ('111', '11111', 'VG VG1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_virgin_islands_fail(sanitize, postcode):
+ """ Virgin Islands ('vg'): the listed malformed values give an empty
+ result when the sanitizer runs with convert_to_address=False.
+ """
+ assert sanitize(country='vg', postcode=postcode) == []
+
+
@pytest.mark.parametrize("postcode", ('AB1', '123-456-7890', '1 as 44'))
@pytest.mark.sanitizer_params(default_pattern='[A-Z0-9- ]{3,12}')
def test_postcode_default_pattern_pass(sanitize, postcode):
@pytest.mark.sanitizer_params(convert_to_address=False, default_pattern='[A-Z0-9- ]{3,12}')
def test_postcode_default_pattern_fail(sanitize, postcode):
assert sanitize(country='an', postcode=postcode) == []
-
from nominatim.errors import UsageError
from nominatim.tools import admin
from nominatim.tokenizer import factory
+from nominatim.db.sql_preprocessor import SQLPreprocessor
@pytest.fixture(autouse=True)
def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table):
VALUES(9988, 'N', 10000)""")
admin.analyse_indexing(project_env, osm_id='N10000')
+
+
+class TestAdminCleanDeleted:
+ # Tests for admin.clean_deleted_relations() against a test database that
+ # holds three entries in import_polygon_delete with differently aged
+ # placex rows.
+
+ @pytest.fixture(autouse=True)
+ def setup_polygon_delete(self, project_env, table_factory, place_table, osmline_table, temp_db_cursor, temp_db_conn, def_config, src_dir):
+ """ Set up place_force_delete function and related tables
+
+ Creates `import_polygon_delete` with three entries and the matching
+ `placex` rows (indexed 1 month, 3 months and 3 months ago, all with
+ indexed_status 1), adds the helper tables and the trigger, and
+ finally loads 'functions/utils.sql' from the source tree.
+ """
+ self.project_env = project_env
+ self.temp_db_cursor = temp_db_cursor
+ # Deletion log: two 'N' objects and one 'R' object.
+ table_factory('import_polygon_delete',
+ """osm_id BIGINT,
+ osm_type CHAR(1),
+ class TEXT NOT NULL,
+ type TEXT NOT NULL""",
+ ((100, 'N', 'boundary', 'administrative'),
+ (145, 'N', 'boundary', 'administrative'),
+ (175, 'R', 'landcover', 'grass')))
+ # placex rows for the logged objects; the tests key off the age of indexed_date.
+ temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_id, osm_type, class, type, indexed_date, indexed_status)
+ VALUES(1, 100, 'N', 'boundary', 'administrative', current_date - INTERVAL '1 month', 1),
+ (2, 145, 'N', 'boundary', 'administrative', current_date - INTERVAL '3 month', 1),
+ (3, 175, 'R', 'landcover', 'grass', current_date - INTERVAL '3 months', 1)""")
+ # set up tables and triggers for utils function
+ table_factory('place_to_be_deleted',
+ """osm_id BIGINT,
+ osm_type CHAR(1),
+ class TEXT NOT NULL,
+ type TEXT NOT NULL,
+ deferred BOOLEAN""")
+ table_factory('country_name', 'partition INT')
+ table_factory('import_polygon_error', """osm_id BIGINT,
+ osm_type CHAR(1),
+ class TEXT NOT NULL,
+ type TEXT NOT NULL""")
+ # Minimal stand-in for the delete trigger: the function does nothing
+ # and returns NULL.
+ temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_delete()
+ RETURNS TRIGGER AS $$
+ BEGIN RETURN NULL; END;
+ $$ LANGUAGE plpgsql;""")
+ temp_db_cursor.execute("""CREATE TRIGGER place_before_delete BEFORE DELETE ON place
+ FOR EACH ROW EXECUTE PROCEDURE place_delete();""")
+ # Temporarily point the configuration at the SQL files of the source
+ # tree to load the utility functions, then restore the original path.
+ orig_sql = def_config.lib_dir.sql
+ def_config.lib_dir.sql = src_dir / 'lib-sql'
+ sqlproc = SQLPreprocessor(temp_db_conn, def_config)
+ sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
+ def_config.lib_dir.sql = orig_sql
+
+
+ def test_admin_clean_deleted_no_records(self):
+ """ With an age of '1 year' no entry is old enough: all placex rows
+ keep indexed_status 1 and the deletion log still has 3 rows.
+ """
+ admin.clean_deleted_relations(self.project_env, age='1 year')
+ assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
+ (145, 'N', 'boundary', 'administrative', 1),
+ (175, 'R', 'landcover', 'grass', 1)}
+ assert self.temp_db_cursor.table_rows('import_polygon_delete') == 3
+
+
+ @pytest.mark.parametrize('test_age', ['T week', '1 welk', 'P1E'])
+ def test_admin_clean_deleted_bad_age(self, test_age):
+ """ Malformed age strings are reported as UsageError.
+ """
+ with pytest.raises(UsageError):
+ admin.clean_deleted_relations(self.project_env, age = test_age)
+
+
+ def test_admin_clean_deleted_partial(self):
+ """ With an age of '2 months' only the two entries indexed 3 months
+ ago are processed: their indexed_status becomes 100 and a single
+ row remains in the deletion log.
+ """
+ admin.clean_deleted_relations(self.project_env, age = '2 months')
+ assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
+ (145, 'N', 'boundary', 'administrative', 100),
+ (175, 'R', 'landcover', 'grass', 100)}
+ assert self.temp_db_cursor.table_rows('import_polygon_delete') == 1
+
+ @pytest.mark.parametrize('test_age', ['1 week', 'P3D', '5 hours'])
+ def test_admin_clean_deleted(self, test_age):
+ """ For ages of one week or less all three entries are processed:
+ every placex row gets indexed_status 100 and the deletion log
+ ends up empty.
+ """
+ admin.clean_deleted_relations(self.project_env, age = test_age)
+ assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 100),
+ (145, 'N', 'boundary', 'administrative', 100),
+ (175, 'R', 'landcover', 'grass', 100)}
+ assert self.temp_db_cursor.table_rows('import_polygon_delete') == 0