1 # SPDX-License-Identifier: GPL-3.0-or-later
 
   3 # This file is part of Nominatim. (https://nominatim.org)
 
   5 # Copyright (C) 2025 by the Nominatim developer community.
 
   6 # For a full list of authors see the git log.
 
"""Tests for functions to maintain the artificial postcode table."""
 
  14 from nominatim_db.tools import postcodes
 
  15 from nominatim_db.data import country_info
 
  16 import dummy_tokenizer
 
  19 class MockPostcodeTable:
 
  20     """ A location_postcode table for testing.
 
  22     def __init__(self, conn):
 
  24         with conn.cursor() as cur:
 
  25             cur.execute("""CREATE TABLE location_postcode (
 
  27                                parent_place_id BIGINT,
 
  29                                rank_address SMALLINT,
 
  30                                indexed_status SMALLINT,
 
  31                                indexed_date TIMESTAMP,
 
  32                                country_code varchar(2),
 
  34                                geometry GEOMETRY(Geometry, 4326))""")
 
  35             cur.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
 
  36                            RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
 
  38                            CREATE OR REPLACE FUNCTION get_country_code(place geometry)
 
  39                            RETURNS TEXT AS $$ BEGIN
 
  41                            END; $$ LANGUAGE plpgsql;
 
  45     def add(self, country, postcode, x, y):
 
  46         with self.conn.cursor() as cur:
 
  47             cur.execute("""INSERT INTO location_postcode (place_id, indexed_status,
 
  48                                                           country_code, postcode,
 
  50                            VALUES (nextval('seq_place'), 1, %s, %s,
 
  51                                    ST_SetSRID(ST_MakePoint(%s, %s), 4326))""",
 
  52                         (country, postcode, x, y))
 
  57         with self.conn.cursor() as cur:
 
  58             cur.execute("""SELECT country_code, postcode,
 
  59                                   ST_X(geometry), ST_Y(geometry)
 
  60                            FROM location_postcode""")
 
  61             return set((tuple(row) for row in cur))
 
  66     return dummy_tokenizer.DummyTokenizer(None)
 
  70 def postcode_table(def_config, temp_db_conn, placex_table):
 
  71     country_info.setup_country_config(def_config)
 
  72     return MockPostcodeTable(temp_db_conn)
 
  76 def insert_implicit_postcode(placex_table, place_row):
 
  78         Inserts data into the placex and place table
 
  79         which can then be used to compute one postcode.
 
  81     def _insert_implicit_postcode(osm_id, country, geometry, address):
 
  82         placex_table.add(osm_id=osm_id, country=country, geom=geometry)
 
  83         place_row(osm_id=osm_id, geom='SRID=4326;'+geometry, address=address)
 
  85     return _insert_implicit_postcode
 
  88 def test_postcodes_empty(dsn, postcode_table, place_table,
 
  90     postcodes.update_postcodes(dsn, tmp_path, tokenizer)
 
  92     assert not postcode_table.row_set
 
  95 def test_postcodes_add_new(dsn, postcode_table, tmp_path,
 
  96                            insert_implicit_postcode, tokenizer):
 
  97     insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='9486'))
 
  98     postcode_table.add('yy', '9486', 99, 34)
 
 100     postcodes.update_postcodes(dsn, tmp_path, tokenizer)
 
 102     assert postcode_table.row_set == {('xx', '9486', 10, 12), }
 
 105 def test_postcodes_replace_coordinates(dsn, postcode_table, tmp_path,
 
 106                                        insert_implicit_postcode, tokenizer):
 
 107     insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
 
 108     postcode_table.add('xx', 'AB 4511', 99, 34)
 
 110     postcodes.update_postcodes(dsn, tmp_path, tokenizer)
 
 112     assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)}
 
 115 def test_postcodes_replace_coordinates_close(dsn, postcode_table, tmp_path,
 
 116                                              insert_implicit_postcode, tokenizer):
 
 117     insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
 
 118     postcode_table.add('xx', 'AB 4511', 10, 11.99999)
 
 120     postcodes.update_postcodes(dsn, tmp_path, tokenizer)
 
 122     assert postcode_table.row_set == {('xx', 'AB 4511', 10, 11.99999)}
 
 125 def test_postcodes_remove(dsn, postcode_table, tmp_path,
 
 126                           insert_implicit_postcode, tokenizer):
 
 127     insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
 
 128     postcode_table.add('xx', 'badname', 10, 12)
 
 130     postcodes.update_postcodes(dsn, tmp_path, tokenizer)
 
 132     assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)}
 
 135 def test_postcodes_ignore_empty_country(dsn, postcode_table, tmp_path,
 
 136                                         insert_implicit_postcode, tokenizer):
 
 137     insert_implicit_postcode(1, None, 'POINT(10 12)', dict(postcode='AB 4511'))
 
 138     postcodes.update_postcodes(dsn, tmp_path, tokenizer)
 
 139     assert not postcode_table.row_set
 
 142 def test_postcodes_remove_all(dsn, postcode_table, place_table,
 
 143                               tmp_path, tokenizer):
 
 144     postcode_table.add('ch', '5613', 10, 12)
 
 145     postcodes.update_postcodes(dsn, tmp_path, tokenizer)
 
 147     assert not postcode_table.row_set
 
 150 def test_postcodes_multi_country(dsn, postcode_table, tmp_path,
 
 151                                  insert_implicit_postcode, tokenizer):
 
 152     insert_implicit_postcode(1, 'de', 'POINT(10 12)', dict(postcode='54451'))
 
 153     insert_implicit_postcode(2, 'cc', 'POINT(100 56)', dict(postcode='DD23 T'))
 
 154     insert_implicit_postcode(3, 'de', 'POINT(10.3 11.0)', dict(postcode='54452'))
 
 155     insert_implicit_postcode(4, 'cc', 'POINT(10.3 11.0)', dict(postcode='54452'))
 
 157     postcodes.update_postcodes(dsn, tmp_path, tokenizer)
 
 159     assert postcode_table.row_set == {('de', '54451', 10, 12),
 
 160                                       ('de', '54452', 10.3, 11.0),
 
 161                                       ('cc', '54452', 10.3, 11.0),
 
 162                                       ('cc', 'DD23 T', 100, 56)}
 
 165 @pytest.mark.parametrize("gzipped", [True, False])
 
 166 def test_postcodes_extern(dsn, postcode_table, tmp_path,
 
 167                           insert_implicit_postcode, tokenizer, gzipped):
 
 168     insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
 
 170     extfile = tmp_path / 'xx_postcodes.csv'
 
 171     extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
 
 174         subprocess.run(['gzip', str(extfile)])
 
 175         assert not extfile.is_file()
 
 177     postcodes.update_postcodes(dsn, tmp_path, tokenizer)
 
 179     assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12),
 
 180                                       ('xx', 'CD 4511', -10, -5)}
 
 183 def test_postcodes_extern_bad_column(dsn, postcode_table, tmp_path,
 
 184                                      insert_implicit_postcode, tokenizer):
 
 185     insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
 
 187     extfile = tmp_path / 'xx_postcodes.csv'
 
 188     extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
 
 190     postcodes.update_postcodes(dsn, tmp_path, tokenizer)
 
 192     assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12)}
 
 195 def test_postcodes_extern_bad_number(dsn, insert_implicit_postcode,
 
 196                                      postcode_table, tmp_path, tokenizer):
 
 197     insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))
 
 199     extfile = tmp_path / 'xx_postcodes.csv'
 
 200     extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0")
 
 202     postcodes.update_postcodes(dsn, tmp_path, tokenizer)
 
 204     assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12),
 
 205                                       ('xx', 'CD 4511', -10, -5)}
 
 208 def test_can_compute(dsn, table_factory):
 
 209     assert not postcodes.can_compute(dsn)
 
 210     table_factory('place')
 
 211     assert postcodes.can_compute(dsn)
 
 214 def test_no_placex_entry(dsn, tmp_path, temp_db_cursor, place_row, postcode_table, tokenizer):
 
 215     # Rewrite the get_country_code function to verify its execution.
 
 216     temp_db_cursor.execute("""
 
 217         CREATE OR REPLACE FUNCTION get_country_code(place geometry)
 
 218         RETURNS TEXT AS $$ BEGIN
 
 220         END; $$ LANGUAGE plpgsql;
 
 222     place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511'))
 
 223     postcodes.update_postcodes(dsn, tmp_path, tokenizer)
 
 225     assert postcode_table.row_set == {('yy', 'AB 4511', 10, 12)}
 
 228 def test_discard_badly_formatted_postcodes(dsn, tmp_path, temp_db_cursor, place_row,
 
 229                                            postcode_table, tokenizer):
 
 230     # Rewrite the get_country_code function to verify its execution.
 
 231     temp_db_cursor.execute("""
 
 232         CREATE OR REPLACE FUNCTION get_country_code(place geometry)
 
 233         RETURNS TEXT AS $$ BEGIN
 
 235         END; $$ LANGUAGE plpgsql;
 
 237     place_row(geom='SRID=4326;POINT(10 12)', address=dict(postcode='AB 4511'))
 
 238     postcodes.update_postcodes(dsn, tmp_path, tokenizer)
 
 240     assert not postcode_table.row_set