test/bdd/steps/steps_db_ops.py

   1 import logging
   2 from itertools import chain
   3
   4 import psycopg2.extras
   5
   6 from place_inserter import PlaceColumn
   7 from table_compare import NominatimID, DBRow
   8
   9 from nominatim.indexer import indexer
  10
  11 def check_database_integrity(context):
  12     """ Check some generic constraints on the tables.
  13     """
  14     # place_addressline should not have duplicate (place_id, address_place_id)
  15     cur = context.db.cursor()
  16     cur.execute("""SELECT count(*) FROM
  17                     (SELECT place_id, address_place_id, count(*) as c
  18                      FROM place_addressline GROUP BY place_id, address_place_id) x
  19                    WHERE c > 1""")
  20     assert cur.fetchone()[0] == 0, "Duplicates found in place_addressline"
  21
  22
  23 ################################ GIVEN ##################################
  24
  25 @given("the (?P<named>named )?places")
  26 def add_data_to_place_table(context, named):
  27     """ Add entries into the place table. 'named places' makes sure that
  28         the entries get a random name when none is explicitly given.
  29     """
  30     with context.db.cursor() as cur:
  31         cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert')
  32         for row in context.table:
  33             PlaceColumn(context).add_row(row, named is not None).db_insert(cur)
  34         cur.execute('ALTER TABLE place ENABLE TRIGGER place_before_insert')
  35
  36 @given("the relations")
  37 def add_data_to_planet_relations(context):
  38     """ Add entries into the osm2pgsql relation middle table. This is needed
  39         for tests on data that looks up members.
  40     """
  41     with context.db.cursor() as cur:
  42         for r in context.table:
  43             last_node = 0
  44             last_way = 0
  45             parts = []
  46             if r['members']:
  47                 members = []
  48                 for m in r['members'].split(','):
  49                     mid = NominatimID(m)
  50                     if mid.typ == 'N':
  51                         parts.insert(last_node, int(mid.oid))
  52                         last_node += 1
  53                         last_way += 1
  54                     elif mid.typ == 'W':
  55                         parts.insert(last_way, int(mid.oid))
  56                         last_way += 1
  57                     else:
  58                         parts.append(int(mid.oid))
  59
  60                     members.extend((mid.typ.lower() + mid.oid, mid.cls or ''))
  61             else:
  62                 members = None
  63
  64             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
  65
  66             cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
  67                            VALUES (%s, %s, %s, %s, %s, %s)""",
  68                         (r['id'], last_node, last_way, parts, members, list(tags)))
  69
  70 @given("the ways")
  71 def add_data_to_planet_ways(context):
  72     """ Add entries into the osm2pgsql way middle table. This is necessary for
  73         tests on that that looks up node ids in this table.
  74     """
  75     with context.db.cursor() as cur:
  76         for r in context.table:
  77             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
  78             nodes = [ int(x.strip()) for x in r['nodes'].split(',') ]
  79
  80             cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)",
  81                         (r['id'], nodes, list(tags)))
  82
  83 ################################ WHEN ##################################
  84
  85 @when("importing")
  86 def import_and_index_data_from_place_table(context):
  87     """ Import data previously set up in the place table.
  88     """
  89     context.nominatim.copy_from_place(context.db)
  90
  91     # XXX use tool function as soon as it is ported
  92     with context.db.cursor() as cur:
  93         with (context.nominatim.src_dir / 'lib-sql' / 'postcode_tables.sql').open('r') as fd:
  94             cur.execute(fd.read())
  95         cur.execute("""
  96             INSERT INTO location_postcode
  97              (place_id, indexed_status, country_code, postcode, geometry)
  98             SELECT nextval('seq_place'), 1, country_code,
  99                    upper(trim (both ' ' from address->'postcode')) as pc,
 100                    ST_Centroid(ST_Collect(ST_Centroid(geometry)))
 101               FROM placex
 102              WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
 103                    AND geometry IS NOT null
 104              GROUP BY country_code, pc""")
 105
 106     # Call directly as the refresh function does not include postcodes.
 107     indexer.LOG.setLevel(logging.ERROR)
 108     indexer.Indexer(context.nominatim.get_libpq_dsn(), 1).index_full(analyse=False)
 109
 110     check_database_integrity(context)
 111
 112 @when("updating places")
 113 def update_place_table(context):
 114     """ Update the place table with the given data. Also runs all triggers
 115         related to updates and reindexes the new data.
 116     """
 117     context.nominatim.run_nominatim('refresh', '--functions')
 118     with context.db.cursor() as cur:
 119         for row in context.table:
 120             PlaceColumn(context).add_row(row, False).db_insert(cur)
 121
 122     context.nominatim.reindex_placex(context.db)
 123     check_database_integrity(context)
 124
 125 @when("updating postcodes")
 126 def update_postcodes(context):
 127     """ Rerun the calculation of postcodes.
 128     """
 129     context.nominatim.run_nominatim('refresh', '--postcodes')
 130
 131 @when("marking for delete (?P<oids>.*)")
 132 def delete_places(context, oids):
 133     """ Remove entries from the place table. Multiple ids may be given
 134         separated by commas. Also runs all triggers
 135         related to updates and reindexes the new data.
 136     """
 137     context.nominatim.run_nominatim('refresh', '--functions')
 138     with context.db.cursor() as cur:
 139         for oid in oids.split(','):
 140             NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}')
 141
 142     context.nominatim.reindex_placex(context.db)
 143
 144 ################################ THEN ##################################
 145
 146 @then("(?P<table>placex|place) contains(?P<exact> exactly)?")
 147 def check_place_contents(context, table, exact):
 148     """ Check contents of place/placex tables. Each row represents a table row
 149         and all data must match. Data not present in the expected table, may
 150         be arbitry. The rows are identified via the 'object' column which must
 151         have an identifier of the form '<NRW><osm id>[:<class>]'. When multiple
 152         rows match (for example because 'class' was left out and there are
 153         multiple entries for the given OSM object) then all must match. All
 154         expected rows are expected to be present with at least one database row.
 155         When 'exactly' is given, there must not be additional rows in the database.
 156     """
 157     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 158         expected_content = set()
 159         for row in context.table:
 160             nid = NominatimID(row['object'])
 161             query = 'SELECT *, ST_AsText(geometry) as geomtxt, ST_GeometryType(geometry) as geometrytype'
 162             if table == 'placex':
 163                 query += ' ,ST_X(centroid) as cx, ST_Y(centroid) as cy'
 164             query += " FROM %s WHERE {}" % (table, )
 165             nid.query_osm_id(cur, query)
 166             assert cur.rowcount > 0, "No rows found for " + row['object']
 167
 168             for res in cur:
 169                 if exact:
 170                     expected_content.add((res['osm_type'], res['osm_id'], res['class']))
 171
 172                 DBRow(nid, res, context).assert_row(row, ['object'])
 173
 174         if exact:
 175             cur.execute('SELECT osm_type, osm_id, class from {}'.format(table))
 176             assert expected_content == set([(r[0], r[1], r[2]) for r in cur])
 177
 178
 179 @then("(?P<table>placex|place) has no entry for (?P<oid>.*)")
 180 def check_place_has_entry(context, table, oid):
 181     """ Ensure that no database row for the given object exists. The ID
 182         must be of the form '<NRW><osm id>[:<class>]'.
 183     """
 184     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 185         NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table)
 186         assert cur.rowcount == 0, \
 187                "Found {} entries for ID {}".format(cur.rowcount, oid)
 188
 189
 190 @then("search_name contains(?P<exclude> not)?")
 191 def check_search_name_contents(context, exclude):
 192     """ Check contents of place/placex tables. Each row represents a table row
 193         and all data must match. Data not present in the expected table, may
 194         be arbitry. The rows are identified via the 'object' column which must
 195         have an identifier of the form '<NRW><osm id>[:<class>]'. All
 196         expected rows are expected to be present with at least one database row.
 197     """
 198     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 199         for row in context.table:
 200             nid = NominatimID(row['object'])
 201             nid.row_by_place_id(cur, 'search_name',
 202                                 ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
 203             assert cur.rowcount > 0, "No rows found for " + row['object']
 204
 205             for res in cur:
 206                 db_row = DBRow(nid, res, context)
 207                 for name, value in zip(row.headings, row.cells):
 208                     if name in ('name_vector', 'nameaddress_vector'):
 209                         items = [x.strip() for x in value.split(',')]
 210                         with context.db.cursor() as subcur:
 211                             subcur.execute(""" SELECT word_id, word_token
 212                                                FROM word, (SELECT unnest(%s::TEXT[]) as term) t
 213                                                WHERE word_token = make_standard_name(t.term)
 214                                                      and class is null and country_code is null
 215                                                      and operator is null
 216                                               UNION
 217                                                SELECT word_id, word_token
 218                                                FROM word, (SELECT unnest(%s::TEXT[]) as term) t
 219                                                WHERE word_token = ' ' || make_standard_name(t.term)
 220                                                      and class is null and country_code is null
 221                                                      and operator is null
 222                                            """,
 223                                            (list(filter(lambda x: not x.startswith('#'), items)),
 224                                             list(filter(lambda x: x.startswith('#'), items))))
 225                             if not exclude:
 226                                 assert subcur.rowcount >= len(items), \
 227                                     "No word entry found for {}. Entries found: {!s}".format(value, subcur.rowcount)
 228                             for wid in subcur:
 229                                 present = wid[0] in res[name]
 230                                 if exclude:
 231                                     assert not present, "Found term for {}/{}: {}".format(row['object'], name, wid[1])
 232                                 else:
 233                                     assert present, "Missing term for {}/{}: {}".fromat(row['object'], name, wid[1])
 234                     elif name != 'object':
 235                         assert db_row.contains(name, value), db_row.assert_msg(name, value)
 236
 237 @then("search_name has no entry for (?P<oid>.*)")
 238 def check_search_name_has_entry(context, oid):
 239     """ Check that there is noentry in the search_name table for the given
 240         objects. IDs are in format '<NRW><osm id>[:<class>]'.
 241     """
 242     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 243         NominatimID(oid).row_by_place_id(cur, 'search_name')
 244
 245         assert cur.rowcount == 0, \
 246                "Found {} entries for ID {}".format(cur.rowcount, oid)
 247
 248 @then("location_postcode contains exactly")
 249 def check_location_postcode(context):
 250     """ Check full contents for location_postcode table. Each row represents a table row
 251         and all data must match. Data not present in the expected table, may
 252         be arbitry. The rows are identified via 'country' and 'postcode' columns.
 253         All rows must be present as excepted and there must not be additional
 254         rows.
 255     """
 256     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 257         cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
 258         assert cur.rowcount == len(list(context.table)), \
 259             "Postcode table has {} rows, expected {}.".foramt(cur.rowcount, len(list(context.table)))
 260
 261         results = {}
 262         for row in cur:
 263             key = (row['country_code'], row['postcode'])
 264             assert key not in results, "Postcode table has duplicate entry: {}".format(row)
 265             results[key] = DBRow((row['country_code'],row['postcode']), row, context)
 266
 267         for row in context.table:
 268             db_row = results.get((row['country'],row['postcode']))
 269             assert db_row is not None, \
 270                 "Missing row for country '{r['country']}' postcode '{r['postcode']}'.".format(r=row)
 271
 272             db_row.assert_row(row, ('country', 'postcode'))
 273
 274 @then("word contains(?P<exclude> not)?")
 275 def check_word_table(context, exclude):
 276     """ Check the contents of the word table. Each row represents a table row
 277         and all data must match. Data not present in the expected table, may
 278         be arbitry. The rows are identified via all given columns.
 279     """
 280     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 281         for row in context.table:
 282             wheres = ' AND '.join(["{} = %s".format(h) for h in row.headings])
 283             cur.execute("SELECT * from word WHERE " + wheres, list(row.cells))
 284             if exclude:
 285                 assert cur.rowcount == 0, "Row still in word table: %s" % '/'.join(values)
 286             else:
 287                 assert cur.rowcount > 0, "Row not in word table: %s" % '/'.join(values)
 288
 289 @then("place_addressline contains")
 290 def check_place_addressline(context):
 291     """ Check the contents of the place_addressline table. Each row represents
 292         a table row and all data must match. Data not present in the expected
 293         table, may be arbitry. The rows are identified via the 'object' column,
 294         representing the addressee and the 'address' column, representing the
 295         address item.
 296     """
 297     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 298         for row in context.table:
 299             nid = NominatimID(row['object'])
 300             pid = nid.get_place_id(cur)
 301             apid = NominatimID(row['address']).get_place_id(cur)
 302             cur.execute(""" SELECT * FROM place_addressline
 303                             WHERE place_id = %s AND address_place_id = %s""",
 304                         (pid, apid))
 305             assert cur.rowcount > 0, \
 306                         "No rows found for place %s and address %s" % (row['object'], row['address'])
 307
 308             for res in cur:
 309                 DBRow(nid, res, context).assert_row(row, ('address', 'object'))
 310
 311 @then("place_addressline doesn't contain")
 312 def check_place_addressline_exclude(context):
 313     """ Check that the place_addressline doesn't contain any entries for the
 314         given addressee/address item pairs.
 315     """
 316     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 317         for row in context.table:
 318             pid = NominatimID(row['object']).get_place_id(cur)
 319             apid = NominatimID(row['address']).get_place_id(cur)
 320             cur.execute(""" SELECT * FROM place_addressline
 321                             WHERE place_id = %s AND address_place_id = %s""",
 322                         (pid, apid))
 323             assert cur.rowcount == 0, \
 324                 "Row found for place %s and address %s" % (row['object'], row['address'])
 325
 326 @then("W(?P<oid>\d+) expands to(?P<neg> no)? interpolation")
 327 def check_location_property_osmline(context, oid, neg):
 328     """ Check that the given way is present in the interpolation table.
 329     """
 330     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 331         cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt
 332                        FROM location_property_osmline
 333                        WHERE osm_id = %s AND startnumber IS NOT NULL""",
 334                     (oid, ))
 335
 336         if neg:
 337             assert cur.rowcount == 0, "Interpolation found for way {}.".format(oid)
 338             return
 339
 340         todo = list(range(len(list(context.table))))
 341         for res in cur:
 342             for i in todo:
 343                 row = context.table[i]
 344                 if (int(row['start']) == res['startnumber']
 345                     and int(row['end']) == res['endnumber']):
 346                     todo.remove(i)
 347                     break
 348             else:
 349                 assert False, "Unexpected row " + str(res)
 350
 351             DBRow(oid, res, context).assert_row(row, ('start', 'end'))
 352
 353         assert not todo
 354
 355