]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/search/db_searches.py
factor out SQL for filtering by location
[nominatim.git] / nominatim / api / search / db_searches.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of the acutal database accesses for forward search.
9 """
10 from typing import List, Tuple, AsyncIterator, Dict, Any, Callable
11 import abc
12
13 import sqlalchemy as sa
14 from sqlalchemy.dialects.postgresql import array_agg
15
16 from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
17                              SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
18 from nominatim.api.connection import SearchConnection
19 from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
20 import nominatim.api.results as nres
21 from nominatim.api.search.db_search_fields import SearchData, WeightedCategories
22 from nominatim.db.sqlalchemy_types import Geometry
23
24 #pylint: disable=singleton-comparison,not-callable
25 #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
26
27 def no_index(expr: SaColumn) -> SaColumn:
28     """ Wrap the given expression, so that the query planner will
29         refrain from using the expression for index lookup.
30     """
31     return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
32
33
34 def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
35     """ Create a dictionary from search parameters that can be used
36         as bind parameter for SQL execute.
37     """
38     return {'limit': details.max_results,
39             'min_rank': details.min_rank,
40             'max_rank': details.max_rank,
41             'viewbox': details.viewbox,
42             'viewbox2': details.viewbox_x2,
43             'near': details.near,
44             'near_radius': details.near_radius,
45             'excluded': details.excluded,
46             'countries': details.countries}
47
48
49 LIMIT_PARAM: SaBind = sa.bindparam('limit')
50 MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
51 MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
52 VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
53 VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
54 NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
55 NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
56 COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
57
58
59 def filter_by_area(sql: SaSelect, t: SaFromClause,
60                    details: SearchDetails, avoid_index: bool = False) -> SaSelect:
61     """ Apply SQL statements for filtering by viewbox and near point,
62         if applicable.
63     """
64     if details.near is not None and details.near_radius is not None:
65         if details.near_radius < 0.1 and not avoid_index:
66             sql = sql.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM))
67         else:
68             sql = sql.where(t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM)
69     if details.viewbox is not None and details.bounded_viewbox:
70         sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM,
71                                                 use_index=not avoid_index and
72                                                           details.viewbox.area < 0.2))
73
74     return sql
75
76
77 def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
78     return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))
79
80
81 def _select_placex(t: SaFromClause) -> SaSelect:
82     return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
83                      t.c.class_, t.c.type,
84                      t.c.address, t.c.extratags,
85                      t.c.housenumber, t.c.postcode, t.c.country_code,
86                      t.c.wikipedia,
87                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
88                      t.c.linked_place_id, t.c.admin_level,
89                      t.c.centroid,
90                      t.c.geometry.ST_Expand(0).label('bbox'))
91
92
93 def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
94     out = []
95
96     if details.geometry_simplification > 0.0:
97         col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
98
99     if details.geometry_output & GeometryFormat.GEOJSON:
100         out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
101     if details.geometry_output & GeometryFormat.TEXT:
102         out.append(sa.func.ST_AsText(col).label('geometry_text'))
103     if details.geometry_output & GeometryFormat.KML:
104         out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
105     if details.geometry_output & GeometryFormat.SVG:
106         out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
107
108     return sql.add_columns(*out)
109
110
111 def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
112                                  numerals: List[int], details: SearchDetails) -> SaScalarSelect:
113     all_ids = array_agg(table.c.place_id) # type: ignore[no-untyped-call]
114     sql = sa.select(all_ids).where(table.c.parent_place_id == inner.c.place_id)
115
116     if len(numerals) == 1:
117         sql = sql.where(sa.between(numerals[0], table.c.startnumber, table.c.endnumber))\
118                  .where((numerals[0] - table.c.startnumber) % table.c.step == 0)
119     else:
120         sql = sql.where(sa.or_(
121                 *(sa.and_(sa.between(n, table.c.startnumber, table.c.endnumber),
122                           (n - table.c.startnumber) % table.c.step == 0)
123                   for n in numerals)))
124
125     if details.excluded:
126         sql = sql.where(_exclude_places(table))
127
128     return sql.scalar_subquery()
129
130
131 def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
132     orexpr: List[SaExpression] = []
133     if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
134         orexpr.append(no_index(table.c.rank_address).between(1, 30))
135     elif layers & DataLayer.ADDRESS:
136         orexpr.append(no_index(table.c.rank_address).between(1, 29))
137         orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
138                               sa.or_(table.c.housenumber != None,
139                                      table.c.address.has_key('addr:housename'))))
140     elif layers & DataLayer.POI:
141         orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
142                               table.c.class_.not_in(('place', 'building'))))
143
144     if layers & DataLayer.MANMADE:
145         exclude = []
146         if not layers & DataLayer.RAILWAY:
147             exclude.append('railway')
148         if not layers & DataLayer.NATURAL:
149             exclude.extend(('natural', 'water', 'waterway'))
150         orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
151                               no_index(table.c.rank_address) == 0))
152     else:
153         include = []
154         if layers & DataLayer.RAILWAY:
155             include.append('railway')
156         if layers & DataLayer.NATURAL:
157             include.extend(('natural', 'water', 'waterway'))
158         orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
159                               no_index(table.c.rank_address) == 0))
160
161     if len(orexpr) == 1:
162         return orexpr[0]
163
164     return sa.or_(*orexpr)
165
166
167 def _interpolated_position(table: SaFromClause, nr: SaColumn) -> SaColumn:
168     pos = sa.cast(nr - table.c.startnumber, sa.Float) / (table.c.endnumber - table.c.startnumber)
169     return sa.case(
170             (table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
171             else_=table.c.linegeo.ST_LineInterpolatePoint(pos)).label('centroid')
172
173
174 async def _get_placex_housenumbers(conn: SearchConnection,
175                                    place_ids: List[int],
176                                    details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
177     t = conn.t.placex
178     sql = _select_placex(t).add_columns(t.c.importance)\
179                            .where(t.c.place_id.in_(place_ids))
180
181     if details.geometry_output:
182         sql = _add_geometry_columns(sql, t.c.geometry, details)
183
184     for row in await conn.execute(sql):
185         result = nres.create_from_placex_row(row, nres.SearchResult)
186         assert result
187         result.bbox = Bbox.from_wkb(row.bbox)
188         yield result
189
190
191 async def _get_osmline(conn: SearchConnection, place_ids: List[int],
192                        numerals: List[int],
193                        details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
194     t = conn.t.osmline
195     values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
196                .data([(n,) for n in numerals])
197     sql = sa.select(t.c.place_id, t.c.osm_id,
198                     t.c.parent_place_id, t.c.address,
199                     values.c.nr.label('housenumber'),
200                     _interpolated_position(t, values.c.nr),
201                     t.c.postcode, t.c.country_code)\
202             .where(t.c.place_id.in_(place_ids))\
203             .join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))
204
205     if details.geometry_output:
206         sub = sql.subquery()
207         sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)
208
209     for row in await conn.execute(sql):
210         result = nres.create_from_osmline_row(row, nres.SearchResult)
211         assert result
212         yield result
213
214
215 async def _get_tiger(conn: SearchConnection, place_ids: List[int],
216                      numerals: List[int], osm_id: int,
217                      details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
218     t = conn.t.tiger
219     values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
220                .data([(n,) for n in numerals])
221     sql = sa.select(t.c.place_id, t.c.parent_place_id,
222                     sa.literal('W').label('osm_type'),
223                     sa.literal(osm_id).label('osm_id'),
224                     values.c.nr.label('housenumber'),
225                     _interpolated_position(t, values.c.nr),
226                     t.c.postcode)\
227             .where(t.c.place_id.in_(place_ids))\
228             .join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))
229
230     if details.geometry_output:
231         sub = sql.subquery()
232         sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)
233
234     for row in await conn.execute(sql):
235         result = nres.create_from_tiger_row(row, nres.SearchResult)
236         assert result
237         yield result
238
239
240 class AbstractSearch(abc.ABC):
241     """ Encapuslation of a single lookup in the database.
242     """
243
244     def __init__(self, penalty: float) -> None:
245         self.penalty = penalty
246
247     @abc.abstractmethod
248     async def lookup(self, conn: SearchConnection,
249                      details: SearchDetails) -> nres.SearchResults:
250         """ Find results for the search in the database.
251         """
252
253
254 class NearSearch(AbstractSearch):
255     """ Category search of a place type near the result of another search.
256     """
257     def __init__(self, penalty: float, categories: WeightedCategories,
258                  search: AbstractSearch) -> None:
259         super().__init__(penalty)
260         self.search = search
261         self.categories = categories
262
263
264     async def lookup(self, conn: SearchConnection,
265                      details: SearchDetails) -> nres.SearchResults:
266         """ Find results for the search in the database.
267         """
268         results = nres.SearchResults()
269         base = await self.search.lookup(conn, details)
270
271         if not base:
272             return results
273
274         base.sort(key=lambda r: (r.accuracy, r.rank_search))
275         max_accuracy = base[0].accuracy + 0.5
276         if base[0].rank_address == 0:
277             min_rank = 0
278             max_rank = 0
279         elif base[0].rank_address < 26:
280             min_rank = 1
281             max_rank = min(25, base[0].rank_address + 4)
282         else:
283             min_rank = 26
284             max_rank = 30
285         base = nres.SearchResults(r for r in base if r.source_table == nres.SourceTable.PLACEX
286                                                      and r.accuracy <= max_accuracy
287                                                      and r.bbox and r.bbox.area < 20
288                                                      and r.rank_address >= min_rank
289                                                      and r.rank_address <= max_rank)
290
291         if base:
292             baseids = [b.place_id for b in base[:5] if b.place_id]
293
294             for category, penalty in self.categories:
295                 await self.lookup_category(results, conn, baseids, category, penalty, details)
296                 if len(results) >= details.max_results:
297                     break
298
299         return results
300
301
302     async def lookup_category(self, results: nres.SearchResults,
303                               conn: SearchConnection, ids: List[int],
304                               category: Tuple[str, str], penalty: float,
305                               details: SearchDetails) -> None:
306         """ Find places of the given category near the list of
307             place ids and add the results to 'results'.
308         """
309         table = await conn.get_class_table(*category)
310
311         tgeom = conn.t.placex.alias('pgeom')
312
313         if table is None:
314             # No classtype table available, do a simplified lookup in placex.
315             table = conn.t.placex.alias('inner')
316             sql = sa.select(table.c.place_id,
317                             sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
318                               .label('dist'))\
319                     .join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\
320                     .where(table.c.class_ == category[0])\
321                     .where(table.c.type == category[1])
322         else:
323             # Use classtype table. We can afford to use a larger
324             # radius for the lookup.
325             sql = sa.select(table.c.place_id,
326                             sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
327                               .label('dist'))\
328                     .join(tgeom,
329                           table.c.centroid.ST_CoveredBy(
330                               sa.case((sa.and_(tgeom.c.rank_address > 9,
331                                                 tgeom.c.geometry.is_area()),
332                                        tgeom.c.geometry),
333                                       else_ = tgeom.c.centroid.ST_Expand(0.05))))
334
335         inner = sql.where(tgeom.c.place_id.in_(ids))\
336                    .group_by(table.c.place_id).subquery()
337
338         t = conn.t.placex
339         sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
340                                .join(inner, inner.c.place_id == t.c.place_id)\
341                                .order_by(inner.c.dist)
342
343         sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
344         if details.countries:
345             sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
346         if details.excluded:
347             sql = sql.where(_exclude_places(t))
348         if details.layers is not None:
349             sql = sql.where(_filter_by_layer(t, details.layers))
350
351         sql = sql.limit(LIMIT_PARAM)
352         for row in await conn.execute(sql, _details_to_bind_params(details)):
353             result = nres.create_from_placex_row(row, nres.SearchResult)
354             assert result
355             result.accuracy = self.penalty + penalty
356             result.bbox = Bbox.from_wkb(row.bbox)
357             results.append(result)
358
359
360
361 class PoiSearch(AbstractSearch):
362     """ Category search in a geographic area.
363     """
364     def __init__(self, sdata: SearchData) -> None:
365         super().__init__(sdata.penalty)
366         self.qualifiers = sdata.qualifiers
367         self.countries = sdata.countries
368
369
370     async def lookup(self, conn: SearchConnection,
371                      details: SearchDetails) -> nres.SearchResults:
372         """ Find results for the search in the database.
373         """
374         bind_params = _details_to_bind_params(details)
375         t = conn.t.placex
376
377         rows: List[SaRow] = []
378
379         if details.near and details.near_radius is not None and details.near_radius < 0.2:
380             # simply search in placex table
381             def _base_query() -> SaSelect:
382                 return _select_placex(t) \
383                            .add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
384                                          .label('importance'))\
385                            .where(t.c.linked_place_id == None) \
386                            .where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
387                            .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
388                            .limit(LIMIT_PARAM)
389
390             classtype = self.qualifiers.values
391             if len(classtype) == 1:
392                 cclass, ctype = classtype[0]
393                 sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
394                                                  .where(t.c.class_ == cclass)
395                                                  .where(t.c.type == ctype))
396             else:
397                 sql = _base_query().where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
398                                                    for cls, typ in classtype)))
399
400             if self.countries:
401                 sql = sql.where(t.c.country_code.in_(self.countries.values))
402
403             if details.viewbox is not None and details.bounded_viewbox:
404                 sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
405
406             rows.extend(await conn.execute(sql, bind_params))
407         else:
408             # use the class type tables
409             for category in self.qualifiers.values:
410                 table = await conn.get_class_table(*category)
411                 if table is not None:
412                     sql = _select_placex(t)\
413                                .add_columns(t.c.importance)\
414                                .join(table, t.c.place_id == table.c.place_id)\
415                                .where(t.c.class_ == category[0])\
416                                .where(t.c.type == category[1])
417
418                     if details.viewbox is not None and details.bounded_viewbox:
419                         sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM))
420
421                     if details.near and details.near_radius is not None:
422                         sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
423                                  .where(table.c.centroid.within_distance(NEAR_PARAM,
424                                                                          NEAR_RADIUS_PARAM))
425
426                     if self.countries:
427                         sql = sql.where(t.c.country_code.in_(self.countries.values))
428
429                     sql = sql.limit(LIMIT_PARAM)
430                     rows.extend(await conn.execute(sql, bind_params))
431
432         results = nres.SearchResults()
433         for row in rows:
434             result = nres.create_from_placex_row(row, nres.SearchResult)
435             assert result
436             result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
437             result.bbox = Bbox.from_wkb(row.bbox)
438             results.append(result)
439
440         return results
441
442
443 class CountrySearch(AbstractSearch):
444     """ Search for a country name or country code.
445     """
446     def __init__(self, sdata: SearchData) -> None:
447         super().__init__(sdata.penalty)
448         self.countries = sdata.countries
449
450
451     async def lookup(self, conn: SearchConnection,
452                      details: SearchDetails) -> nres.SearchResults:
453         """ Find results for the search in the database.
454         """
455         t = conn.t.placex
456
457         ccodes = self.countries.values
458         sql = _select_placex(t)\
459                 .add_columns(t.c.importance)\
460                 .where(t.c.country_code.in_(ccodes))\
461                 .where(t.c.rank_address == 4)
462
463         if details.geometry_output:
464             sql = _add_geometry_columns(sql, t.c.geometry, details)
465
466         if details.excluded:
467             sql = sql.where(_exclude_places(t))
468
469         sql = filter_by_area(sql, t, details)
470
471         results = nres.SearchResults()
472         for row in await conn.execute(sql, _details_to_bind_params(details)):
473             result = nres.create_from_placex_row(row, nres.SearchResult)
474             assert result
475             result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
476             result.bbox = Bbox.from_wkb(row.bbox)
477             results.append(result)
478
479         return results or await self.lookup_in_country_table(conn, details)
480
481
482     async def lookup_in_country_table(self, conn: SearchConnection,
483                                       details: SearchDetails) -> nres.SearchResults:
484         """ Look up the country in the fallback country tables.
485         """
486         # Avoid the fallback search when this is a more search. Country results
487         # usually are in the first batch of results and it is not possible
488         # to exclude these fallbacks.
489         if details.excluded:
490             return nres.SearchResults()
491
492         t = conn.t.country_name
493         tgrid = conn.t.country_grid
494
495         sql = sa.select(tgrid.c.country_code,
496                         tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
497                               .label('centroid'),
498                         tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\
499                 .where(tgrid.c.country_code.in_(self.countries.values))\
500                 .group_by(tgrid.c.country_code)
501
502         sql = filter_by_area(sql, tgrid, details, avoid_index=True)
503
504         sub = sql.subquery('grid')
505
506         sql = sa.select(t.c.country_code,
507                         t.c.name.merge(t.c.derived_name).label('name'),
508                         sub.c.centroid, sub.c.bbox)\
509                 .join(sub, t.c.country_code == sub.c.country_code)
510
511         if details.geometry_output:
512             sql = _add_geometry_columns(sql, sub.c.centroid, details)
513
514         results = nres.SearchResults()
515         for row in await conn.execute(sql, _details_to_bind_params(details)):
516             result = nres.create_from_country_row(row, nres.SearchResult)
517             assert result
518             result.bbox = Bbox.from_wkb(row.bbox)
519             result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
520             results.append(result)
521
522         return results
523
524
525
526 class PostcodeSearch(AbstractSearch):
527     """ Search for a postcode.
528     """
529     def __init__(self, extra_penalty: float, sdata: SearchData) -> None:
530         super().__init__(sdata.penalty + extra_penalty)
531         self.countries = sdata.countries
532         self.postcodes = sdata.postcodes
533         self.lookups = sdata.lookups
534         self.rankings = sdata.rankings
535
536
537     async def lookup(self, conn: SearchConnection,
538                      details: SearchDetails) -> nres.SearchResults:
539         """ Find results for the search in the database.
540         """
541         t = conn.t.postcode
542         pcs = self.postcodes.values
543
544         sql = sa.select(t.c.place_id, t.c.parent_place_id,
545                         t.c.rank_search, t.c.rank_address,
546                         t.c.postcode, t.c.country_code,
547                         t.c.geometry.label('centroid'))\
548                 .where(t.c.postcode.in_(pcs))
549
550         if details.geometry_output:
551             sql = _add_geometry_columns(sql, t.c.geometry, details)
552
553         penalty: SaExpression = sa.literal(self.penalty)
554
555         if details.viewbox is not None and not details.bounded_viewbox:
556             penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
557                                (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
558                                else_=1.0)
559
560         if details.near is not None:
561             sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))
562
563         sql = filter_by_area(sql, t, details)
564
565         if self.countries:
566             sql = sql.where(t.c.country_code.in_(self.countries.values))
567
568         if details.excluded:
569             sql = sql.where(_exclude_places(t))
570
571         if self.lookups:
572             assert len(self.lookups) == 1
573             tsearch = conn.t.search_name
574             sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
575                      .where((tsearch.c.name_vector + tsearch.c.nameaddress_vector)
576                                      .contains(self.lookups[0].tokens))
577
578         for ranking in self.rankings:
579             penalty += ranking.sql_penalty(conn.t.search_name)
580         penalty += sa.case(*((t.c.postcode == v, p) for v, p in self.postcodes),
581                        else_=1.0)
582
583
584         sql = sql.add_columns(penalty.label('accuracy'))
585         sql = sql.order_by('accuracy').limit(LIMIT_PARAM)
586
587         results = nres.SearchResults()
588         for row in await conn.execute(sql, _details_to_bind_params(details)):
589             result = nres.create_from_postcode_row(row, nres.SearchResult)
590             assert result
591             result.accuracy = row.accuracy
592             results.append(result)
593
594         return results
595
596
597
598 class PlaceSearch(AbstractSearch):
599     """ Generic search for an address or named place.
600     """
601     def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None:
602         super().__init__(sdata.penalty + extra_penalty)
603         self.countries = sdata.countries
604         self.postcodes = sdata.postcodes
605         self.housenumbers = sdata.housenumbers
606         self.qualifiers = sdata.qualifiers
607         self.lookups = sdata.lookups
608         self.rankings = sdata.rankings
609         self.expected_count = expected_count
610
611
612     async def lookup(self, conn: SearchConnection,
613                      details: SearchDetails) -> nres.SearchResults:
614         """ Find results for the search in the database.
615         """
616         t = conn.t.placex
617         tsearch = conn.t.search_name
618
619         sql: SaLambdaSelect = sa.lambda_stmt(lambda:
620                   _select_placex(t).where(t.c.place_id == tsearch.c.place_id))
621
622
623         if details.geometry_output:
624             sql = _add_geometry_columns(sql, t.c.geometry, details)
625
626         penalty: SaExpression = sa.literal(self.penalty)
627         for ranking in self.rankings:
628             penalty += ranking.sql_penalty(tsearch)
629
630         for lookup in self.lookups:
631             sql = sql.where(lookup.sql_condition(tsearch))
632
633         if self.countries:
634             sql = sql.where(tsearch.c.country_code.in_(self.countries.values))
635
636         if self.postcodes:
637             # if a postcode is given, don't search for state or country level objects
638             sql = sql.where(tsearch.c.address_rank > 9)
639             tpc = conn.t.postcode
640             pcs = self.postcodes.values
641             if self.expected_count > 5000:
642                 # Many results expected. Restrict by postcode.
643                 sql = sql.where(sa.select(tpc.c.postcode)
644                                   .where(tpc.c.postcode.in_(pcs))
645                                   .where(tsearch.c.centroid.within_distance(tpc.c.geometry, 0.12))
646                                   .exists())
647
648             # Less results, only have a preference for close postcodes
649             pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(tsearch.c.centroid)))\
650                       .where(tpc.c.postcode.in_(pcs))\
651                       .scalar_subquery()
652             penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
653                                else_=sa.func.coalesce(pc_near, 2.0))
654
655         if details.viewbox is not None:
656             if details.bounded_viewbox:
657                 sql = sql.where(tsearch.c.centroid
658                                          .intersects(VIEWBOX_PARAM,
659                                                      use_index=details.viewbox.area < 0.2))
660             elif self.expected_count >= 10000:
661                 sql = sql.where(tsearch.c.centroid
662                                          .intersects(VIEWBOX2_PARAM,
663                                                      use_index=details.viewbox.area < 0.5))
664             else:
665                 penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
666                                    (t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
667                                    else_=1.0)
668
669         if details.near is not None:
670             if details.near_radius is not None:
671                 if details.near_radius < 0.1:
672                     sql = sql.where(tsearch.c.centroid.within_distance(NEAR_PARAM,
673                                                                        NEAR_RADIUS_PARAM))
674                 else:
675                     sql = sql.where(tsearch.c.centroid
676                                              .ST_Distance(NEAR_PARAM) <  NEAR_RADIUS_PARAM)
677             sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
678                                       .label('importance'))
679             sql = sql.order_by(sa.desc(sa.text('importance')))
680         else:
681             if self.expected_count < 10000\
682                or (details.viewbox is not None and details.viewbox.area < 0.5):
683                 sql = sql.order_by(
684                         penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
685                                     else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
686             sql = sql.add_columns(t.c.importance)
687
688
689         sql = sql.add_columns(penalty.label('accuracy'))
690
691         if self.expected_count < 10000:
692             sql = sql.order_by(sa.text('accuracy'))
693
694         if self.housenumbers:
695             hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M"
696             sql = sql.where(tsearch.c.address_rank.between(16, 30))\
697                      .where(sa.or_(tsearch.c.address_rank < 30,
698                                    t.c.housenumber.op('~*')(hnr_regexp)))
699
700             # Cross check for housenumbers, need to do that on a rather large
701             # set. Worst case there are 40.000 main streets in OSM.
702             inner = sql.limit(10000).subquery()
703
704             # Housenumbers from placex
705             thnr = conn.t.placex.alias('hnr')
706             pid_list = array_agg(thnr.c.place_id) # type: ignore[no-untyped-call]
707             place_sql = sa.select(pid_list)\
708                           .where(thnr.c.parent_place_id == inner.c.place_id)\
709                           .where(thnr.c.housenumber.op('~*')(hnr_regexp))\
710                           .where(thnr.c.linked_place_id == None)\
711                           .where(thnr.c.indexed_status == 0)
712
713             if details.excluded:
714                 place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
715             if self.qualifiers:
716                 place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
717
718             numerals = [int(n) for n in self.housenumbers.values
719                         if n.isdigit() and len(n) < 8]
720             interpol_sql: SaColumn
721             tiger_sql: SaColumn
722             if numerals and \
723                (not self.qualifiers or ('place', 'house') in self.qualifiers.values):
724                 # Housenumbers from interpolations
725                 interpol_sql = _make_interpolation_subquery(conn.t.osmline, inner,
726                                                             numerals, details)
727                 # Housenumbers from Tiger
728                 tiger_sql = sa.case((inner.c.country_code == 'us',
729                                      _make_interpolation_subquery(conn.t.tiger, inner,
730                                                                   numerals, details)
731                                     ), else_=None)
732             else:
733                 interpol_sql = sa.null()
734                 tiger_sql = sa.null()
735
736             unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
737                                interpol_sql.label('interpol_hnr'),
738                                tiger_sql.label('tiger_hnr')).subquery('unsort')
739             sql = sa.select(unsort)\
740                     .order_by(sa.case((unsort.c.placex_hnr != None, 1),
741                                       (unsort.c.interpol_hnr != None, 2),
742                                       (unsort.c.tiger_hnr != None, 3),
743                                       else_=4),
744                               unsort.c.accuracy)
745         else:
746             sql = sql.where(t.c.linked_place_id == None)\
747                      .where(t.c.indexed_status == 0)
748             if self.qualifiers:
749                 sql = sql.where(self.qualifiers.sql_restrict(t))
750             if details.excluded:
751                 sql = sql.where(_exclude_places(tsearch))
752             if details.min_rank > 0:
753                 sql = sql.where(sa.or_(tsearch.c.address_rank >= MIN_RANK_PARAM,
754                                        tsearch.c.search_rank >= MIN_RANK_PARAM))
755             if details.max_rank < 30:
756                 sql = sql.where(sa.or_(tsearch.c.address_rank <= MAX_RANK_PARAM,
757                                        tsearch.c.search_rank <= MAX_RANK_PARAM))
758             if details.layers is not None:
759                 sql = sql.where(_filter_by_layer(t, details.layers))
760
761         sql = sql.limit(LIMIT_PARAM)
762
763         results = nres.SearchResults()
764         for row in await conn.execute(sql, _details_to_bind_params(details)):
765             result = nres.create_from_placex_row(row, nres.SearchResult)
766             assert result
767             result.bbox = Bbox.from_wkb(row.bbox)
768             result.accuracy = row.accuracy
769             if self.housenumbers and row.rank_address < 30:
770                 if row.placex_hnr:
771                     subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
772                 elif row.interpol_hnr:
773                     subs = _get_osmline(conn, row.interpol_hnr, numerals, details)
774                 elif row.tiger_hnr:
775                     subs = _get_tiger(conn, row.tiger_hnr, numerals, row.osm_id, details)
776                 else:
777                     subs = None
778
779                 if subs is not None:
780                     async for sub in subs:
781                         assert sub.housenumber
782                         sub.accuracy = result.accuracy
783                         if not any(nr in self.housenumbers.values
784                                    for nr in sub.housenumber.split(';')):
785                             sub.accuracy += 0.6
786                         results.append(sub)
787
788                 # Only add the street as a result, if it meets all other
789                 # filter conditions.
790                 if (not details.excluded or result.place_id not in details.excluded)\
791                    and (not self.qualifiers or result.category in self.qualifiers.values)\
792                    and result.rank_address >= details.min_rank:
793                     result.accuracy += 1.0 # penalty for missing housenumber
794                     results.append(result)
795             else:
796                 results.append(result)
797
798         return results