]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/search/db_searches.py
avoid duplicate lines during category search
[nominatim.git] / nominatim / api / search / db_searches.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Implementation of the actual database accesses for forward search.
9 """
10 from typing import List, Tuple, AsyncIterator, Dict, Any, Callable
11 import abc
12
13 import sqlalchemy as sa
14 from sqlalchemy.dialects.postgresql import ARRAY, array_agg
15
16 from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
17                              SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
18 from nominatim.api.connection import SearchConnection
19 from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
20 import nominatim.api.results as nres
21 from nominatim.api.search.db_search_fields import SearchData, WeightedCategories
22 from nominatim.db.sqlalchemy_types import Geometry
23
24 #pylint: disable=singleton-comparison,not-callable
25 #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
26
def no_index(expr: SaColumn) -> SaColumn:
    """ Return the expression in a form that the query planner will not
        consider for an index lookup.

        The expression becomes the second argument of a COALESCE whose
        first argument is NULL.
    """
    wrapped = sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
    return wrapped
32
33
def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
    """ Copy the search parameters into a dictionary which can be handed
        to SQL execute as the set of bind parameters.
    """
    return dict(limit=details.max_results,
                min_rank=details.min_rank,
                max_rank=details.max_rank,
                viewbox=details.viewbox,
                viewbox2=details.viewbox_x2,
                near=details.near,
                near_radius=details.near_radius,
                excluded=details.excluded,
                countries=details.countries)
47
48
# Bind parameters shared by the queries in this module. The names
# correspond to the keys of the dictionary that is created by
# _details_to_bind_params().

# Result limit and address rank range.
LIMIT_PARAM: SaBind = sa.bindparam('limit')
MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
# Geometry-typed parameters.
VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
# Remaining restrictions.
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
57
def _within_near(t: SaFromClause) -> Callable[[], SaExpression]:
    """ Return a callable producing the condition that the geometry of
        table 't' is within the 'near_radius' parameter of the 'near'
        parameter.
    """
    def _condition() -> SaExpression:
        return t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)

    return _condition
60
def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
    """ Return a callable producing the condition that the place_id of
        table 't' is not among the ids of the 'excluded' bind parameter.
    """
    def _condition() -> SaExpression:
        return t.c.place_id.not_in(sa.bindparam('excluded'))

    return _condition
63
def _select_placex(t: SaFromClause) -> SaSelect:
    """ Create a select statement over the columns of table 't' that
        are used for creating results from placex rows.

        The geometry is not selected directly. Instead the result of
        ST_Expand(0) on the geometry is returned under the label 'bbox'.
    """
    columns = (
        t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
        t.c.class_, t.c.type,
        t.c.address, t.c.extratags,
        t.c.housenumber, t.c.postcode, t.c.country_code,
        t.c.importance, t.c.wikipedia,
        t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
        t.c.linked_place_id, t.c.admin_level,
        t.c.centroid,
        t.c.geometry.ST_Expand(0).label('bbox'),
    )

    return sa.select(*columns)
74
75
def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
    """ Extend the statement 'sql' by one output column for each geometry
        format requested in 'details.geometry_output'. The geometry is
        taken from 'col' and simplified first when
        'details.geometry_simplification' is greater than zero.

        The added columns are labelled 'geometry_geojson', 'geometry_text',
        'geometry_kml' and 'geometry_svg' respectively.
    """
    geom = col
    if details.geometry_simplification > 0.0:
        geom = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)

    # Output format, function creating the output, label of the column.
    formats = (
        (GeometryFormat.GEOJSON, lambda: sa.func.ST_AsGeoJSON(geom, 7), 'geometry_geojson'),
        (GeometryFormat.TEXT, lambda: sa.func.ST_AsText(geom), 'geometry_text'),
        (GeometryFormat.KML, lambda: sa.func.ST_AsKML(geom, 7), 'geometry_kml'),
        (GeometryFormat.SVG, lambda: sa.func.ST_AsSVG(geom, 0, 7), 'geometry_svg'),
    )

    extra_columns = [make().label(label) for fmt, make, label in formats
                     if details.geometry_output & fmt]

    return sql.add_columns(*extra_columns)
92
93
def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
                                 numerals: List[int], details: SearchDetails) -> SaScalarSelect:
    """ Create a scalar subquery that aggregates into an array the place
        ids of all rows of the interpolation table 'table' which have a
        place from 'inner' as parent and contain at least one of the
        housenumbers from 'numerals'.

        A housenumber is contained in a row when it lies between start and
        end number and its distance to the start number is a multiple of
        the step. Excluded places are filtered when 'details.excluded'
        is set.
    """
    def _on_line(number: int) -> Tuple[SaExpression, SaExpression]:
        # The two conditions that make a number part of an interpolation.
        return (sa.between(number, table.c.startnumber, table.c.endnumber),
                (number - table.c.startnumber) % table.c.step == 0)

    id_array = array_agg(table.c.place_id) # type: ignore[no-untyped-call]
    query = sa.select(id_array).where(table.c.parent_place_id == inner.c.place_id)

    if len(numerals) == 1:
        in_range, on_step = _on_line(numerals[0])
        query = query.where(in_range).where(on_step)
    else:
        query = query.where(sa.or_(*(sa.and_(*_on_line(n)) for n in numerals)))

    if details.excluded:
        query = query.where(_exclude_places(table))

    return query.scalar_subquery()
112
113
def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
    """ Create the condition that restricts the rows of 'table' to the
        data layers selected in 'layers'.

        One condition is created from the ADDRESS and POI flags using
        rank_address between 1 and 30, housenumber, address and class.
        Another one is created from the MANMADE, RAILWAY and NATURAL
        flags for rows with a rank_address of 0. The conditions are
        combined with OR.
    """
    natural_classes = ('natural', 'water', 'waterway')
    want_address = layers & DataLayer.ADDRESS
    want_poi = layers & DataLayer.POI

    conditions: List[SaExpression] = []

    if want_address and want_poi:
        conditions.append(no_index(table.c.rank_address).between(1, 30))
    elif want_address:
        conditions.append(no_index(table.c.rank_address).between(1, 29))
        # On rank 30 only accept rows with a housenumber or a housename.
        has_house = sa.or_(table.c.housenumber != None,
                           table.c.address.has_key('addr:housename'))
        conditions.append(sa.and_(no_index(table.c.rank_address) == 30, has_house))
    elif want_poi:
        conditions.append(sa.and_(no_index(table.c.rank_address) == 30,
                                  table.c.class_.not_in(('place', 'building'))))

    with_railway = bool(layers & DataLayer.RAILWAY)
    with_natural = bool(layers & DataLayer.NATURAL)

    if layers & DataLayer.MANMADE:
        # Accept all classes except the ones of layers not asked for.
        unwanted: Tuple[str, ...] = ()
        if not with_railway:
            unwanted += ('railway',)
        if not with_natural:
            unwanted += natural_classes
        class_filter = table.c.class_.not_in(unwanted)
    else:
        # Accept only the classes of the layers asked for.
        wanted: Tuple[str, ...] = ()
        if with_railway:
            wanted += ('railway',)
        if with_natural:
            wanted += natural_classes
        class_filter = table.c.class_.in_(wanted)

    conditions.append(sa.and_(class_filter, no_index(table.c.rank_address) == 0))

    return conditions[0] if len(conditions) == 1 else sa.or_(*conditions)
148
149
def _interpolated_position(table: SaFromClause, nr: SaColumn) -> SaColumn:
    """ Create the expression for the position of housenumber 'nr' on the
        line geometry of the interpolation table 'table'. The expression
        is labelled 'centroid'.

        When start and end number of a row are the same, the centroid
        of the line is used. Otherwise the point is interpolated along
        the line at the fraction (nr - start) / (end - start).
    """
    number_span = table.c.endnumber - table.c.startnumber
    fraction = sa.cast(nr - table.c.startnumber, sa.Float) / number_span
    is_single_number = table.c.endnumber == table.c.startnumber

    position = sa.case((is_single_number, table.c.linegeo.ST_Centroid()),
                       else_=table.c.linegeo.ST_LineInterpolatePoint(fraction))

    return position.label('centroid')
155
156
async def _get_placex_housenumbers(conn: SearchConnection,
                                   place_ids: List[int],
                                   details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Load the placex rows with the given place ids and yield a search
        result with bounding box for each of them. Geometry output
        columns are added to the query when requested in 'details'.
    """
    placex = conn.t.placex
    query = _select_placex(placex).where(placex.c.place_id.in_(place_ids))

    if details.geometry_output:
        query = _add_geometry_columns(query, placex.c.geometry, details)

    rows = await conn.execute(query)
    for row in rows:
        result = nres.create_from_placex_row(row, nres.SearchResult)
        assert result
        result.bbox = Bbox.from_wkb(row.bbox)
        yield result
171
172
async def _get_osmline(conn: SearchConnection, place_ids: List[int],
                       numerals: List[int],
                       details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Yield a search result for each housenumber from 'numerals' that
        is part of one of the osmline interpolations given by 'place_ids'.

        A housenumber is part of an interpolation when it lies between
        start and end number and its distance to the start number is a
        multiple of the step. This is the same condition that
        _make_interpolation_subquery() applies when choosing the lines.
        Geometry output columns are computed from the interpolated position
        when requested in 'details'.
    """
    t = conn.t.osmline
    values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
               .data([(n,) for n in numerals])

    # The step needs to be checked in addition to the number range.
    # Otherwise a line that was selected because of one of the numerals
    # also returns all other requested numerals within its range, even
    # when they are not part of the interpolation (e.g. even numbers on
    # a line with odd numbers only).
    on_line = sa.and_(values.c.nr.between(t.c.startnumber, t.c.endnumber),
                      (values.c.nr - t.c.startnumber) % t.c.step == 0)

    sql = sa.select(t.c.place_id, t.c.osm_id,
                    t.c.parent_place_id, t.c.address,
                    values.c.nr.label('housenumber'),
                    _interpolated_position(t, values.c.nr),
                    t.c.postcode, t.c.country_code)\
            .where(t.c.place_id.in_(place_ids))\
            .join(values, on_line)

    if details.geometry_output:
        # The geometry is derived from the computed centroid, so the
        # query has to be wrapped into a subquery first.
        sub = sql.subquery()
        sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)

    for row in await conn.execute(sql):
        result = nres.create_from_osmline_row(row, nres.SearchResult)
        assert result
        yield result
195
196
async def _get_tiger(conn: SearchConnection, place_ids: List[int],
                     numerals: List[int], osm_id: int,
                     details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Yield a search result for each housenumber from 'numerals' that
        is part of one of the Tiger interpolations given by 'place_ids'.

        A housenumber is part of an interpolation when it lies between
        start and end number and its distance to the start number is a
        multiple of the step. This is the same condition that
        _make_interpolation_subquery() applies when choosing the lines.
        Each row gets the literal 'W' as OSM type and the given 'osm_id'
        as OSM id. Geometry output columns are computed from the
        interpolated position when requested in 'details'.
    """
    t = conn.t.tiger
    values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
               .data([(n,) for n in numerals])

    # The step needs to be checked in addition to the number range.
    # Otherwise a line that was selected because of one of the numerals
    # also returns all other requested numerals within its range, even
    # when they are not part of the interpolation.
    on_line = sa.and_(values.c.nr.between(t.c.startnumber, t.c.endnumber),
                      (values.c.nr - t.c.startnumber) % t.c.step == 0)

    sql = sa.select(t.c.place_id, t.c.parent_place_id,
                    sa.literal('W').label('osm_type'),
                    sa.literal(osm_id).label('osm_id'),
                    values.c.nr.label('housenumber'),
                    _interpolated_position(t, values.c.nr),
                    t.c.postcode)\
            .where(t.c.place_id.in_(place_ids))\
            .join(values, on_line)

    if details.geometry_output:
        # The geometry is derived from the computed centroid, so the
        # query has to be wrapped into a subquery first.
        sub = sql.subquery()
        sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)

    for row in await conn.execute(sql):
        result = nres.create_from_tiger_row(row, nres.SearchResult)
        assert result
        yield result
220
221
class AbstractSearch(abc.ABC):
    """ Base class for searches. Each search encapsulates a single
        lookup in the database and carries a penalty.
    """

    def __init__(self, penalty: float) -> None:
        self.penalty = penalty

    @abc.abstractmethod
    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Run the search against the database and return the
            results that were found.
        """
234
235
class NearSearch(AbstractSearch):
    """ Search for places of the given categories close to the
        results of another search.
    """
    def __init__(self, penalty: float, categories: WeightedCategories,
                 search: AbstractSearch) -> None:
        super().__init__(penalty)
        self.categories = categories
        self.search = search


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Run the inner search and look up the categories around
            its best results.
        """
        results = nres.SearchResults()

        candidates = await self.search.lookup(conn, details)
        if not candidates:
            return results

        candidates.sort(key=lambda r: (r.accuracy, r.rank_search))
        accuracy_cutoff = candidates[0].accuracy + 0.5

        def _usable(res: nres.SearchResult) -> bool:
            # Only placex results that are nearly as good as the best
            # result and have a bounding box with an area below 20.
            return bool(res.source_table == nres.SourceTable.PLACEX
                        and res.accuracy <= accuracy_cutoff
                        and res.bbox and res.bbox.area < 20)

        base = nres.SearchResults(r for r in candidates if _usable(r))
        if not base:
            return results

        # Search around the first five remaining results at most.
        baseids = [b.place_id for b in base[:5] if b.place_id]

        for category, penalty in self.categories:
            await self.lookup_category(results, conn, baseids, category, penalty, details)
            if len(results) >= details.max_results:
                break

        return results


    async def lookup_category(self, results: nres.SearchResults,
                              conn: SearchConnection, ids: List[int],
                              category: Tuple[str, str], penalty: float,
                              details: SearchDetails) -> None:
        """ Search for places of the category 'category' close to the
            places with the given ids and append what was found to
            'results'. The accuracy of the new results is the penalty of
            the search plus 'penalty'.
        """
        table = await conn.get_class_table(*category)
        tgeom = conn.t.placex.alias('pgeom')

        simplified = table is None
        if simplified:
            # No classtype table available, do a simplified lookup in placex.
            table = conn.t.placex.alias('inner')
            is_near = table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01))
        else:
            # Use classtype table. We can afford to use a larger
            # radius for the lookup.
            search_area = sa.case((sa.and_(tgeom.c.rank_address > 9,
                                            tgeom.c.geometry.is_area()),
                                   tgeom.c.geometry),
                                  else_ = tgeom.c.centroid.ST_Expand(0.05))
            is_near = table.c.centroid.ST_CoveredBy(search_area)

        # Grouping by place id with the minimum distance makes sure that
        # every place appears only once, even when it is near multiple ids.
        min_dist = sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))\
                     .label('dist')
        nearby = sa.select(table.c.place_id, min_dist).join(tgeom, is_near)

        if simplified:
            nearby = nearby.where(table.c.class_ == category[0])\
                           .where(table.c.type == category[1])

        inner = nearby.where(tgeom.c.place_id.in_(ids))\
                      .group_by(table.c.place_id).subquery()

        t = conn.t.placex
        sql = _select_placex(t).join(inner, inner.c.place_id == t.c.place_id)\
                               .order_by(inner.c.dist)\
                               .where(no_index(t.c.rank_address)
                                        .between(MIN_RANK_PARAM, MAX_RANK_PARAM))

        if details.countries:
            sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
        if details.excluded:
            sql = sql.where(_exclude_places(t))
        if details.layers is not None:
            sql = sql.where(_filter_by_layer(t, details.layers))

        sql = sql.limit(LIMIT_PARAM)

        accuracy = self.penalty + penalty
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.accuracy = accuracy
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)
328
329
330
class PoiSearch(AbstractSearch):
    """ Search for places of certain categories in a geographic area.
    """
    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.countries = sdata.countries
        self.qualifiers = sdata.qualifiers


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Run the search against the database and return the results.

            When a near point with a radius below 0.2 is given, placex
            is searched directly. Otherwise the class type tables of
            the categories are used.
        """
        t = conn.t.placex
        bind_params = _details_to_bind_params(details)
        only_in_viewbox = details.viewbox is not None and details.bounded_viewbox

        rows: List[SaRow] = []

        if details.near and details.near_radius is not None and details.near_radius < 0.2:
            # simply search in placex table
            def _base_query() -> SaSelect:
                return _select_placex(t) \
                           .where(t.c.linked_place_id == None) \
                           .where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
                           .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
                           .limit(LIMIT_PARAM)

            categories = self.qualifiers.values
            if len(categories) == 1:
                cclass, ctype = categories[0]
                sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
                                                 .where(t.c.class_ == cclass)
                                                 .where(t.c.type == ctype))
            else:
                any_category = sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
                                        for cls, typ in categories))
                sql = _base_query().where(any_category)

            if self.countries:
                sql = sql.where(t.c.country_code.in_(self.countries.values))

            if only_in_viewbox:
                sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))

            rows.extend(await conn.execute(sql, bind_params))
        else:
            # use the class type tables
            order_by_distance = details.near and details.near_radius is not None

            for category in self.qualifiers.values:
                table = await conn.get_class_table(*category)
                if table is None:
                    # Categories without class type table are skipped.
                    continue

                sql = _select_placex(t)\
                           .join(table, t.c.place_id == table.c.place_id)\
                           .where(t.c.class_ == category[0])\
                           .where(t.c.type == category[1])

                if only_in_viewbox:
                    sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM))

                if order_by_distance:
                    sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
                             .where(table.c.centroid.ST_DWithin(NEAR_PARAM,
                                                                NEAR_RADIUS_PARAM))

                if self.countries:
                    sql = sql.where(t.c.country_code.in_(self.countries.values))

                rows.extend(await conn.execute(sql.limit(LIMIT_PARAM), bind_params))

        results = nres.SearchResults()
        for row in rows:
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            category_penalty = self.qualifiers.get_penalty((row.class_, row.type))
            result.accuracy = self.penalty + category_penalty
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)

        return results
408
409
class CountrySearch(AbstractSearch):
    """ Search for a country by its name or country code.
    """
    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.countries = sdata.countries


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Run the search against the database and return the results.

            The countries are searched in placex first. When nothing
            is found there, the fallback country tables are tried.
        """
        t = conn.t.placex
        restrict_viewbox = details.viewbox is not None and details.bounded_viewbox
        restrict_near = details.near is not None and details.near_radius is not None

        sql = _select_placex(t)\
                .where(t.c.country_code.in_(self.countries.values))\
                .where(t.c.rank_address == 4)

        if details.geometry_output:
            sql = _add_geometry_columns(sql, t.c.geometry, details)

        if details.excluded:
            sql = sql.where(_exclude_places(t))

        if restrict_viewbox:
            sql = sql.where(lambda: t.c.geometry.intersects(VIEWBOX_PARAM))

        if restrict_near:
            sql = sql.where(_within_near(t))

        results = nres.SearchResults()
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)

        if results:
            return results

        return await self.lookup_in_country_table(conn, details)


    async def lookup_in_country_table(self, conn: SearchConnection,
                                      details: SearchDetails) -> nres.SearchResults:
        """ Search for the countries in the fallback country tables
            and return the results.
        """
        results = nres.SearchResults()

        # Avoid the fallback search when this is a more search. Country results
        # usually are in the first batch of results and it is not possible
        # to exclude these fallbacks.
        if details.excluded:
            return results

        tname = conn.t.country_name
        tgrid = conn.t.country_grid

        # Centroid and bounding box are computed per country from
        # the collected geometries of the country grid.
        grid_centroid = tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()\
                             .label('centroid')
        grid_bbox = tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox')

        grid_sql = sa.select(tgrid.c.country_code, grid_centroid, grid_bbox)\
                     .where(tgrid.c.country_code.in_(self.countries.values))\
                     .group_by(tgrid.c.country_code)

        if details.viewbox is not None and details.bounded_viewbox:
            grid_sql = grid_sql.where(tgrid.c.geometry.intersects(VIEWBOX_PARAM))
        if details.near is not None and details.near_radius is not None:
            grid_sql = grid_sql.where(_within_near(tgrid))

        sub = grid_sql.subquery('grid')

        full_name = t_name_with_derived = (
            tname.c.name
            + sa.func.coalesce(tname.c.derived_name,
                               sa.cast('', type_=conn.t.types.Composite))
        ).label('name')
        del t_name_with_derived

        sql = sa.select(tname.c.country_code, full_name,
                        sub.c.centroid, sub.c.bbox)\
                .join(sub, tname.c.country_code == sub.c.country_code)

        if details.geometry_output:
            sql = _add_geometry_columns(sql, sub.c.centroid, details)

        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_country_row(row, nres.SearchResult)
            assert result
            result.bbox = Bbox.from_wkb(row.bbox)
            result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
            results.append(result)

        return results
499
500
501
class PostcodeSearch(AbstractSearch):
    """ Search for one or more postcodes.
    """
    def __init__(self, extra_penalty: float, sdata: SearchData) -> None:
        super().__init__(sdata.penalty + extra_penalty)
        self.postcodes = sdata.postcodes
        self.countries = sdata.countries
        self.rankings = sdata.rankings
        self.lookups = sdata.lookups


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Run the search against the postcode table and return
            the results ordered by their accuracy.
        """
        t = conn.t.postcode

        sql = sa.select(t.c.place_id, t.c.parent_place_id,
                        t.c.rank_search, t.c.rank_address,
                        t.c.postcode, t.c.country_code,
                        t.c.geometry.label('centroid'))\
                .where(t.c.postcode.in_(self.postcodes.values))

        if details.geometry_output:
            sql = _add_geometry_columns(sql, t.c.geometry, details)

        # The accuracy of a result is computed in SQL, starting
        # from the penalty of the search.
        penalty: SaExpression = sa.literal(self.penalty)

        if details.viewbox is not None and details.bounded_viewbox:
            sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
        elif details.viewbox is not None:
            # Unbounded viewbox: only penalize results outside.
            penalty = penalty + sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
                                        (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
                                        else_=1.0)

        if details.near is not None:
            if details.near_radius is not None:
                sql = sql.where(_within_near(t))
            sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))

        if self.countries:
            sql = sql.where(t.c.country_code.in_(self.countries.values))

        if details.excluded:
            sql = sql.where(_exclude_places(t))

        if self.lookups:
            assert len(self.lookups) == 1
            assert self.lookups[0].lookup_type == 'restrict'
            # The tokens must appear in the names of the parent place.
            tsearch = conn.t.search_name
            all_names = sa.func.array_cat(tsearch.c.name_vector,
                                          tsearch.c.nameaddress_vector,
                                          type_=ARRAY(sa.Integer))
            sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
                     .where(all_names.contains(self.lookups[0].tokens))

        for ranking in self.rankings:
            penalty = penalty + ranking.sql_penalty(conn.t.search_name)

        postcode_penalties = [(t.c.postcode == value, pen) for value, pen in self.postcodes]
        penalty = penalty + sa.case(*postcode_penalties, else_=1.0)

        sql = sql.add_columns(penalty.label('accuracy'))\
                 .order_by('accuracy')\
                 .limit(LIMIT_PARAM)

        results = nres.SearchResults()
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_postcode_row(row, nres.SearchResult)
            assert result
            result.accuracy = row.accuracy
            results.append(result)

        return results
577
578
579
580 class PlaceSearch(AbstractSearch):
581     """ Generic search for an address or named place.
582     """
583     def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None:
584         super().__init__(sdata.penalty + extra_penalty)
585         self.countries = sdata.countries
586         self.postcodes = sdata.postcodes
587         self.housenumbers = sdata.housenumbers
588         self.qualifiers = sdata.qualifiers
589         self.lookups = sdata.lookups
590         self.rankings = sdata.rankings
591         self.expected_count = expected_count
592
593
594     async def lookup(self, conn: SearchConnection,
595                      details: SearchDetails) -> nres.SearchResults:
596         """ Find results for the search in the database.
597         """
598         t = conn.t.placex
599         tsearch = conn.t.search_name
600
601         sql: SaLambdaSelect = sa.lambda_stmt(lambda:
602                   sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
603                             t.c.class_, t.c.type,
604                             t.c.address, t.c.extratags, t.c.admin_level,
605                             t.c.housenumber, t.c.postcode, t.c.country_code,
606                             t.c.wikipedia,
607                             t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
608                             t.c.centroid,
609                             t.c.geometry.ST_Expand(0).label('bbox'))
610                    .where(t.c.place_id == tsearch.c.place_id))
611
612
613         if details.geometry_output:
614             sql = _add_geometry_columns(sql, t.c.geometry, details)
615
616         penalty: SaExpression = sa.literal(self.penalty)
617         for ranking in self.rankings:
618             penalty += ranking.sql_penalty(tsearch)
619
620         for lookup in self.lookups:
621             sql = sql.where(lookup.sql_condition(tsearch))
622
623         if self.countries:
624             sql = sql.where(tsearch.c.country_code.in_(self.countries.values))
625
626         if self.postcodes:
627             # if a postcode is given, don't search for state or country level objects
628             sql = sql.where(tsearch.c.address_rank > 9)
629             tpc = conn.t.postcode
630             pcs = self.postcodes.values
631             if self.expected_count > 1000:
632                 # Many results expected. Restrict by postcode.
633                 sql = sql.where(sa.select(tpc.c.postcode)
634                                   .where(tpc.c.postcode.in_(pcs))
635                                   .where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12))
636                                   .exists())
637
638             # Less results, only have a preference for close postcodes
639             pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(tsearch.c.centroid)))\
640                       .where(tpc.c.postcode.in_(pcs))\
641                       .scalar_subquery()
642             penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
643                                else_=sa.func.coalesce(pc_near, 2.0))
644
645         if details.viewbox is not None:
646             if details.bounded_viewbox:
647                 if details.viewbox.area < 0.2:
648                     sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
649                 else:
650                     sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
651             elif self.expected_count >= 10000:
652                 if details.viewbox.area < 0.5:
653                     sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX2_PARAM))
654                 else:
655                     sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX2_PARAM))
656             else:
657                 penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
658                                    (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
659                                    else_=1.0)
660
661         if details.near is not None:
662             if details.near_radius is not None:
663                 if details.near_radius < 0.1:
664                     sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
665                 else:
666                     sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
667                                                                            NEAR_RADIUS_PARAM))
668             sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
669                                       .label('importance'))
670             sql = sql.order_by(sa.desc(sa.text('importance')))
671         else:
672             if self.expected_count < 10000\
673                or (details.viewbox is not None and details.viewbox.area < 0.5):
674                 sql = sql.order_by(
675                         penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
676                                     else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
677             sql = sql.add_columns(t.c.importance)
678
679
680         sql = sql.add_columns(penalty.label('accuracy'))
681
682         if self.expected_count < 10000:
683             sql = sql.order_by(sa.text('accuracy'))
684
685         if self.housenumbers:
686             hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M"
687             sql = sql.where(tsearch.c.address_rank.between(16, 30))\
688                      .where(sa.or_(tsearch.c.address_rank < 30,
689                                    t.c.housenumber.op('~*')(hnr_regexp)))
690
691             # Cross-check for housenumbers. This needs to be done on a rather
692             # large set: in the worst case there are 40,000 main streets in OSM.
693             inner = sql.limit(10000).subquery()
694
695             # Housenumbers from placex
696             thnr = conn.t.placex.alias('hnr')
697             pid_list = array_agg(thnr.c.place_id) # type: ignore[no-untyped-call]
698             place_sql = sa.select(pid_list)\
699                           .where(thnr.c.parent_place_id == inner.c.place_id)\
700                           .where(thnr.c.housenumber.op('~*')(hnr_regexp))\
701                           .where(thnr.c.linked_place_id == None)\
702                           .where(thnr.c.indexed_status == 0)
703
704             if details.excluded:
705                 place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
706             if self.qualifiers:
707                 place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
708
709             numerals = [int(n) for n in self.housenumbers.values
710                         if n.isdigit() and len(n) < 8]
711             interpol_sql: SaColumn
712             tiger_sql: SaColumn
713             if numerals and \
714                (not self.qualifiers or ('place', 'house') in self.qualifiers.values):
715                 # Housenumbers from interpolations
716                 interpol_sql = _make_interpolation_subquery(conn.t.osmline, inner,
717                                                             numerals, details)
718                 # Housenumbers from Tiger
719                 tiger_sql = sa.case((inner.c.country_code == 'us',
720                                      _make_interpolation_subquery(conn.t.tiger, inner,
721                                                                   numerals, details)
722                                     ), else_=None)
723             else:
724                 interpol_sql = sa.null()
725                 tiger_sql = sa.null()
726
727             unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
728                                interpol_sql.label('interpol_hnr'),
729                                tiger_sql.label('tiger_hnr')).subquery('unsort')
730             sql = sa.select(unsort)\
731                     .order_by(sa.case((unsort.c.placex_hnr != None, 1),
732                                       (unsort.c.interpol_hnr != None, 2),
733                                       (unsort.c.tiger_hnr != None, 3),
734                                       else_=4),
735                               unsort.c.accuracy)
736         else:
737             sql = sql.where(t.c.linked_place_id == None)\
738                      .where(t.c.indexed_status == 0)
739             if self.qualifiers:
740                 sql = sql.where(self.qualifiers.sql_restrict(t))
741             if details.excluded:
742                 sql = sql.where(_exclude_places(tsearch))
743             if details.min_rank > 0:
744                 sql = sql.where(sa.or_(tsearch.c.address_rank >= MIN_RANK_PARAM,
745                                        tsearch.c.search_rank >= MIN_RANK_PARAM))
746             if details.max_rank < 30:
747                 sql = sql.where(sa.or_(tsearch.c.address_rank <= MAX_RANK_PARAM,
748                                        tsearch.c.search_rank <= MAX_RANK_PARAM))
749             if details.layers is not None:
750                 sql = sql.where(_filter_by_layer(t, details.layers))
751
752         sql = sql.limit(LIMIT_PARAM)
753
754         results = nres.SearchResults()
755         for row in await conn.execute(sql, _details_to_bind_params(details)):
756             result = nres.create_from_placex_row(row, nres.SearchResult)
757             assert result
758             result.bbox = Bbox.from_wkb(row.bbox)
759             result.accuracy = row.accuracy
760             if not details.excluded or not result.place_id in details.excluded:
761                 results.append(result)
762
763             if self.housenumbers and row.rank_address < 30:
764                 if row.placex_hnr:
765                     subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
766                 elif row.interpol_hnr:
767                     subs = _get_osmline(conn, row.interpol_hnr, numerals, details)
768                 elif row.tiger_hnr:
769                     subs = _get_tiger(conn, row.tiger_hnr, numerals, row.osm_id, details)
770                 else:
771                     subs = None
772
773                 if subs is not None:
774                     async for sub in subs:
775                         assert sub.housenumber
776                         sub.accuracy = result.accuracy
777                         if not any(nr in self.housenumbers.values
778                                    for nr in sub.housenumber.split(';')):
779                             sub.accuracy += 0.6
780                         results.append(sub)
781
782                 result.accuracy += 1.0 # penalty for missing housenumber
783
784         return results