]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/results.py
implement search builder
[nominatim.git] / nominatim / api / results.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Dataclasses for search results and helper functions to fill them.
9
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
13 """
14 from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List
15 import enum
16 import dataclasses
17 import datetime as dt
18
19 import sqlalchemy as sa
20
21 from nominatim.typing import SaSelect, SaRow
22 from nominatim.api.types import Point, Bbox, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25 from nominatim.api.localization import Locales
26 from nominatim.api.search.query_analyzer_factory import make_query_analyzer
27
28 # This file defines complex result data classes.
29 # pylint: disable=too-many-instance-attributes
30
class SourceTable(enum.Enum):
    """ Identifies the kind of source a result was created from.

        The value is saved in the 'source_table' field of each result.
    """
    # Result built from a row of the placex table.
    PLACEX = 1
    # Result built from a row of the address interpolation table.
    OSMLINE = 2
    # Result built from a row of the Tiger data interpolation table.
    TIGER = 3
    # Result built from a row of the postcode table.
    POSTCODE = 4
    COUNTRY = 5
39
40
@dataclasses.dataclass
class AddressLine:
    """ Description of a single place that is related to a result,
        for example one part of its address.
    """
    # Identification of the place: internal ID and (OSM type, OSM ID).
    place_id: Optional[int]
    osm_object: Optional[Tuple[str, int]]
    # Main tag of the place as a (class, type) tuple.
    category: Tuple[str, str]
    # Name and extra tag dictionaries of the place.
    names: Dict[str, str]
    extratags: Optional[Dict[str, str]]

    admin_level: Optional[int]
    fromarea: bool
    # Only lines with this flag set are handled by AddressLines.localize().
    isaddress: bool
    rank_address: int
    distance: float

    # Localized name, unset until AddressLines.localize() has been called.
    local_name: Optional[str] = None
58
59
class AddressLines(List[AddressLine]):
    """ List of address lines, sorted by rank in descending order.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Compute the local name for the parts of the address using
            the given locales and save it with each part. Returns the
            local names in list order, where a name that equals its
            direct predecessor is left out.

            Parts that are not marked as isaddress or that have no names
            are neither localized nor returned.
        """
        labels: List[str] = []

        for part in self:
            if not (part.isaddress and part.names):
                continue

            part.local_name = locales.display_name(part.names)

            # Skip names that merely repeat the previously added label.
            if labels and labels[-1] == part.local_name:
                continue

            labels.append(part.local_name)

        return labels
80
81
82
@dataclasses.dataclass
class WordInfo:
    """ Description of a single search term: its ID, its token and
        optionally the word itself.
    """
    word_id: int
    word_token: str
    word: Optional[str] = None


# Type used for collections of search term descriptions.
WordInfos = Sequence[WordInfo]
93
94
@dataclasses.dataclass
class BaseResult:
    """ Base data class with the fields that are shared by
        all kinds of search results.
    """
    # Mandatory information: origin, (class, type) and center point.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Optional details. They remain None until explicitly filled in.
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    # Every result gets its own dictionary of geometries.
    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Latitude of the place's center point, i.e. the second
            coordinate (y) of the centroid.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Longitude of the place's center point, i.e. the first
            coordinate (x) of the centroid.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Return an importance value that can always be used. When the
            place has no stored importance (None or 0), an artificial
            value is derived from the search rank of the place instead.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)


# Type variable for functions that work with any kind of result class.
BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
151
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ Result that carries additional internal database information
        on top of the common result fields.
    """
    # IDs of the parent place and of the place this one is linked to.
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15
    indexed_date: Optional[dt.datetime] = None
161
162
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ Result of a reverse geocoding request. Adds an optional
        distance and bounding box to the common result fields.
    """
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
169
170
class ReverseResults(List[ReverseResult]):
    """ List of results of a reverse lookup, sorted by distance.
        The list is empty when nothing was found.
    """
175
176
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ Result of a forward geocoding request. Adds an optional
        bounding box to the common result fields.
    """
    bbox: Optional[Bbox] = None
182
183
class SearchResults(List[SearchResult]):
    """ List of results of a forward lookup, sorted by relevance.
        The list is empty when nothing was found.
    """
188
189
190 def _filter_geometries(row: SaRow) -> Dict[str, str]:
191     return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
192             if k.startswith('geometry_')}
193
194
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        placex table and fill it with the data of that row.
        When no row is given (None), None is returned.
    """
    if row is None:
        return None

    # Mandatory fields first, then the optional ones in field order.
    return class_type(source_table=SourceTable.PLACEX,
                      category=(row.class_, row.type),
                      centroid=Point.from_wkb(row.centroid.data),
                      place_id=row.place_id,
                      osm_object=(row.osm_type, row.osm_id),
                      names=row.name,
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
220
221
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        address interpolation table osmline. When no row is given (None),
        None is returned.

        A row with a housenumber yields a result of type 'house' with
        that housenumber. A row without one yields a result of type 'houses',
        which describes the interpolation in its extratags.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    is_interpolation = housenumber is None

    result = class_type(source_table=SourceTable.OSMLINE,
                        place_id=row.place_id,
                        osm_object=('W', row.osm_id),
                        category=('place', 'houses' if is_interpolation else 'house'),
                        address=row.address,
                        postcode=row.postcode,
                        country_code=row.country_code,
                        centroid=Point.from_wkb(row.centroid.data),
                        geometry=_filter_geometries(row))

    if is_interpolation:
        result.extratags = {key: str(getattr(row, key))
                            for key in ('startnumber', 'endnumber', 'step')}
    else:
        result.housenumber = str(housenumber)

    return result
254
255
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        Tiger data interpolation table. When no row is given (None),
        None is returned. The country code of the result is always 'us'.

        A row with a housenumber yields a result of type 'house' with
        that housenumber. A row without one yields a result of type 'houses',
        which describes the interpolation in its extratags.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    is_interpolation = housenumber is None

    result = class_type(source_table=SourceTable.TIGER,
                        place_id=row.place_id,
                        osm_object=(row.osm_type, row.osm_id),
                        category=('place', 'houses' if is_interpolation else 'house'),
                        postcode=row.postcode,
                        country_code='us',
                        centroid=Point.from_wkb(row.centroid.data),
                        geometry=_filter_geometries(row))

    if is_interpolation:
        result.extratags = {key: str(getattr(row, key))
                            for key in ('startnumber', 'endnumber', 'step')}
    else:
        result.housenumber = str(housenumber)

    return result
287
288
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        postcode table. When no row is given (None), None is returned.

        The postcode itself is saved as the 'ref' name of the result.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.POSTCODE,
                      category=('place', 'postcode'),
                      centroid=Point.from_wkb(row.centroid.data),
                      place_id=row.place_id,
                      names={'ref': row.postcode},
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
307
308
async def add_result_details(conn: SearchConnection, result: BaseResult,
                             details: LookupDetails) -> None:
    """ Add further details from the database to the result. The
        flags in 'details' determine which kinds of details are added.
    """
    log().section('Query details for result')

    # Flag of 'details', log comment and completion function to run.
    # The flags are checked one after another in the order given here.
    steps = (
        ('address_details', 'Query address details', complete_address_details),
        ('linked_places', 'Query linked places', complete_linked_places),
        ('parented_places', 'Query parent places', complete_parented_places),
        ('keywords', 'Query keywords', complete_keywords),
    )

    for flag, comment, complete in steps:
        if getattr(details, flag):
            log().comment(comment)
            await complete(conn, result)
327
328
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The columns 'extratags', 'place_type', 'housenumber' and
        'isaddress' are optional. A non-empty 'place_type' is added
        to the extratags under the key 'place', an existing housenumber
        is added to the names under the key 'housenumber'. When there is no
        'isaddress' column, the line is marked as being part of the address.

        The dictionaries of the row itself are never modified.
    """
    # A missing column and a NULL value both result in empty extratags.
    # Always work on a copy, so that the row data remains untouched.
    extratags: Dict[str, str] = dict(getattr(row, 'extratags', None) or {})
    place_type = getattr(row, 'place_type', None)
    if place_type:
        extratags['place'] = place_type

    names = row.name
    housenumber = getattr(row, 'housenumber', None)
    if housenumber is not None:
        # Copy before adding the housenumber for the same reason as above.
        names = {} if names is None else dict(names)
        names['housenumber'] = housenumber

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       # 'class' is a keyword, so attribute syntax cannot be used.
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
352
353
async def complete_address_details(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places that make up the address of the result
        and save them in the 'address_rows' of the result.
    """
    # -1 is handed in when there is no housenumber to look up.
    housenumber = -1
    if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
        if result.housenumber is not None:
            housenumber = int(result.housenumber)
        elif 'startnumber' in (result.extratags or {}):
            # details requests do not come with a specific house number
            housenumber = int(result.extratags['startnumber'])

    # Columns of the table returned by the get_addressdata() SQL function.
    columns = (sa.column('place_id', type_=sa.Integer),
               'osm_type',
               sa.column('osm_id', type_=sa.BigInteger),
               sa.column('name', type_=conn.t.types.Composite),
               'class', 'type', 'place_type',
               sa.column('admin_level', type_=sa.Integer),
               sa.column('fromarea', type_=sa.Boolean),
               sa.column('isaddress', type_=sa.Boolean),
               sa.column('rank_address', type_=sa.SmallInteger),
               sa.column('distance', type_=sa.Float))
    address_fn = sa.func.get_addressdata(result.place_id, housenumber)\
                   .table_valued(*columns) # type: ignore[no-untyped-call]

    sql = sa.select(address_fn).order_by(sa.column('rank_address').desc(),
                                         sa.column('isaddress').desc())

    result.address_rows = AddressLines()
    rows = await conn.execute(sql)
    result.address_rows.extend(map(_result_row_to_address_row, rows))
383
384
385 # pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create the basic select statement over the placex table that
        returns the columns needed to create an address row.

        The statement has no WHERE clause. The caller is expected to
        add the appropriate restrictions. 'centroid' is the point that
        the 'distance' column is computed against.
    """
    t = conn.t.placex
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     # 'fromarea' is true for polygon and multipolygon geometries.
                     sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
                     t.c.rank_address,
                     # The coordinates of the centroid are written directly into
                     # the SQL. '%f' makes sure they are formatted as numbers.
                     sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid).label('distance'))
399
400
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places that are linked to the result and save them
        in the 'linked_rows' of the result.

        The list stays empty for results that do not come from placex.
    """
    result.linked_rows = AddressLines()

    if result.source_table == SourceTable.PLACEX:
        base_sql = _placex_select_address_row(conn, result.centroid)
        placex = conn.t.placex
        sql = base_sql.where(placex.c.linked_place_id == result.place_id)

        rows = await conn.execute(sql)
        result.linked_rows.extend(map(_result_row_to_address_row, rows))
413
414
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the search terms that are saved for the place and save
        them in 'name_keywords' and 'address_keywords' of the result.
    """
    search_name = conn.t.search_name
    token_sql = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                  .where(search_name.c.place_id == result.place_id)

    name_infos: List[WordInfo] = []
    address_infos: List[WordInfo] = []
    result.name_keywords = name_infos
    result.address_keywords = address_infos

    # The query analyzer is created before the word table is looked up
    # (presumably this is what makes the table known to the connection).
    await make_query_analyzer(conn)
    word = conn.t.meta.tables['word']
    word_sql = sa.select(word.c.word_id, word.c.word_token, word.c.word)

    for name_tokens, address_tokens in await conn.execute(token_sql):
        # Resolve the name tokens first, then the address tokens.
        for infos, tokens in ((name_infos, name_tokens),
                              (address_infos, address_tokens)):
            sql = word_sql.where(word.c.word_id == sa.any_(tokens))
            for row in await conn.execute(sql):
                infos.append(WordInfo(*row))
435
436
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places that use the result as the parent for their
        address and save them in the 'parented_rows' of the result.

        The list stays empty for results that do not come from placex.
    """
    result.parented_rows = AddressLines()

    if result.source_table == SourceTable.PLACEX:
        base_sql = _placex_select_address_row(conn, result.centroid)
        placex = conn.t.placex
        # Only children with a search rank of 30 are taken into account.
        sql = base_sql.where(placex.c.parent_place_id == result.place_id)\
                      .where(placex.c.rank_search == 30)

        rows = await conn.execute(sql)
        result.parented_rows.extend(map(_result_row_to_address_row, rows))