]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/results.py
1e77d0be5aba5b6b6c9ba7118380956adba90d7d
[nominatim.git] / nominatim / api / results.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Dataclasses for search results and helper functions to fill them.
9
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
13 """
14 from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any
15 import enum
16 import dataclasses
17 import datetime as dt
18
19 import sqlalchemy as sa
20
21 from nominatim.typing import SaSelect, SaRow
22 from nominatim.api.types import Point, Bbox, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25 from nominatim.api.localization import Locales
26
27 # This file defines complex result data classes.
28 # pylint: disable=too-many-instance-attributes
29
class SourceTable(enum.Enum):
    """ Enumeration of the kinds of results. Each member names the
        kind of table the result row was read from.
    """
    # Set by create_from_placex_row().
    PLACEX = 1
    # Set by create_from_osmline_row() (address interpolations).
    OSMLINE = 2
    # Set by create_from_tiger_row() (Tiger data interpolations).
    TIGER = 3
    # Set by create_from_postcode_row().
    POSTCODE = 4
    # Set by create_from_country_row() (fallback country tables).
    COUNTRY = 5
38
39
@dataclasses.dataclass
class AddressLine:
    """ Information about a single place that is related to a result,
        for example as part of its address.
    """
    # Identification of the place. 'osm_object' is a tuple of
    # (osm_type, osm_id) and is None when the row has no OSM type.
    place_id: Optional[int]
    osm_object: Optional[Tuple[str, int]]
    # Tuple of (class, type) of the place.
    category: Tuple[str, str]
    # Names of the place; may carry an additional 'housenumber' entry.
    names: Dict[str, str]
    # Additional tags; may carry a 'place' entry with the place type.
    extratags: Optional[Dict[str, str]]

    admin_level: Optional[int]
    # True when the place geometry is an area.
    fromarea: bool
    # Only lines with this flag set are used by AddressLines.localize().
    isaddress: bool
    rank_address: int
    distance: float

    # Filled in by AddressLines.localize().
    local_name: Optional[str] = None
57
58
class AddressLines(List[AddressLine]):
    """ Sequence of address lines ordered in descending order by their rank.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Compute the local name for each address part using the
            given locales and return the resulting names in list order.

            Only parts that are marked as isaddress and that have names
            are localized and returned. A name is left out of the returned
            list when it is identical to the name directly preceding it.
        """
        labels: List[str] = []

        for part in self:
            # Parts that do not belong to the address or are nameless
            # keep their current local_name.
            if not (part.isaddress and part.names):
                continue

            part.local_name = locales.display_name(part.names)
            if labels and labels[-1] == part.local_name:
                continue
            labels.append(part.local_name)

        return labels
79
80
81
@dataclasses.dataclass
class WordInfo:
    """ Information about a single search term as read from
        the word table.
    """
    # The three fields correspond to the columns of the same name
    # that complete_keywords() selects from the 'word' table.
    word_id: int
    word_token: str
    word: Optional[str] = None
89
90
# Type of a collection of search terms, used for the name_keywords and
# address_keywords fields of BaseResult.
WordInfos = Sequence[WordInfo]
92
93
@dataclasses.dataclass
class BaseResult:
    """ Base data class with the information that all types of
        search results have in common.
    """
    # Mandatory information.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    # Identification of the place.
    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None

    # Both are filled in by localize().
    locale_name: Optional[str] = None
    display_name: Optional[str] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    # See calculated_importance() for a value that is never missing.
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Optional details. They remain None until the corresponding
    # complete_*() function has been run for the result.
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Latitude (y coordinate) of the centroid of the place.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Longitude (x coordinate) of the centroid of the place.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Return an importance that is always usable: the stored
            importance when there is one, otherwise an artificial value
            derived from the search rank of the place.

            A stored importance of 0 is handled like a missing one.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)


    def localize(self, locales: Locales) -> None:
        """ Set locale_name and display_name of the place for the
            given locales. When address rows are available, they are
            localized as well and the display name is composed of them.
        """
        self.locale_name = locales.display_name(self.names)

        if not self.address_rows:
            # No address information: the name alone has to do.
            self.display_name = self.locale_name
            return

        self.display_name = ', '.join(self.address_rows.localize(locales))
162
163
164
# Type variable for functions that create or handle any result class
# derived from BaseResult.
BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
166
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ A result that carries additional internal information from
        the database on top of the common result data.
    """
    # NOTE(review): these fields are not set by any of the
    # create_from_*_row() functions in this file; presumably the caller
    # fills them in - confirm.
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15
    indexed_date: Optional[dt.datetime] = None
176
177
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ A result of a reverse geocoding request.
    """
    # Both fields are optional extras on top of the common result data.
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
184
185
class ReverseResults(List[ReverseResult]):
    """ List of results of a reverse lookup, ordered by distance.
        The list is empty when nothing was found.
    """
190
191
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ A result of a forward geocoding request.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0


    @property
    def ranking(self) -> float:
        """ Ranking of the result: the accuracy reduced by the
            calculated importance. An accuracy of None counts as 1.
        """
        accuracy = 1 if self.accuracy is None else self.accuracy

        return accuracy - self.calculated_importance()
206
207
class SearchResults(List[SearchResult]):
    """ List of results of a forward lookup, ordered by relevance.
        The list is empty when nothing was found.
    """
212
213
214 def _filter_geometries(row: SaRow) -> Dict[str, str]:
215     return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
216             if k.startswith('geometry_')}
217
218
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        placex table. The result fields are filled from the corresponding
        columns of the row and all 'geometry_*' columns are collected
        in the geometry field.

        Returns None if the row is None.
    """
    if row is None:
        return None

    osm_object = (row.osm_type, row.osm_id)
    category = (row.class_, row.type)
    centroid = Point.from_wkb(row.centroid.data)

    return class_type(source_table=SourceTable.PLACEX,
                      centroid=centroid,
                      category=category,
                      place_id=row.place_id,
                      osm_object=osm_object,
                      names=row.name,
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
244
245
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        address interpolation table osmline. Returns None if the row
        is None.

        A row with a housenumber yields a result of category
        ('place', 'house') with that housenumber set. Without housenumber
        the category is ('place', 'houses') and the extratags describe
        the interpolation (startnumber, endnumber, step).
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)
    has_hnr = hnr is not None

    res = class_type(source_table=SourceTable.OSMLINE,
                     place_id=row.place_id,
                     # Interpolations are always reported as ways.
                     osm_object=('W', row.osm_id),
                     category=('place', 'house' if has_hnr else 'houses'),
                     address=row.address,
                     postcode=row.postcode,
                     country_code=row.country_code,
                     centroid=Point.from_wkb(row.centroid.data),
                     geometry=_filter_geometries(row))

    if has_hnr:
        res.housenumber = str(hnr)
    else:
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}

    return res
278
279
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        Tiger data interpolation table. Returns None if the row is None.

        A row with a housenumber yields a result of category
        ('place', 'house') with that housenumber set. Without housenumber
        the category is ('place', 'houses') and the extratags describe
        the interpolation (startnumber, endnumber, step).
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)
    has_hnr = hnr is not None

    res = class_type(source_table=SourceTable.TIGER,
                     place_id=row.place_id,
                     osm_object=(row.osm_type, row.osm_id),
                     category=('place', 'house' if has_hnr else 'houses'),
                     postcode=row.postcode,
                     # The country is fixed for Tiger results.
                     country_code='us',
                     centroid=Point.from_wkb(row.centroid.data),
                     geometry=_filter_geometries(row))

    if has_hnr:
        res.housenumber = str(hnr)
    else:
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}

    return res
311
312
def create_from_postcode_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        postcode table. The postcode becomes the 'ref' name of the
        result. Returns None if the row is None.
    """
    if row is None:
        return None

    names = {'ref': row.postcode}
    centroid = Point.from_wkb(row.centroid.data)

    return class_type(source_table=SourceTable.POSTCODE,
                      category=('place', 'postcode'),
                      centroid=centroid,
                      place_id=row.place_id,
                      names=names,
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
331
332
def create_from_country_row(row: Optional[SaRow],
                        class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        fallback country tables. Address and search rank of such a
        result are always 4. Returns None if the row is None.
    """
    if row is None:
        return None

    country_rank = 4

    return class_type(source_table=SourceTable.COUNTRY,
                      category=('place', 'country'),
                      centroid=Point.from_wkb(row.centroid.data),
                      names=row.name,
                      rank_address=country_rank,
                      rank_search=country_rank,
                      country_code=row.country_code)
348
349
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Add to the results the additional information from the database
        that has been requested through 'details'. Nothing is done when
        the list of results is empty.
    """
    if not results:
        return

    log().section('Query details for result')

    # Address details are fetched for all results at once, the other
    # details one result at a time.
    if details.address_details:
        log().comment('Query address details')
        await complete_address_details(conn, results)

    if details.linked_places:
        log().comment('Query linked places')
        for result in results:
            await complete_linked_places(conn, result)

    if details.parented_places:
        log().comment('Query parent places')
        for result in results:
            await complete_parented_places(conn, result)

    if details.keywords:
        log().comment('Query keywords')
        for result in results:
            await complete_keywords(conn, result)
372
373
374 def _result_row_to_address_row(row: SaRow) -> AddressLine:
375     """ Create a new AddressLine from the results of a datbase query.
376     """
377     extratags: Dict[str, str] = getattr(row, 'extratags', {})
378     if hasattr(row, 'place_type') and row.place_type:
379         extratags['place'] = row.place_type
380
381     names = row.name
382     if getattr(row, 'housenumber', None) is not None:
383         if names is None:
384             names = {}
385         names['housenumber'] = row.housenumber
386
387     return AddressLine(place_id=row.place_id,
388                        osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
389                        category=(getattr(row, 'class'), row.type),
390                        names=names,
391                        extratags=extratags,
392                        admin_level=row.admin_level,
393                        fromarea=row.fromarea,
394                        isaddress=getattr(row, 'isaddress', True),
395                        rank_address=row.rank_address,
396                        distance=row.distance)
397
398
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about places that make up the address of the result.

        The address lines for all results with a place ID are fetched with
        a single query and saved in the address_rows field of each result.
        Results without a place ID are left untouched. The same goes for
        results for which the query does not return any row.

        Raises an AssertionError when the database returns rows for
        a place ID that is not part of the results.
    """
    def get_hnr(result: BaseResult) -> Tuple[int, int]:
        # A housenumber is only handed in for interpolation results.
        # All other results use -1.
        housenumber = -1
        if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
            if result.housenumber is not None:
                housenumber = int(result.housenumber)
            elif result.extratags is not None and 'startnumber' in result.extratags:
                # details requests do not come with a specific house number
                housenumber = int(result.extratags['startnumber'])
        assert result.place_id
        return result.place_id, housenumber

    data: List[Tuple[Any, ...]] = [get_hnr(r) for r in results if r.place_id]

    if not data:
        return

    # Inline table with one (place_id, housenumber) row per result.
    values = sa.values(sa.column('place_id', type_=sa.Integer),
                       sa.column('housenumber', type_=sa.Integer),
                       name='places',
                       literal_binds=True).data(data)

    # The database function get_addressdata() is applied to each row
    # of the inline table.
    sfn = sa.func.get_addressdata(values.c.place_id, values.c.housenumber)\
                .table_valued( # type: ignore[no-untyped-call]
                    sa.column('place_id', type_=sa.Integer),
                    'osm_type',
                    sa.column('osm_id', type_=sa.BigInteger),
                    sa.column('name', type_=conn.t.types.Composite),
                    'class', 'type', 'place_type',
                    sa.column('admin_level', type_=sa.Integer),
                    sa.column('fromarea', type_=sa.Boolean),
                    sa.column('isaddress', type_=sa.Boolean),
                    sa.column('rank_address', type_=sa.SmallInteger),
                    sa.column('distance', type_=sa.Float),
                    joins_implicitly=True)

    # Ordering by place ID first keeps all rows of a result together.
    sql = sa.select(values.c.place_id.label('result_place_id'), sfn)\
            .order_by(values.c.place_id,
                      sa.column('rank_address').desc(),
                      sa.column('isaddress').desc())

    # Map place IDs to their result. When a place ID appears more than
    # once, the first result with that ID receives the address lines.
    by_place_id: Dict[int, BaseResult] = {}
    for result in results:
        if result.place_id:
            by_place_id.setdefault(result.place_id, result)

    current_place_id: Optional[int] = None
    current_rows: Optional[AddressLines] = None
    for row in await conn.execute(sql):
        if current_rows is None or row.result_place_id != current_place_id:
            # The rows of the next result start here.
            target = by_place_id.get(row.result_place_id)
            if target is None:
                # Raised explicitly so that the check is not dropped
                # when Python runs with optimizations enabled.
                raise AssertionError(
                    f"Address row for unknown place ID {row.result_place_id}.")
            current_place_id = row.result_place_id
            current_rows = AddressLines()
            target.address_rows = current_rows
        current_rows.append(_result_row_to_address_row(row))
453
454
455 # pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create a select statement on the placex table that returns the
        columns needed by _result_row_to_address_row() to create an
        AddressLine.

        The 'fromarea' column states if the geometry of the place is a
        polygon or multipolygon. The 'distance' column contains the
        distance between the geometry of the place and 'centroid'.
        The caller still has to add the conditions for the rows to select.
    """
    t = conn.t.placex
    # 'centroid' supplies the values for the two %f placeholders of the
    # distance expression.
    # NOTE(review): presumably a Point is a two-element tuple (x, y) - confirm.
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
                     t.c.rank_address,
                     sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid).label('distance'))
469
470
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the linked_rows of the result with the places that
        link to it. Only results from the placex table can have linked
        places, all other results get an empty list.
    """
    linked = AddressLines()
    result.linked_rows = linked

    if result.source_table != SourceTable.PLACEX:
        return

    query = _placex_select_address_row(conn, result.centroid)\
              .where(conn.t.placex.c.linked_place_id == result.place_id)

    for linked_row in await conn.execute(query):
        linked.append(_result_row_to_address_row(linked_row))
483
484
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill name_keywords and address_keywords of the result with the
        search terms saved for the place.

        Requires that the query analyzer was initialised to get access to
        the word table.
    """
    search_name = conn.t.search_name
    vector_sql = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                   .where(search_name.c.place_id == result.place_id)

    name_keywords: List[WordInfo] = []
    address_keywords: List[WordInfo] = []
    result.name_keywords = name_keywords
    result.address_keywords = address_keywords

    word = conn.t.meta.tables['word']
    word_sql = sa.select(word.c.word_id, word.c.word_token, word.c.word)

    for name_tokens, address_tokens in await conn.execute(vector_sql):
        # Look up the words for the name tokens first, then the ones
        # for the address tokens.
        for keywords, tokens in ((name_keywords, name_tokens),
                                 (address_keywords, address_tokens)):
            word_rows = await conn.execute(word_sql.where(word.c.word_id == sa.any_(tokens)))
            for word_row in word_rows:
                keywords.append(WordInfo(*word_row))
507
508
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the parented_rows of the result with the places of
        search rank 30 that have the result as their parent. Only results
        from the placex table can be a parent, all other results get
        an empty list.
    """
    parented = AddressLines()
    result.parented_rows = parented

    if result.source_table != SourceTable.PLACEX:
        return

    query = _placex_select_address_row(conn, result.centroid)\
              .where(conn.t.placex.c.parent_place_id == result.place_id)\
              .where(conn.t.placex.c.rank_search == 30)

    for child_row in await conn.execute(query):
        parented.append(_result_row_to_address_row(child_row))
520
521     for row in await conn.execute(sql):
522         result.parented_rows.append(_result_row_to_address_row(row))