]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/results.py
mingle names from linked places into results
[nominatim.git] / nominatim / api / results.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Dataclasses for search results and helper functions to fill them.
9
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
13 """
14 from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any
15 import enum
16 import dataclasses
17 import datetime as dt
18
19 import sqlalchemy as sa
20
21 from nominatim.typing import SaSelect, SaRow
22 from nominatim.api.types import Point, Bbox, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25 from nominatim.api.localization import Locales
26
27 # This file defines complex result data classes.
28 # pylint: disable=too-many-instance-attributes
29
def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
    """ Fold names that come from linked places into the regular names.

        Keys starting with '_place_' lose that prefix, unless a tag with
        the unprefixed key already exists. In that case the key is kept
        as is, so that the place's own name is never overwritten.
        Returns None when there are no names at all.
    """
    if not names:
        return None

    prefix = '_place_'
    mingled = {}
    for key, value in names.items():
        target = key
        if key.startswith(prefix):
            bare_key = key[len(prefix):]
            if bare_key not in names:
                target = bare_key
        mingled[target] = value

    return mingled
46
47
class SourceTable(enum.Enum):
    """ The kind of a result, named after the database table
        the result was created from.
    """
    # Rows of the placex table.
    PLACEX = 1
    # Rows of the address interpolation table osmline.
    OSMLINE = 2
    # Rows of the Tiger data interpolation table.
    TIGER = 3
    # Rows of the postcode table.
    POSTCODE = 4
    # Rows of the fallback country tables.
    COUNTRY = 5
56
57
@dataclasses.dataclass
class AddressLine:
    """ Description of a single place that is related to a result.
    """
    place_id: Optional[int]
    # OSM type and OSM ID of the place, if it has any.
    osm_object: Optional[Tuple[str, int]]
    # Class and type of the place.
    category: Tuple[str, str]
    names: Dict[str, str]
    extratags: Optional[Dict[str, str]]

    admin_level: Optional[int]
    fromarea: bool
    # Only lines with this flag set are used by AddressLines.localize().
    isaddress: bool
    rank_address: int
    distance: float

    # Name in the requested locale, set by AddressLines.localize().
    local_name: Optional[str] = None
75
76
class AddressLines(List[AddressLine]):
    """ Sequence of address lines ordered in descending order by their rank.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Choose the name of each address part according to the given
            locales and save it in the local_name field of the part.

            Only parts that are marked as isaddress and that have names
            are processed. Returns the local names of these parts in order,
            where directly repeated names appear only once.
        """
        labels: List[str] = []

        for part in self:
            if not (part.isaddress and part.names):
                continue

            part.local_name = locales.display_name(part.names)
            # Skip the name when it just repeats the previous label.
            if labels and labels[-1] == part.local_name:
                continue
            labels.append(part.local_name)

        return labels
97
98
99
@dataclasses.dataclass
class WordInfo:
    """ Description of a single search term: its ID, the token and,
        if available, the word itself.
    """
    word_id: int
    word_token: str
    word: Optional[str] = None


# Type used for the keyword lists of a result.
WordInfos = Sequence[WordInfo]
110
111
@dataclasses.dataclass
class BaseResult:
    """ Base data class with the fields that every type of
        search result provides.
    """
    # Table the result comes from, its class/type and its center point.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None

    # Both names are set by localize().
    locale_name: Optional[str] = None
    display_name: Optional[str] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Additional details. They stay None until they are looked up
    # by the matching complete_*() function.
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Latitude of the center point, i.e. its second (y) coordinate.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Longitude of the center point, i.e. its first (x) coordinate.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Return an importance value that is always usable.

            The stored importance is used when it is set and not zero.
            Otherwise a replacement value is derived from the search rank
            of the place.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)


    def localize(self, locales: Locales) -> None:
        """ Set locale_name and display_name according to the given locales.

            The display name is built from the localized address parts
            when address rows are available and falls back to the
            locale name of the place otherwise.
        """
        name = locales.display_name(self.names)
        self.locale_name = name

        if not self.address_rows:
            self.display_name = name
            return

        self.display_name = ', '.join(self.address_rows.localize(locales))



BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
184
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ A result that carries additional internal information
        from the database.
    """
    # IDs of the parent place and of the place this one is linked to.
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15
    indexed_date: Optional[dt.datetime] = None
194
195
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ Result type returned by reverse geocoding.
    """
    # Distance of the place; ReverseResults are ordered by it.
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
202
203
class ReverseResults(List[ReverseResult]):
    """ List of results of a reverse lookup, ordered by distance.
        The list is empty when nothing was found.
    """
208
209
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ Result type returned by forward geocoding.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0


    @property
    def ranking(self) -> float:
        """ Combined measure of accuracy and importance: the accuracy
            (1 when it is not set) minus the calculated importance.
        """
        accuracy = 1 if self.accuracy is None else self.accuracy

        return accuracy - self.calculated_importance()
224
225
class SearchResults(List[SearchResult]):
    """ List of results of a forward lookup, ordered by relevance.
        The list is empty when nothing was found.
    """
230
231
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect all columns of the row whose name starts with 'geometry_'.
        The returned dictionary uses the column names without that
        prefix as keys.
    """
    prefix = 'geometry_'
    geometries: Dict[str, str] = {}

    for column, value in row._mapping.items(): # pylint: disable=W0212
        if column.startswith(prefix):
            geometries[column[len(prefix):]] = value

    return geometries
235
236
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' and fill it with the data
        of a row from the placex table. When there is no row (None),
        None is returned.

        Names from linked places are mingled into the names of the result
        and the 'geometry_*' columns of the row end up in the geometry field.
    """
    if row is None:
        return None

    osm_object = (row.osm_type, row.osm_id)
    category = (row.class_, row.type)
    names = _mingle_name_tags(row.name)
    centroid = Point.from_wkb(row.centroid.data)
    geometry = _filter_geometries(row)

    return class_type(source_table=SourceTable.PLACEX,
                      place_id=row.place_id,
                      osm_object=osm_object,
                      category=category,
                      names=names,
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      centroid=centroid,
                      geometry=geometry)
262
263
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' and fill it with the data
        of a row from the address interpolation table osmline. When there
        is no row (None), None is returned.

        A row with a housenumber becomes a 'house' result with that
        housenumber. Any other row becomes a 'houses' result which
        describes the interpolation (start, end, step) in its extratags.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)
    is_interpolation = hnr is None

    res = class_type(source_table=SourceTable.OSMLINE,
                     place_id=row.place_id,
                     osm_object=('W', row.osm_id),
                     category=('place', 'houses' if is_interpolation else 'house'),
                     address=row.address,
                     postcode=row.postcode,
                     country_code=row.country_code,
                     centroid=Point.from_wkb(row.centroid.data),
                     geometry=_filter_geometries(row))

    if not is_interpolation:
        res.housenumber = str(hnr)
        return res

    res.extratags = {'startnumber': str(row.startnumber),
                     'endnumber': str(row.endnumber),
                     'step': str(row.step)}

    return res
296
297
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' and fill it with the data
        of a row from the Tiger data interpolation table. When there
        is no row (None), None is returned.

        A row with a housenumber becomes a 'house' result with that
        housenumber. Any other row becomes a 'houses' result which
        describes the interpolation (start, end, step) in its extratags.
        The country code of such results is always 'us'.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)
    is_interpolation = hnr is None

    res = class_type(source_table=SourceTable.TIGER,
                     place_id=row.place_id,
                     osm_object=(row.osm_type, row.osm_id),
                     category=('place', 'houses' if is_interpolation else 'house'),
                     postcode=row.postcode,
                     country_code='us',
                     centroid=Point.from_wkb(row.centroid.data),
                     geometry=_filter_geometries(row))

    if not is_interpolation:
        res.housenumber = str(hnr)
        return res

    res.extratags = {'startnumber': str(row.startnumber),
                     'endnumber': str(row.endnumber),
                     'step': str(row.step)}

    return res
329
330
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' and fill it with the data
        of a row from the postcode table. When there is no row (None),
        None is returned.

        The postcode itself becomes the 'ref' name of the result.
    """
    if row is None:
        return None

    names = {'ref': row.postcode}
    centroid = Point.from_wkb(row.centroid.data)
    geometry = _filter_geometries(row)

    return class_type(source_table=SourceTable.POSTCODE,
                      place_id=row.place_id,
                      category=('place', 'postcode'),
                      names=names,
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      centroid=centroid,
                      geometry=geometry)
349
350
def create_from_country_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' and fill it with the data
        of a row from the fallback country tables. When there is no
        row (None), None is returned.

        Such results always get address and search rank 4 and have
        no place ID.
    """
    if row is None:
        return None

    country_rank = 4
    centroid = Point.from_wkb(row.centroid.data)

    return class_type(source_table=SourceTable.COUNTRY,
                      category=('place', 'country'),
                      centroid=centroid,
                      names=row.name,
                      rank_address=country_rank, rank_search=country_rank,
                      country_code=row.country_code)
366
367
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Look up additional information for the results in the database.
        The flags in 'details' decide which kind of information is added.
        Nothing happens when the list of results is empty.
    """
    if not results:
        return

    log().section('Query details for result')

    # Address details are queried for all results at once.
    if details.address_details:
        log().comment('Query address details')
        await complete_address_details(conn, results)

    # All other details are looked up result by result.
    single_lookups = ((details.linked_places, 'Query linked places',
                       complete_linked_places),
                      (details.parented_places, 'Query parent places',
                       complete_parented_places),
                      (details.keywords, 'Query keywords',
                       complete_keywords))

    for requested, message, lookup in single_lookups:
        if requested:
            log().comment(message)
            for result in results:
                await lookup(conn, result)
390
391
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The row must provide the columns place_id, osm_type, osm_id, name,
        class, type, admin_level, fromarea, rank_address and distance.
        The columns extratags, place_type, housenumber and isaddress
        are optional:

        * a non-empty place_type is added as 'place' to the extratags,
        * a housenumber is added as 'housenumber' to the names,
        * rows without isaddress column always count as address part.
    """
    # The extratags column may be missing or NULL. Always work on a copy,
    # so that adding 'place' never modifies the data owned by the row.
    extratags: Dict[str, str] = dict(getattr(row, 'extratags', None) or {})
    place_type = getattr(row, 'place_type', None)
    if place_type:
        extratags['place'] = place_type

    names = _mingle_name_tags(row.name) or {}
    housenumber = getattr(row, 'housenumber', None)
    if housenumber is not None:
        names['housenumber'] = housenumber

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       # 'class' is a keyword, so plain attribute access is not possible.
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
413
414
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about places that make up the address of the result.

        The address lines of all results are fetched with a single query
        using the SQL function get_addressdata() and saved in the
        'address_rows' field of each result. Results without a place ID
        are ignored. Results for which the query returns no rows keep
        their 'address_rows' unchanged.

        NOTE(review): when two results share the same place ID, all rows
        are assigned to the first of them in 'results' and the other one
        gets no address rows - confirm that callers never hand in
        such duplicates.
    """
    def get_hnr(result: BaseResult) -> Tuple[int, int]:
        # Returns the parameters for get_addressdata(): the place ID and
        # a housenumber. The housenumber is only set for results from the
        # interpolation tables and -1 otherwise (presumably the SQL
        # function's marker for 'no housenumber' - TODO confirm).
        housenumber = -1
        if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
            if result.housenumber is not None:
                housenumber = int(result.housenumber)
            elif result.extratags is not None and 'startnumber' in result.extratags:
                # details requests do not come with a specific house number
                housenumber = int(result.extratags['startnumber'])
        assert result.place_id
        return result.place_id, housenumber

    data: List[Tuple[Any, ...]] = [get_hnr(r) for r in results if r.place_id]

    if not data:
        return

    # Inline table 'places' with one (place_id, housenumber) row per result.
    values = sa.values(sa.column('place_id', type_=sa.Integer),
                       sa.column('housenumber', type_=sa.Integer),
                       name='places',
                       literal_binds=True).data(data)

    # get_addressdata() is called for each row of 'places' and returns
    # a table with the columns listed here.
    sfn = sa.func.get_addressdata(values.c.place_id, values.c.housenumber)\
                .table_valued( # type: ignore[no-untyped-call]
                    sa.column('place_id', type_=sa.Integer),
                    'osm_type',
                    sa.column('osm_id', type_=sa.BigInteger),
                    sa.column('name', type_=conn.t.types.Composite),
                    'class', 'type', 'place_type',
                    sa.column('admin_level', type_=sa.Integer),
                    sa.column('fromarea', type_=sa.Boolean),
                    sa.column('isaddress', type_=sa.Boolean),
                    sa.column('rank_address', type_=sa.SmallInteger),
                    sa.column('distance', type_=sa.Float),
                    joins_implicitly=True)

    # Ordering by the place ID of the result first keeps all rows of
    # one result together, which the grouping loop below relies on.
    sql = sa.select(values.c.place_id.label('result_place_id'), sfn)\
            .order_by(values.c.place_id,
                      sa.column('rank_address').desc(),
                      sa.column('isaddress').desc())

    current_result = None
    for row in await conn.execute(sql):
        # A change of the place ID marks the start of the rows of the next result.
        if current_result is None or row.result_place_id != current_result.place_id:
            for result in results:
                if result.place_id == row.result_place_id:
                    current_result = result
                    break
            else:
                # Every place ID in the query was taken from 'results'.
                assert False
            current_result.address_rows = AddressLines()
        current_result.address_rows.append(_result_row_to_address_row(row))
469
470
471 # pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create a select statement over the placex table which returns
        the columns needed for creating an AddressLine from each row.

        In addition to plain table columns, the statement computes
        'fromarea', which is true when the geometry of the place is
        a polygon or multipolygon, and 'distance', the spheroid distance
        between the geometry of the place and 'centroid'.

        The statement has no conditions; the caller is expected to add
        the appropriate where clauses.
    """
    t = conn.t.placex
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
                     t.c.rank_address,
                     # The two coordinates of the centroid are written into
                     # the SQL as plain numbers (%f).
                     sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid).label('distance'))
485
486
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the 'linked_rows' field of the result with the places from
        the placex table that link to the result.

        Only results from the placex table can have linked places.
        All other results get an empty list.
    """
    linked = AddressLines()
    result.linked_rows = linked

    if result.source_table == SourceTable.PLACEX:
        sql = _placex_select_address_row(conn, result.centroid)\
                .where(conn.t.placex.c.linked_place_id == result.place_id)

        for row in await conn.execute(sql):
            linked.append(_result_row_to_address_row(row))
499
500
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the fields 'name_keywords' and 'address_keywords' of the
        result with the search terms that are saved for the place.

        The function reads from the word table, which is only accessible
        after the query analyzer was initialised.
    """
    search_table = conn.t.search_name
    token_sql = sa.select(search_table.c.name_vector, search_table.c.nameaddress_vector)\
                  .where(search_table.c.place_id == result.place_id)

    name_keywords: List[WordInfo] = []
    address_keywords: List[WordInfo] = []
    result.name_keywords = name_keywords
    result.address_keywords = address_keywords

    word_table = conn.t.meta.tables['word']
    word_sql = sa.select(word_table.c.word_id, word_table.c.word_token, word_table.c.word)

    for name_tokens, address_tokens in await conn.execute(token_sql):
        # Resolve the name tokens first, then the address tokens.
        for tokens, keywords in ((name_tokens, name_keywords),
                                 (address_tokens, address_keywords)):
            matching = word_sql.where(word_table.c.word_id == sa.any_(tokens))
            for row in await conn.execute(matching):
                keywords.append(WordInfo(*row))
523
524
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Fill the 'parented_rows' field of the result with the places from
        the placex table that have the result as their parent. Only
        places with a search rank of 30 are taken into account.

        Only results from the placex table can be a parent.
        All other results get an empty list.
    """
    parented = AddressLines()
    result.parented_rows = parented

    if result.source_table == SourceTable.PLACEX:
        placex = conn.t.placex
        sql = _placex_select_address_row(conn, result.centroid)\
                .where(placex.c.parent_place_id == result.place_id)\
                .where(placex.c.rank_search == 30)

        for row in await conn.execute(sql):
            parented.append(_result_row_to_address_row(row))