]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/results.py
add API functions for search functions
[nominatim.git] / nominatim / api / results.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Dataclasses for search results and helper functions to fill them.
9
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
13 """
14 from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any
15 import enum
16 import dataclasses
17 import datetime as dt
18
19 import sqlalchemy as sa
20
21 from nominatim.typing import SaSelect, SaRow
22 from nominatim.api.types import Point, Bbox, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25 from nominatim.api.localization import Locales
26
27 # This file defines complex result data classes.
28 # pylint: disable=too-many-instance-attributes
29
class SourceTable(enum.Enum):
    """ The database table a result was created from.
    """
    # Row from the placex table.
    PLACEX = 1
    # Row from the address interpolation table osmline.
    OSMLINE = 2
    # Row from the Tiger data interpolation table.
    TIGER = 3
    # Row from the postcode table.
    POSTCODE = 4
    # Row from the fallback country tables.
    COUNTRY = 5
38
39
@dataclasses.dataclass
class AddressLine:
    """ Description of a single place that is related to a result,
        for example as part of its address.
    """
    # Identification of the place.
    place_id: Optional[int]
    osm_object: Optional[Tuple[str, int]]

    # Main tag of the place and its tag dictionaries.
    category: Tuple[str, str]
    names: Dict[str, str]
    extratags: Optional[Dict[str, str]]

    # Relation between the place and the result.
    admin_level: Optional[int]
    fromarea: bool
    isaddress: bool
    rank_address: int
    distance: float

    # Only set once AddressLines.localize() was called.
    local_name: Optional[str] = None
57
58
class AddressLines(List[AddressLine]):
    """ List of address lines, ordered by descending rank.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Compute the local name for all address parts using the given
            locales and save it in the 'local_name' field of each part.

            Parts that are not marked as isaddress or that have no names
            are left untouched. Returns the local names in list order,
            where a name that equals its direct predecessor is dropped.
        """
        labels: List[str] = []

        for part in self:
            if not (part.isaddress and part.names):
                continue

            part.local_name = locales.display_name(part.names)
            # Only a repetition of the name added last counts as duplicate.
            if labels and labels[-1] == part.local_name:
                continue
            labels.append(part.local_name)

        return labels
79
80
81
@dataclasses.dataclass
class WordInfo:
    """ A single search term together with its ID and token.
    """
    word_id: int
    word_token: str
    # The word itself may be unset.
    word: Optional[str] = None


# Type used for the keyword collections of a result.
WordInfos = Sequence[WordInfo]
92
93
@dataclasses.dataclass
class BaseResult:
    """ Fields shared by all kinds of search results.

        Only the source table, the category and the centroid are
        mandatory. All other fields come with a default.
    """
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    # Identifiers of the place.
    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None

    # Tag dictionaries.
    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    # Ranking information.
    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Additional details, filled in on request by add_result_details().
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    # Geometry columns of the result row, see _filter_geometries().
    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Latitude (y coordinate) of the centroid of the place.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Longitude (x coordinate) of the centroid of the place.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Return an importance value that can always be used for ranking.

            The stored importance is used unless it is unset or zero.
            In that case a replacement is computed from the search rank
            of the place.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)


BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
151
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ Result that carries additional internal information
        from the database on top of the common result fields.
    """
    # References to other places by place ID.
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None

    admin_level: int = 15
    indexed_date: Optional[dt.datetime] = None
161
162
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ Result of a reverse geocoding lookup.
    """
    # Both fields are optional and unset by default.
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
169
170
class ReverseResults(List[ReverseResult]):
    """ List of results of a reverse lookup, sorted by distance.
        The list is empty when nothing was found.
    """
175
176
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ Result of a forward geocoding search.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0


    @property
    def ranking(self) -> float:
        """ Combined measure of accuracy and importance: the accuracy
            minus the calculated importance of the result.
        """
        # An unset accuracy counts as 1.
        accuracy = 1 if self.accuracy is None else self.accuracy

        return accuracy - self.calculated_importance()
191
192
class SearchResults(List[SearchResult]):
    """ List of results of a forward search, sorted by relevance.
        The list is empty when nothing was found.
    """
197
198
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect all columns of the row whose name starts with 'geometry_'
        into a dictionary. The prefix is removed from the keys.
    """
    prefix = 'geometry_'
    geometries: Dict[str, str] = {}

    for column, value in row._mapping.items(): # pylint: disable=W0212
        if column.startswith(prefix):
            geometries[column[len(prefix):]] = value

    return geometries
202
203
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        placex table. When no row is given, then None is returned.
    """
    if row is None:
        return None

    osm_object = (row.osm_type, row.osm_id)
    category = (row.class_, row.type)
    centroid = Point.from_wkb(row.centroid.data)

    return class_type(source_table=SourceTable.PLACEX,
                      category=category,
                      centroid=centroid,
                      place_id=row.place_id,
                      osm_object=osm_object,
                      names=row.name,
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
229
230
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        address interpolation table osmline. When no row is given,
        then None is returned.

        A row with a housenumber yields a result for a single house.
        Without housenumber the result describes the interpolation
        and start, end and step are saved in the extratags.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    is_single_house = housenumber is not None

    result = class_type(source_table=SourceTable.OSMLINE,
                        category=('place', 'house' if is_single_house else 'houses'),
                        centroid=Point.from_wkb(row.centroid.data),
                        place_id=row.place_id,
                        osm_object=('W', row.osm_id),
                        address=row.address,
                        postcode=row.postcode,
                        country_code=row.country_code,
                        geometry=_filter_geometries(row))

    if is_single_house:
        result.housenumber = str(housenumber)
        return result

    result.extratags = {'startnumber': str(row.startnumber),
                        'endnumber': str(row.endnumber),
                        'step': str(row.step)}

    return result
263
264
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        Tiger data interpolation table. When no row is given,
        then None is returned.

        A row with a housenumber yields a result for a single house.
        Without housenumber the result describes the interpolation
        and start, end and step are saved in the extratags.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    is_single_house = housenumber is not None

    # The country code of Tiger results is fixed.
    result = class_type(source_table=SourceTable.TIGER,
                        category=('place', 'house' if is_single_house else 'houses'),
                        centroid=Point.from_wkb(row.centroid.data),
                        place_id=row.place_id,
                        osm_object=(row.osm_type, row.osm_id),
                        postcode=row.postcode,
                        country_code='us',
                        geometry=_filter_geometries(row))

    if is_single_house:
        result.housenumber = str(housenumber)
        return result

    result.extratags = {'startnumber': str(row.startnumber),
                        'endnumber': str(row.endnumber),
                        'step': str(row.step)}

    return result
296
297
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        postcode table. When no row is given, then None is returned.
    """
    if row is None:
        return None

    # The postcode itself is used as the name of the result.
    names = {'ref': row.postcode}
    centroid = Point.from_wkb(row.centroid.data)

    return class_type(source_table=SourceTable.POSTCODE,
                      category=('place', 'postcode'),
                      centroid=centroid,
                      place_id=row.place_id,
                      names=names,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
316
317
def create_from_country_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' from a row of the
        fallback country tables. When no row is given, then None
        is returned.
    """
    if row is None:
        return None

    # Country results always get a fixed rank.
    country_rank = 4

    return class_type(source_table=SourceTable.COUNTRY,
                      category=('place', 'country'),
                      centroid=Point.from_wkb(row.centroid.data),
                      names=row.name,
                      rank_address=country_rank,
                      rank_search=country_rank,
                      country_code=row.country_code)
333
334
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Add the additional information requested in 'details' to the
        results by querying the database. Does nothing when the list
        of results is empty.
    """
    if not results:
        return

    log().section('Query details for result')

    # Address details are retrieved for all results in one go.
    if details.address_details:
        log().comment('Query address details')
        await complete_address_details(conn, results)

    # All other details are retrieved result by result.
    per_result_steps = (
        (details.linked_places, 'Query linked places', complete_linked_places),
        (details.parented_places, 'Query parent places', complete_parented_places),
        (details.keywords, 'Query keywords', complete_keywords),
    )
    for requested, comment, complete in per_result_steps:
        if requested:
            log().comment(comment)
            for result in results:
                await complete(conn, result)
357
358
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The columns 'extratags', 'place_type', 'housenumber' and
        'isaddress' are optional. A missing 'isaddress' column is
        interpreted as True. A place type is added to the extratags
        under the key 'place' and a housenumber to the names under
        the key 'housenumber'.

        The dictionaries of the row are never modified. The address line
        receives its own copies where something needs to be added.
    """
    # The column may be missing or NULL. Use a fresh dictionary in both
    # cases and never write into the dictionary owned by the row.
    extratags: Dict[str, str] = dict(getattr(row, 'extratags', None) or {})
    place_type = getattr(row, 'place_type', None)
    if place_type:
        extratags['place'] = place_type

    names = row.name
    housenumber = getattr(row, 'housenumber', None)
    if housenumber is not None:
        # Copy before adding the housenumber, names may be NULL as well.
        names = dict(names or {})
        names['housenumber'] = housenumber

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
382
383
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Look up the places that make up the address of the results and
        save them in the 'address_rows' field of each result.

        Results without a place ID are skipped.
    """
    def get_hnr(result: BaseResult) -> Tuple[int, int]:
        # Only results from interpolation tables are queried with a house number.
        interpolated = result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE)
        housenumber = -1
        if interpolated and result.housenumber is not None:
            housenumber = int(result.housenumber)
        elif interpolated and result.extratags is not None \
             and 'startnumber' in result.extratags:
            # details requests do not come with a specific house number
            housenumber = int(result.extratags['startnumber'])
        assert result.place_id
        return result.place_id, housenumber

    data: List[Tuple[Any, ...]] = [get_hnr(r) for r in results if r.place_id]

    if not data:
        return

    # Inline table with the (place_id, housenumber) pairs to look up.
    values = sa.values(sa.column('place_id', type_=sa.Integer),
                       sa.column('housenumber', type_=sa.Integer),
                       name='places',
                       literal_binds=True).data(data)

    # Columns returned by the get_addressdata() database function.
    address_columns = (
        sa.column('place_id', type_=sa.Integer),
        'osm_type',
        sa.column('osm_id', type_=sa.BigInteger),
        sa.column('name', type_=conn.t.types.Composite),
        'class', 'type', 'place_type',
        sa.column('admin_level', type_=sa.Integer),
        sa.column('fromarea', type_=sa.Boolean),
        sa.column('isaddress', type_=sa.Boolean),
        sa.column('rank_address', type_=sa.SmallInteger),
        sa.column('distance', type_=sa.Float),
    )
    sfn = sa.func.get_addressdata(values.c.place_id, values.c.housenumber)\
                .table_valued(*address_columns, # type: ignore[no-untyped-call]
                              joins_implicitly=True)

    sql = sa.select(values.c.place_id.label('result_place_id'), sfn)\
            .order_by(values.c.place_id,
                      sa.column('rank_address').desc(),
                      sa.column('isaddress').desc())

    target = None
    for row in await conn.execute(sql):
        if target is None or target.place_id != row.result_place_id:
            # Rows are sorted by place ID, so a different ID means that
            # the rows of the next result start here.
            for candidate in results:
                if candidate.place_id == row.result_place_id:
                    target = candidate
                    break
            else:
                assert False
            target.address_rows = AddressLines()
        target.address_rows.append(_result_row_to_address_row(row))
438
439
440 # pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create a select on the placex table that returns the columns
        needed to build an address line. The 'distance' column
        is computed against the given centroid.
    """
    placex = conn.t.placex

    # Literal SQL: is the geometry of the place an area?
    fromarea = sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea')

    # Literal SQL: the coordinates of the centroid are formatted into the statement.
    distance = sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid).label('distance')

    return sa.select(placex.c.place_id, placex.c.osm_type, placex.c.osm_id,
                     placex.c.name,
                     placex.c.class_.label('class'), placex.c.type,
                     placex.c.admin_level, placex.c.housenumber,
                     fromarea,
                     placex.c.rank_address,
                     distance)
454
455
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places that are linked to the result and save them
        in the 'linked_rows' field. The list stays empty for results
        that do not come from the placex table.
    """
    linked = AddressLines()
    result.linked_rows = linked
    if result.source_table != SourceTable.PLACEX:
        return

    placex = conn.t.placex
    sql = _placex_select_address_row(conn, result.centroid)\
            .where(placex.c.linked_place_id == result.place_id)

    linked.extend([_result_row_to_address_row(row)
                   for row in await conn.execute(sql)])
468
469
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the search terms of the result and save them in the
        'name_keywords' and 'address_keywords' fields.

        Requires that the query analyzer was initialised to get access to
        the word table.
    """
    search_name = conn.t.search_name
    vector_sql = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                   .where(search_name.c.place_id == result.place_id)

    name_keywords: List[WordInfo] = []
    address_keywords: List[WordInfo] = []
    result.name_keywords = name_keywords
    result.address_keywords = address_keywords

    word = conn.t.meta.tables['word']
    word_sql = sa.select(word.c.word_id, word.c.word_token, word.c.word)

    for name_tokens, address_tokens in await conn.execute(vector_sql):
        # Resolve the name tokens first, then the address tokens.
        for keywords, tokens in ((name_keywords, name_tokens),
                                 (address_keywords, address_tokens)):
            word_rows = await conn.execute(word_sql.where(word.c.word_id == sa.any_(tokens)))
            for row in word_rows:
                keywords.append(WordInfo(*row))
492
493
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places that have the result as their parent and
        save them in the 'parented_rows' field. The list stays empty
        for results that do not come from the placex table.
    """
    parented = AddressLines()
    result.parented_rows = parented
    if result.source_table != SourceTable.PLACEX:
        return

    placex = conn.t.placex
    # Only places with a search rank of 30 are taken into account.
    sql = _placex_select_address_row(conn, result.centroid)\
            .where(placex.c.parent_place_id == result.place_id)\
            .where(placex.c.rank_search == 30)

    parented.extend([_result_row_to_address_row(row)
                     for row in await conn.execute(sql)])