]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/results.py
query analyzer for ICU tokenizer
[nominatim.git] / nominatim / api / results.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Dataclasses for search results and helper functions to fill them.
9
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
13 """
14 from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List
15 import enum
16 import dataclasses
17 import datetime as dt
18
19 import sqlalchemy as sa
20
21 from nominatim.typing import SaSelect, SaRow
22 from nominatim.api.types import Point, Bbox, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25 from nominatim.api.localization import Locales
26
27 # This file defines complex result data classes.
28 # pylint: disable=too-many-instance-attributes
29
class SourceTable(enum.Enum):
    """ Enumeration of kinds of results.

        Each result records the kind of source it was created from in
        its 'source_table' field.
    """
    # Results built from a row of the placex table.
    PLACEX = 1
    # Results built from a row of the address interpolation table osmline.
    OSMLINE = 2
    # Results built from a row of the Tiger data interpolation table.
    TIGER = 3
    # Results built from a row of the postcode table.
    POSTCODE = 4
    # NOTE(review): not assigned by any function visible in this file.
    COUNTRY = 5
38
39
@dataclasses.dataclass
class AddressLine:
    """ Detailed information about a related place.

        Instances are created from database rows by the internal helper
        _result_row_to_address_row().
    """
    # ID of the place as given in the database row.
    place_id: Optional[int]
    # OSM type and OSM ID; None when the row has no OSM type.
    osm_object: Optional[Tuple[str, int]]
    # Class and type of the place.
    category: Tuple[str, str]
    # Names of the place. When the row carries a house number, it is
    # added under the key 'housenumber'.
    names: Dict[str, str]
    # Additional tags. When the row carries a place type, it is added
    # under the key 'place'.
    extratags: Optional[Dict[str, str]]

    admin_level: Optional[int]
    # For rows selected from placex: True when the geometry of the place
    # is a polygon or multipolygon.
    fromarea: bool
    # Only lines with this flag set are localized by AddressLines.localize().
    # Set to True when the row has no 'isaddress' column.
    isaddress: bool
    rank_address: int
    # Distance value as delivered by the database query.
    distance: float

    # Name in the chosen locale, filled in by AddressLines.localize().
    local_name: Optional[str] = None
57
58
class AddressLines(List[AddressLine]):
    """ Sequence of address lines ordered in descending order by their rank.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Compute the local name for the address parts using the given
            locales and store it in the 'local_name' field of each part.

            Only parts that are marked as isaddress and that have names
            are considered. The function returns the local names in the
            order of the sequence, leaving out a name when it is the same
            as the one directly preceding it.
        """
        labels: List[str] = []

        for part in self:
            # Parts that do not belong to the address or have no names
            # are left untouched.
            if not (part.isaddress and part.names):
                continue

            part.local_name = locales.display_name(part.names)

            # Only consecutive repetitions of a name are dropped.
            if labels and labels[-1] == part.local_name:
                continue

            labels.append(part.local_name)

        return labels
79
80
81
@dataclasses.dataclass
class WordInfo:
    """ Detailed information about a search term.

        The fields mirror the columns word_id, word_token and word that
        complete_keywords() selects from the word table.
    """
    word_id: int
    word_token: str
    word: Optional[str] = None


# Type used for the keyword lists attached to a result.
WordInfos = Sequence[WordInfo]
92
93
@dataclasses.dataclass
class BaseResult:
    """ Data class with the information that all types of search
        results have in common.
    """
    # Mandatory fields: origin of the result, its class/type pair and
    # its center point.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    # Identification of the place.
    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None

    # Tag dictionaries of the place.
    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    # Ranks default to 30 when the source row does not provide them.
    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Optional details. They remain None until the corresponding
    # complete_*() function has been run on the result.
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    # Additional geometry output, every result gets its own dictionary.
    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Latitude of the place's center point, i.e. the second
            component (y) of the centroid.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Longitude of the place's center point, i.e. the first
            component (x) of the centroid.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Return an importance value that can always be used.
            When the place has a stored importance, that one is returned.
            A missing importance (or one of 0) is replaced by an
            artificial value derived from the search rank of the place.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)


BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
150
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ A search result with more internal information from the database
        added.
    """
    # NOTE(review): none of the fields below is filled in by the functions
    # in this file; presumably the caller sets them from the database
    # row - confirm.
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15
    indexed_date: Optional[dt.datetime] = None
160
161
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ A search result for reverse geocoding.
    """
    # NOTE(review): neither field is filled in by the functions in this
    # file; 'distance' is presumably the distance to the point that was
    # looked up - confirm against the caller.
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
168
169
class ReverseResults(List[ReverseResult]):
    """ Sequence of reverse lookup results ordered by distance.
        May be empty when no result was found.

        The class is a plain list and does not enforce the ordering itself.
    """
174
175
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ A search result for forward geocoding.
    """
    # NOTE(review): not filled in by the functions in this file.
    bbox: Optional[Bbox] = None
181
182
class SearchResults(List[SearchResult]):
    """ Sequence of forward lookup results ordered by relevance.
        May be empty when no result was found.

        The class is a plain list and does not enforce the ordering itself.
    """
187
188
189 def _filter_geometries(row: SaRow) -> Dict[str, str]:
190     return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
191             if k.startswith('geometry_')}
192
193
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' and fill it with the data
        of the given row from the placex table.

        When no row is given (None), then None is returned.
    """
    if row is None:
        return None

    osm_object = (row.osm_type, row.osm_id)
    category = (row.class_, row.type)
    # The centroid column arrives as WKB and is converted into a Point.
    centroid = Point.from_wkb(row.centroid.data)

    return class_type(source_table=SourceTable.PLACEX,
                      category=category,
                      centroid=centroid,
                      place_id=row.place_id,
                      osm_object=osm_object,
                      names=row.name,
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
219
220
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' and fill it with the data
        of the given row from the address interpolation table osmline.

        When no row is given (None), then None is returned.

        A row with a housenumber yields a result of type 'house' with the
        housenumber set. A row without one yields a result of type
        'houses' which has the interpolation parameters (startnumber,
        endnumber, step) in its extratags.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    place_type = 'house' if housenumber is not None else 'houses'

    result = class_type(source_table=SourceTable.OSMLINE,
                        category=('place', place_type),
                        centroid=Point.from_wkb(row.centroid.data),
                        place_id=row.place_id,
                        # interpolations are always mapped as OSM ways
                        osm_object=('W', row.osm_id),
                        address=row.address,
                        postcode=row.postcode,
                        country_code=row.country_code,
                        geometry=_filter_geometries(row))

    if housenumber is not None:
        result.housenumber = str(housenumber)
    else:
        result.extratags = {key: str(getattr(row, key))
                            for key in ('startnumber', 'endnumber', 'step')}

    return result
253
254
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' and fill it with the data
        of the given row from the Tiger data interpolation table.

        When no row is given (None), then None is returned.

        A row with a housenumber yields a result of type 'house' with the
        housenumber set. A row without one yields a result of type
        'houses' which has the interpolation parameters (startnumber,
        endnumber, step) in its extratags.
    """
    if row is None:
        return None

    housenumber = getattr(row, 'housenumber', None)
    place_type = 'house' if housenumber is not None else 'houses'

    result = class_type(source_table=SourceTable.TIGER,
                        category=('place', place_type),
                        centroid=Point.from_wkb(row.centroid.data),
                        place_id=row.place_id,
                        osm_object=(row.osm_type, row.osm_id),
                        postcode=row.postcode,
                        # the country is fixed for all Tiger results
                        country_code='us',
                        geometry=_filter_geometries(row))

    if housenumber is not None:
        result.housenumber = str(housenumber)
    else:
        result.extratags = {key: str(getattr(row, key))
                            for key in ('startnumber', 'endnumber', 'step')}

    return result
286
287
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Create a result of type 'class_type' and fill it with the data
        of the given row from the postcode table.

        When no row is given (None), then None is returned.
    """
    if row is None:
        return None

    # The postcode itself serves as the name of the result.
    names = {'ref': row.postcode}

    return class_type(source_table=SourceTable.POSTCODE,
                      category=('place', 'postcode'),
                      centroid=Point.from_wkb(row.centroid.data),
                      place_id=row.place_id,
                      names=names,
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
306
307
async def add_result_details(conn: SearchConnection, result: BaseResult,
                             details: LookupDetails) -> None:
    """ Add further information from the database to the result.
        The flags in 'details' determine which kinds of information
        are looked up.
    """
    log().section('Query details for result')

    # Requested flag, log message and function for each kind of detail,
    # in the order in which they are processed.
    lookups = (
        (details.address_details, 'Query address details', complete_address_details),
        (details.linked_places, 'Query linked places', complete_linked_places),
        (details.parented_places, 'Query parent places', complete_parented_places),
        (details.keywords, 'Query keywords', complete_keywords),
    )

    for requested, message, complete_func in lookups:
        if requested:
            log().comment(message)
            await complete_func(conn, result)
326
327
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The row must have the columns place_id, osm_type, osm_id, name,
        class, type, admin_level, fromarea, rank_address and distance.
        The columns extratags, place_type, housenumber and isaddress
        are optional.

        A place type is added to the extratags under the key 'place',
        a housenumber is added to the names under the key 'housenumber'.
        When 'isaddress' is missing in the row, it is set to True.
    """
    # The extratags column may be missing or NULL. Always work on a fresh
    # dictionary, so that adding the place type neither fails on None
    # nor changes the data held by the row.
    extratags: Dict[str, str] = dict(getattr(row, 'extratags', None) or {})
    place_type = getattr(row, 'place_type', None)
    if place_type:
        extratags['place'] = place_type

    names = row.name
    housenumber = getattr(row, 'housenumber', None)
    if housenumber is not None:
        # Copy before adding the housenumber to leave the row data untouched.
        names = dict(names or {})
        names['housenumber'] = housenumber

    if row.osm_type is None:
        osm_object = None
    else:
        osm_object = (row.osm_type, row.osm_id)

    return AddressLine(place_id=row.place_id,
                       osm_object=osm_object,
                       # 'class' is a keyword, so it needs to be read via getattr()
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
351
352
async def complete_address_details(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places that make up the address of the result
        and save them in the 'address_rows' field of the result.

        The information comes from the database function
        get_addressdata(), called with the place ID of the result and
        a housenumber. The housenumber is -1 unless the result comes from
        one of the interpolation tables (Tiger or osmline).
    """
    is_interpolation = result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE)

    if is_interpolation and result.housenumber is not None:
        housenumber = int(result.housenumber)
    elif is_interpolation and result.extratags is not None \
         and 'startnumber' in result.extratags:
        # details requests do not come with a specific house number
        housenumber = int(result.extratags['startnumber'])
    else:
        housenumber = -1

    # Columns of the table returned by get_addressdata().
    columns = (sa.column('place_id', type_=sa.Integer),
               'osm_type',
               sa.column('osm_id', type_=sa.BigInteger),
               sa.column('name', type_=conn.t.types.Composite),
               'class', 'type', 'place_type',
               sa.column('admin_level', type_=sa.Integer),
               sa.column('fromarea', type_=sa.Boolean),
               sa.column('isaddress', type_=sa.Boolean),
               sa.column('rank_address', type_=sa.SmallInteger),
               sa.column('distance', type_=sa.Float))

    address_func = sa.func.get_addressdata(result.place_id, housenumber)\
                     .table_valued(*columns) # type: ignore[no-untyped-call]

    query = sa.select(address_func).order_by(sa.column('rank_address').desc(),
                                             sa.column('isaddress').desc())

    address_rows = AddressLines()
    result.address_rows = address_rows

    rows = await conn.execute(query)
    address_rows.extend(_result_row_to_address_row(row) for row in rows)
379     result.address_rows = AddressLines()
380     for row in await conn.execute(sql):
381         result.address_rows.append(_result_row_to_address_row(row))
382
383
384 # pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create a select statement on the placex table which returns the
        columns that _result_row_to_address_row() reads.

        The statement has no WHERE clause, the caller needs to add the
        appropriate restrictions.

        'fromarea' is true when the geometry of the place is a polygon
        or multipolygon. 'distance' is the result of ST_DistanceSpheroid()
        between the geometry of the place and the point given in 'centroid'.
    """
    t = conn.t.placex
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
                     t.c.rank_address,
                     # The two %f placeholders are filled from 'centroid',
                     # which therefore must be a tuple with exactly two numbers.
                     sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid).label('distance'))
398
399
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places that link to the result and save them in
        the 'linked_rows' field of the result.

        The list remains empty for results that do not come from
        the placex table.
    """
    linked_rows = AddressLines()
    result.linked_rows = linked_rows

    if result.source_table != SourceTable.PLACEX:
        return

    placex = conn.t.placex
    query = _placex_select_address_row(conn, result.centroid)\
              .where(placex.c.linked_place_id == result.place_id)

    rows = await conn.execute(query)
    linked_rows.extend(_result_row_to_address_row(row) for row in rows)
412
413
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the search terms that are saved for the place and
        save them in the 'name_keywords' and 'address_keywords' fields
        of the result.

        The token IDs are taken from the search_name table and then
        resolved through the word table.
    """
    search_name = conn.t.search_name
    token_query = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                    .where(search_name.c.place_id == result.place_id)

    name_keywords: List[WordInfo] = []
    address_keywords: List[WordInfo] = []
    result.name_keywords = name_keywords
    result.address_keywords = address_keywords

    for name_tokens, address_tokens in await conn.execute(token_query):
        word = conn.t.word
        word_query = sa.select(word.c.word_id, word.c.word_token, word.c.word)

        name_rows = await conn.execute(
            word_query.where(word.c.word_id == sa.any_(name_tokens)))
        name_keywords.extend(WordInfo(*row) for row in name_rows)

        address_rows = await conn.execute(
            word_query.where(word.c.word_id == sa.any_(address_tokens)))
        address_keywords.extend(WordInfo(*row) for row in address_rows)
432
433
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the places for which the result provides the address
        and save them in the 'parented_rows' field of the result.

        Only places with a search rank of 30 that have the result as
        their parent are considered. The list remains empty for results
        that do not come from the placex table.
    """
    parented_rows = AddressLines()
    result.parented_rows = parented_rows

    if result.source_table != SourceTable.PLACEX:
        return

    placex = conn.t.placex
    query = _placex_select_address_row(conn, result.centroid)\
              .where(placex.c.parent_place_id == result.place_id)\
              .where(placex.c.rank_search == 30)

    rows = await conn.execute(query)
    parented_rows.extend(_result_row_to_address_row(row) for row in rows)