# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Dataclasses for search results and helper functions to fill them.

Data classes are part of the public API while the functions are for
internal use only. That's why they are implemented as free-standing functions
instead of member functions.
"""
from typing import (
    Optional, Tuple, Dict, Sequence, TypeVar, Type, List,
    cast, Callable
)
import enum
import dataclasses
import datetime as dt

import sqlalchemy as sa

from .typing import SaSelect, SaRow
from .sql.sqlalchemy_types import Geometry
from .types import Point, Bbox, LookupDetails, EntranceDetails
from .connection import SearchConnection
from .logging import log
# This file defines complex result data classes.
33 def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
34 """ Mix-in names from linked places, so that they show up
35 as standard names where necessary.
41 for k, v in names.items():
42 if k.startswith('_place_'):
44 out[k if outkey in names else outkey] = v
class SourceTable(enum.Enum):
    """ The `SourceTable` type lists the possible sources a result can have.
    """
    # NOTE(review): the member assignments were missing from the damaged
    # listing. The member names are taken from their uses elsewhere in this
    # file; the numeric values are assumed to be sequential - confirm.
    PLACEX = 1
    """ The placex table is the main source for results.
    """
    OSMLINE = 2
    """ The osmline table contains address interpolations from OSM data.
        Interpolation addresses are always approximate. The OSM id in the
        result refers to the OSM way with the interpolation line object.
    """
    TIGER = 3
    """ TIGER address data contains US addresses imported on the side,
        see [Installing TIGER data](../customize/Tiger.md).
        TIGER address are also interpolations. The addresses always refer
        to a street from OSM data. The OSM id in the result refers to
        that street.
    """
    POSTCODE = 4
    """ The postcode table contains artificial centroids for postcodes,
        computed from the postcodes available with address points. Results
        are always approximate.
    """
    COUNTRY = 5
    """ The country table provides a fallback, when country data is missing.
    """
@dataclasses.dataclass
class AddressLine:
    """ The `AddressLine` may contain the following fields about a related place
        and its function as an address object. Most fields are optional.
        Their presence depends on the kind and function of the address part.
    """
    # NOTE(review): the class header, the fields `names`, `fromarea`,
    # `isaddress`, `rank_address` and `distance` and the `@property`
    # decorator were missing from the damaged listing. They are restored
    # from the keyword arguments used to construct AddressLine elsewhere in
    # this file - confirm types and order against the upstream file.
    category: Tuple[str, str]
    """ Main category of the place, described by a key-value pair.
    """
    names: Dict[str, str]
    """ All available names for the place including references, alternative
        names and translations.
    """
    fromarea: bool
    """ If true, then the exact area of the place is known. Without area
        information, Nominatim has to make an educated guess if an address
        belongs to one place or another.
    """
    isaddress: bool
    """ If true, this place should be considered for the final address display.
        Nominatim will sometimes include more than one candidate for
        the address in the list when it cannot reliably determine where the
        place belongs. It will consider names of all candidates when searching
        but when displaying the result, only the most likely candidate should
        be shown.
    """
    rank_address: int
    """ [Address rank](../customize/Ranking.md#address-rank) of the place.
    """
    distance: float
    """ Distance in degrees between the result place and this address part.
    """
    place_id: Optional[int] = None
    """ Internal ID of the place.
    """
    osm_object: Optional[Tuple[str, int]] = None
    """ OSM type and ID of the place, if such an object exists.
    """
    extratags: Optional[Dict[str, str]] = None
    """ Any extra information available about the place. This is a dictionary
        that usually contains OSM tag key-value pairs.
    """
    admin_level: Optional[int] = None
    """ The administrative level of a boundary as tagged in the input data.
        This field is only meaningful for places of the category
        (boundary, administrative).
    """
    local_name: Optional[str] = None
    """ Place holder for localization of this address part. See
        [Localization](Result-Handling.md#localization) below.
    """

    @property
    def display_name(self) -> Optional[str]:
        """ Dynamically compute the display name for the Address Line component.

            Preference order: `local_name` if set, then the 'name' entry of
            `names`, then the first value in `names`. Returns None when
            none of these is available.
        """
        if self.local_name:
            return self.local_name
        if 'name' in self.names:
            return self.names['name']
        # The default of next() covers the case of an empty names dictionary.
        return next(iter(self.names.values()), None)
class AddressLines(List[AddressLine]):
    """ A wrapper around a list of AddressLine objects.

        The class adds no behaviour to the list. It gives collections of
        address lines a distinct type.
    """
@dataclasses.dataclass
class WordInfo:
    """ Each entry in the list of search terms contains the
        following detailed information.
    """
    # NOTE(review): the class header and the fields `word_id` and
    # `word_token` were missing from the damaged listing. They are restored
    # to match the rows (word_id, word_token, word) that are unpacked into
    # WordInfo elsewhere in this file - confirm the types.
    word_id: int
    """ Internal identifier for the word.
    """
    word_token: str
    """ Normalised and transliterated form of the word.
        This form is used for searching.
    """
    word: Optional[str] = None
    """ Untransliterated form, if available.
    """


WordInfos = Sequence[WordInfo]
@dataclasses.dataclass
class BaseResult:
    """ Data class collecting information common to all
        types of search results.
    """
    # NOTE(review): the class header, the `centroid` field, the `@property`
    # decorators and a few branches of `display_name` were missing from the
    # damaged listing and have been restored - confirm against the upstream
    # file. `centroid` is indexed with [0]/[1] below and is filled from
    # `Point.from_wkb()` by the factory functions in this file.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15

    locale_name: Optional[str] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    entrances: Optional[List[EntranceDetails]] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Get the latitude (or y) of the center point of the place.
        """
        return self.centroid[1]

    @property
    def lon(self) -> float:
        """ Get the longitude (or x) of the center point of the place.
        """
        return self.centroid[0]

    @property
    def display_name(self) -> Optional[str]:
        """ Dynamically compute the display name for the result place
            and, if available, its address information.

            When address rows are present, the display names of all rows
            marked as address parts are joined with ', ', skipping a name
            that equals the one directly before it. Otherwise, or when
            that yields nothing, the first available of `locale_name`, the
            'name' entry of `names`, the first value in `names` and the
            house number is returned. Returns None when nothing is set.
        """
        if self.address_rows:  # if this is true we need additional processing
            label_parts: List[str] = []

            for line in self.address_rows:  # assume locale_name is set by external formatter
                if line.isaddress and line.names:
                    address_name = line.display_name

                    if address_name and (not label_parts or label_parts[-1] != address_name):
                        label_parts.append(address_name)

            if label_parts:
                return ', '.join(label_parts)

        # No usable address rows: fall back to the names of the place itself.
        if self.locale_name:
            return self.locale_name
        if self.names:
            if 'name' in self.names:
                return self.names['name']
            return next(iter(self.names.values()))
        if self.housenumber:
            return self.housenumber
        return None

    def calculated_importance(self) -> float:
        """ Get a valid importance value. This is either the stored importance
            or, when that is missing or zero, an artificial value computed
            from the place's search rank.
        """
        return self.importance or (0.40001 - (self.rank_search/75.0))


BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ A search result with more internal information from the database
        added.
    """
    indexed_date: Optional[dt.datetime] = None
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ A search result for reverse geocoding.
    """
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
class ReverseResults(List[ReverseResult]):
    """ Sequence of reverse lookup results ordered by distance.
        May be empty when no result was found.
    """
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ A search result for forward geocoding.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0

    # NOTE(review): the `@property` decorator was missing from the damaged
    # listing; restored by analogy with the other computed attributes -
    # confirm that callers read `ranking` as an attribute.
    @property
    def ranking(self) -> float:
        """ Return the ranking, a combined measure of accuracy and importance.

            Computed as the accuracy minus the calculated importance. An
            accuracy of None is treated as 1.
        """
        accuracy = self.accuracy if self.accuracy is not None else 1
        return accuracy - self.calculated_importance()
class SearchResults(List[SearchResult]):
    """ Sequence of forward lookup results ordered by relevance.
        May be empty when no result was found.
    """
308 def _filter_geometries(row: SaRow) -> Dict[str, str]:
309 return {k[9:]: v for k, v in row._mapping.items()
310 if k.startswith('geometry_')}
def create_from_placex_row(row: SaRow, class_type: Type[BaseResultT]) -> BaseResultT:
    """ Construct a new result and add the data from the result row
        from the placex table. 'class_type' defines the type of result
        to return.

        The columns 'linked_place_id' and 'admin_level' are optional in
        the row. When missing, they default to None and 15. Names
        are run through `_mingle_name_tags()` and all 'geometry_*' columns
        end up in the geometry dictionary of the result.
    """
    return class_type(source_table=SourceTable.PLACEX,
                      place_id=row.place_id,
                      osm_object=(row.osm_type, row.osm_id),
                      category=(row.class_, row.type),
                      parent_place_id=row.parent_place_id,
                      linked_place_id=getattr(row, 'linked_place_id', None),
                      admin_level=getattr(row, 'admin_level', 15),
                      names=_mingle_name_tags(row.name),
                      # NOTE(review): this argument was missing from the
                      # damaged listing and has been restored - confirm.
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
def create_from_osmline_row(row: SaRow, class_type: Type[BaseResultT]) -> BaseResultT:
    """ Construct a new result and add the data from the result row
        from the address interpolation table osmline. 'class_type' defines
        the type of result to return.

        If the row contains a housenumber, then the housenumber is filled out
        and the result is categorised as ('place', 'house').
        Otherwise the result is categorised as ('place', 'houses') and
        contains the interpolation information (startnumber, endnumber,
        step) in extratags.
    """
    hnr = getattr(row, 'housenumber', None)

    res = class_type(source_table=SourceTable.OSMLINE,
                     place_id=row.place_id,
                     parent_place_id=row.parent_place_id,
                     osm_object=('W', row.osm_id),
                     category=('place', 'houses' if hnr is None else 'house'),
                     # NOTE(review): this argument was missing from the
                     # damaged listing and has been restored - confirm.
                     address=row.address,
                     postcode=row.postcode,
                     country_code=row.country_code,
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if hnr is None:
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res
def create_from_tiger_row(row: SaRow,
                          class_type: Type[BaseResultT],
                          osm_type: Optional[str] = None,
                          osm_id: Optional[int] = None) -> BaseResultT:
    """ Construct a new result and add the data from the result row
        from the Tiger data interpolation table. 'class_type' defines
        the type of result to return.

        'osm_type' and 'osm_id' take precedence over the columns of the
        same name in the row. The row columns are only read when the
        respective parameter is None (or otherwise falsy).

        If the row contains a housenumber, then the housenumber is filled out
        and the result is categorised as ('place', 'house').
        Otherwise the result is categorised as ('place', 'houses') and
        contains the interpolation information (startnumber, endnumber,
        step) in extratags.
    """
    hnr = getattr(row, 'housenumber', None)

    res = class_type(source_table=SourceTable.TIGER,
                     place_id=row.place_id,
                     parent_place_id=row.parent_place_id,
                     osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
                     category=('place', 'houses' if hnr is None else 'house'),
                     postcode=row.postcode,
                     # NOTE(review): this argument was missing from the
                     # damaged listing; TIGER data is documented in this
                     # file as US-only, hence the fixed code - confirm.
                     country_code='us',
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if hnr is None:
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res
def create_from_postcode_row(row: SaRow, class_type: Type[BaseResultT]) -> BaseResultT:
    """ Construct a new result and add the data from the result row
        from the postcode table. 'class_type' defines
        the type of result to return.

        The result is categorised as ('place', 'postcode') and carries
        the postcode as its only name under the key 'ref'.
    """
    return class_type(source_table=SourceTable.POSTCODE,
                      place_id=row.place_id,
                      parent_place_id=row.parent_place_id,
                      category=('place', 'postcode'),
                      names={'ref': row.postcode},
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
def create_from_country_row(row: SaRow, class_type: Type[BaseResultT]) -> BaseResultT:
    """ Construct a new result and add the data from the result row
        from the fallback country tables. 'class_type' defines
        the type of result to return.

        The result is categorised as ('place', 'country') with a fixed
        address and search rank of 4.
    """
    return class_type(source_table=SourceTable.COUNTRY,
                      category=('place', 'country'),
                      centroid=Point.from_wkb(row.centroid),
                      # NOTE(review): this argument was missing from the
                      # damaged listing and has been restored - confirm.
                      names=row.name,
                      rank_address=4, rank_search=4,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Retrieve more details from the database according to the
        parameters specified in 'details'.

        The results are extended in place. Each kind of detail
        (address details, linked places, parented places, entrances,
        keywords) is only queried when the corresponding flag in 'details'
        is set. Nothing is done for an empty result list.
    """
    # NOTE(review): the `if results:` guard and the `details.keywords` check
    # were missing from the damaged listing and have been restored - confirm.
    if not results:
        return

    log().section('Query details for result')
    if details.address_details:
        log().comment('Query address details')
        await complete_address_details(conn, results)
    if details.linked_places:
        log().comment('Query linked places')
        for result in results:
            await complete_linked_places(conn, result)
    if details.parented_places:
        log().comment('Query parent places')
        for result in results:
            await complete_parented_places(conn, result)
    if details.entrances:
        log().comment('Query entrances details')
        await complete_entrances_details(conn, results)
    if details.keywords:
        log().comment('Query keywords')
        for result in results:
            await complete_keywords(conn, result)
def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        When 'isaddress' is None, the value is taken from the column of the
        same name in the row, defaulting to True when the row has no such
        column. A 'linked_place' extratag is copied to the 'place' extratag
        and a house number in the row is added to the names under the key
        'housenumber'.
    """
    # NOTE(review): the arguments `names`, `extratags` and `isaddress` of the
    # AddressLine constructor were missing from the damaged listing. They
    # are restored because the values are computed above and the dataclass
    # requires `names` and `isaddress` - confirm.
    extratags: Dict[str, str] = getattr(row, 'extratags', {}) or {}
    if 'linked_place' in extratags:
        extratags['place'] = extratags['linked_place']

    names = _mingle_name_tags(row.name) or {}
    if getattr(row, 'housenumber', None) is not None:
        names['housenumber'] = row.housenumber

    if isaddress is None:
        isaddress = getattr(row, 'isaddress', True)

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       # The column is labelled 'class', a Python keyword,
                       # so it cannot be read with attribute syntax.
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=isaddress,
                       rank_address=row.rank_address,
                       distance=row.distance)
def _get_address_lookup_id(result: BaseResultT) -> int:
    """ Return the place ID under which the address lines for the
        result are looked up.

        For results that do not come from placex or that have a search rank
        above 27 this is the parent place, otherwise the linked place.
        In both cases the place ID of the result itself is the fallback.
    """
    assert result.place_id
    if result.source_table != SourceTable.PLACEX or result.rank_search > 27:
        return result.parent_place_id or result.place_id

    return result.linked_place_id or result.place_id
async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
    """ Append the synthetic postcode, country and country code lines
        to the address rows of the result.

        The postcode line is added unless the result is itself a postcode
        (category in boundary/place with type postal_code/postcode) and only
        when the postcode is a single value, i.e. contains neither ',' nor
        ';'. The country lines are added when the result has a country code.
        `result.address_rows` must already be initialised.
    """
    # NOTE(review): several short lines were missing from the damaged
    # listing and have been restored: the `distance=0.0` arguments closing
    # the AddressLine calls, the `if cres[1]:` guard and the returns of
    # `_get_country_names`, the remaining arguments of `get_cached_value`
    # and the `if country_names:` / `names=country_names` pair - confirm
    # against the upstream file.
    assert result.address_rows is not None
    if result.category[0] not in ('boundary', 'place')\
       or result.category[1] not in ('postal_code', 'postcode'):
        postcode = result.postcode
        if not postcode and result.address:
            postcode = result.address.get('postcode')
        if postcode and ',' not in postcode and ';' not in postcode:
            result.address_rows.append(AddressLine(
                category=('place', 'postcode'),
                names={'ref': postcode},
                fromarea=False, isaddress=True, rank_address=5,
                distance=0.0))
    if result.country_code:
        async def _get_country_names() -> Optional[Dict[str, str]]:
            t = conn.t.country_name
            sql = sa.select(t.c.name, t.c.derived_name)\
                    .where(t.c.country_code == result.country_code)
            # Only the first matching row is used.
            for cres in await conn.execute(sql):
                names = cast(Dict[str, str], cres[0])
                if cres[1]:
                    names.update(cast(Dict[str, str], cres[1]))
                return names
            return None

        country_names = await conn.get_cached_value('COUNTRY_NAME',
                                                    result.country_code,
                                                    _get_country_names)
        if country_names:
            result.address_rows.append(AddressLine(
                category=('place', 'country'),
                names=country_names,
                fromarea=False, isaddress=True, rank_address=4,
                distance=0.0))
        result.address_rows.append(AddressLine(
            category=('place', 'country_code'),
            names={'ref': result.country_code}, extratags={},
            fromarea=True, isaddress=False, rank_address=4,
            distance=0.0))
def _setup_address_details(result: BaseResultT) -> None:
    """ Initialise the address rows of the result with the lines that can
        be derived from the result itself, without a database lookup.

        These are: a line for the place itself, a house number line
        and a locality line for an '_unlisted_place' address entry.
        For placex results with address tags the house number comes from
        the 'housenumber', 'streetnumber' or 'conscriptionnumber' tag (first
        one set), otherwise from the housenumber field of the result.
    """
    # NOTE(review): the `if result.names:` guard, the `names=result.names`
    # argument and the `else: housenumber = None` / `if housenumber:` lines
    # were missing from the damaged listing and have been restored - confirm.
    result.address_rows = AddressLines()
    if result.names:
        result.address_rows.append(AddressLine(
            place_id=result.place_id,
            osm_object=result.osm_object,
            category=result.category,
            names=result.names,
            extratags=result.extratags or {},
            admin_level=result.admin_level,
            fromarea=True, isaddress=True,
            rank_address=result.rank_address, distance=0.0))
    if result.source_table == SourceTable.PLACEX and result.address:
        housenumber = result.address.get('housenumber')\
                      or result.address.get('streetnumber')\
                      or result.address.get('conscriptionnumber')
    elif result.housenumber:
        housenumber = result.housenumber
    else:
        housenumber = None
    if housenumber:
        result.address_rows.append(AddressLine(
            category=('place', 'house_number'),
            names={'ref': housenumber},
            fromarea=True, isaddress=True, rank_address=28, distance=0))
    if result.address and '_unlisted_place' in result.address:
        result.address_rows.append(AddressLine(
            category=('place', 'locality'),
            names={'name': result.address['_unlisted_place']},
            fromarea=False, isaddress=True, rank_address=25, distance=0))
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about places that make up the address of the result.

        The address rows of every result are rebuilt in four steps:

        1. Lines derived from the result itself are set up.
        2. The address lines of all results with a place ID are fetched
           with a single query and appended. Within one result only
           the first line of each address rank is marked as part of the
           address.
        3. Synthetic postcode and country lines are appended.
        4. For results that are looked up through another place (parent or
           linked place), the line for that place is appended.

        Finally the rows of each result are sorted.
    """
    # NOTE(review): the following short lines were missing from the damaged
    # listing and have been restored: the early return for an empty lookup
    # list, `t = conn.t.placex`, the `else_=-1).desc()` tail of the CASE
    # ordering, the `else:` before resetting the postcode and the
    # `t.c.admin_level, t.c.rank_address` columns of the parent query.
    # Confirm all of them against the upstream file.
    for result in results:
        _setup_address_details(result)

    # Lookup entries from place_address line

    lookup_ids = [{'pid': r.place_id,
                   'lid': _get_address_lookup_id(r),
                   'names': list(r.address.values()) if r.address else [],
                   'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else ''}
                  for r in results if r.place_id]

    if not lookup_ids:
        return

    ltab = sa.func.JsonArrayEach(sa.type_coerce(lookup_ids, sa.JSON))\
             .table_valued(sa.column('value', type_=sa.JSON))

    t = conn.t.placex
    taddr = conn.t.addressline

    sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
                    t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                    t.c.class_, t.c.type, t.c.extratags,
                    t.c.admin_level, taddr.c.fromarea,
                    sa.case((t.c.rank_address == 11, 5),
                            else_=t.c.rank_address).label('rank_address'),
                    taddr.c.distance, t.c.country_code, t.c.postcode)\
            .join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
                                taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
            .join(t, taddr.c.address_place_id == t.c.place_id)\
            .order_by('src_place_id')\
            .order_by(sa.column('rank_address').desc())\
            .order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
            .order_by(sa.case((sa.func.CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
                              (taddr.c.isaddress, 0),
                              (sa.and_(taddr.c.fromarea,
                                       t.c.geometry.ST_Contains(
                                           sa.func.ST_GeomFromEWKT(
                                               ltab.c.value['c'].as_string()))), 1),
                              else_=-1).desc())\
            .order_by(taddr.c.fromarea.desc())\
            .order_by(taddr.c.distance.desc())\
            .order_by(t.c.rank_search.desc())

    current_result = None
    current_rank_address = -1
    for row in await conn.execute(sql):
        # Rows arrive grouped by source place, so the result only needs
        # to be looked up when the source place changes.
        if current_result is None or row.src_place_id != current_result.place_id:
            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
            assert current_result is not None
            current_rank_address = -1

        # Only the first row of each address rank becomes part of the address.
        location_isaddress = row.rank_address != current_rank_address

        if current_result.country_code is None and row.country_code:
            current_result.country_code = row.country_code

        if row.type in ('postcode', 'postal_code') and location_isaddress:
            if not row.fromarea or \
               (current_result.address and 'postcode' in current_result.address):
                location_isaddress = False
            else:
                current_result.postcode = None

        assert current_result.address_rows is not None
        current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
        current_rank_address = row.rank_address

    for result in results:
        await _finalize_entry(conn, result)

    # Finally add the record for the parent entry where necessary.

    parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
    if parent_lookup_ids:
        ltab = sa.func.JsonArrayEach(sa.type_coerce(parent_lookup_ids, sa.JSON))\
                 .table_valued(sa.column('value', type_=sa.JSON))
        sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
                        t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                        t.c.class_, t.c.type, t.c.extratags,
                        t.c.admin_level,
                        t.c.rank_address)\
                .where(t.c.place_id == ltab.c.value['lid'].as_integer())

        for row in await conn.execute(sql):
            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
            assert current_result is not None
            assert current_result.address_rows is not None

            current_result.address_rows.append(AddressLine(
                place_id=row.place_id,
                osm_object=(row.osm_type, row.osm_id),
                category=(row.class_, row.type),
                names=row.name, extratags=row.extratags or {},
                admin_level=row.admin_level,
                fromarea=True, isaddress=True,
                rank_address=row.rank_address, distance=0.0))

    # Now sort everything: the line of the result itself first, then by
    # descending address rank, lines not used for the address before
    # address lines of the same rank.
    def mk_sort_key(place_id: Optional[int]) -> Callable[[AddressLine], Tuple[bool, int, bool]]:
        return lambda a: (a.place_id != place_id, -a.rank_address, a.isaddress)

    for result in results:
        assert result.address_rows is not None
        result.address_rows.sort(key=mk_sort_key(result.place_id))
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create a select on the placex table with the columns that
        `_result_row_to_address_row()` reads.

        The 'distance' column is the spheroid distance between the geometry
        of the place and 'centroid'. The caller adds the WHERE clause.
    """
    # NOTE(review): `t = conn.t.placex` and the `t.c.rank_address` column were
    # missing from the damaged listing and have been restored - confirm.
    t = conn.t.placex
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     t.c.geometry.is_area().label('fromarea'),
                     t.c.rank_address,
                     t.c.geometry.distance_spheroid(
                       sa.bindparam('centroid', value=centroid, type_=Geometry)).label('distance'))
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about places that link to the result.

        `result.linked_rows` is always reset to an empty list. It is only
        filled for results from the placex table.
    """
    result.linked_rows = AddressLines()
    if result.source_table != SourceTable.PLACEX:
        return

    sql = _placex_select_address_row(conn, result.centroid)\
        .where(conn.t.placex.c.linked_place_id == result.place_id)

    for row in await conn.execute(sql):
        result.linked_rows.append(_result_row_to_address_row(row))
async def complete_entrances_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about tagged entrances for the given results.

        Only results from the placex table are looked up. The `entrances`
        list of a result is created when the first entrance for it is
        found. Results without entrances keep their current value.
    """
    # Materialise the IDs: a list can be handed to in_() as is and
    # lets us skip the query completely when there is nothing to look up.
    place_ids = [r.place_id for r in results if r.source_table == SourceTable.PLACEX]
    if not place_ids:
        return

    t = conn.t.placex_entrance
    sql = sa.select(t.c.place_id, t.c.osm_id, t.c.type, t.c.location, t.c.extratags)\
            .where(t.c.place_id.in_(place_ids))

    current_result = None
    for row in await conn.execute(sql):
        if current_result is None or row.place_id != current_result.place_id:
            current_result = next((r for r in results if r.place_id == row.place_id), None)
            assert current_result is not None
        if current_result.entrances is None:
            current_result.entrances = []
        # NOTE(review): the `osm_id` and `type` arguments were missing from
        # the damaged listing; restored from the selected columns - confirm
        # against the fields of EntranceDetails.
        current_result.entrances.append(EntranceDetails(
            osm_id=row.osm_id,
            type=row.type,
            location=Point.from_wkb(row.location),
            extratags=row.extratags,
            ))
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about the search terms used for this place.

        Requires that the query analyzer was initialised to get access to
        the 'word' table.

        `result.name_keywords` and `result.address_keywords` are reset to
        empty lists and then filled with the words for the tokens in the
        name vector and the name-address vector of the place.
    """
    t = conn.t.search_name
    sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
            .where(t.c.place_id == result.place_id)

    result.name_keywords = []
    result.address_keywords = []

    # The word table is not part of the fixed table set on the connection,
    # so it is fetched from the metadata.
    t = conn.t.meta.tables['word']
    sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)

    for name_tokens, address_tokens in await conn.execute(sql):
        for row in await conn.execute(sel.where(t.c.word_id.in_(name_tokens))):
            result.name_keywords.append(WordInfo(*row))

        for row in await conn.execute(sel.where(t.c.word_id.in_(address_tokens))):
            result.address_keywords.append(WordInfo(*row))
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about places that have the result as
        their parent place.

        `result.parented_rows` is always reset to an empty list. It is only
        filled for results from the placex table, with the placex entries
        of search rank 30 whose parent is the result.
    """
    result.parented_rows = AddressLines()
    if result.source_table != SourceTable.PLACEX:
        return

    sql = _placex_select_address_row(conn, result.centroid)\
        .where(conn.t.placex.c.parent_place_id == result.place_id)\
        .where(conn.t.placex.c.rank_search == 30)

    for row in await conn.execute(sql):
        result.parented_rows.append(_result_row_to_address_row(row))