# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Dataclasses for search results and helper functions to fill them.

Data classes are part of the public API while the functions are for
internal use only. That's why they are implemented as free-standing functions
instead of member functions.
"""
from typing import (
    Optional, Tuple, Dict, Sequence, TypeVar, Type, List,
    cast, Callable
)
import enum
import dataclasses
import datetime as dt

import sqlalchemy as sa

from .typing import SaSelect, SaRow
from .sql.sqlalchemy_types import Geometry
from .types import Point, Bbox, LookupDetails, EntranceDetails
from .connection import SearchConnection
from .logging import log

# This file defines complex result data classes.


def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
    """ Mix-in names from linked places, so that they show up
        as standard names where necessary.

        Keys starting with '_place_' are stored without that prefix,
        unless the unprefixed key already exists in 'names'. In that
        case the prefixed key is kept unchanged. All other keys are
        copied as they are.

        Returns None when 'names' is None or empty.
    """
    if not names:
        return None

    out = {}
    for k, v in names.items():
        if k.startswith('_place_'):
            # len('_place_') == 7
            outkey = k[7:]
            # Never overwrite a regular name with the one from the linked place.
            out[k if outkey in names else outkey] = v
        else:
            out[k] = v

    return out

class SourceTable(enum.Enum):
    """ The `SourceTable` type lists the possible sources a result can have.
    """
    PLACEX = 1
    """ The placex table is the main source for result usually containing
        OSM data.
    """
    OSMLINE = 2
    """ The osmline table contains address interpolations from OSM data.
        Interpolation addresses are always approximate. The OSM id in the
        result refers to the OSM way with the interpolation line object.
    """
    TIGER = 3
    """ TIGER address data contains US addresses imported on the side,
        see [Installing TIGER data](../customize/Tiger.md).
        TIGER address are also interpolations. The addresses always refer
        to a street from OSM data. The OSM id in the result refers to
        that street.
    """
    POSTCODE = 4
    """ The postcode table contains artificial centroids for postcodes,
        computed from the postcodes available with address points. Results
        are always approximate.
    """
    COUNTRY = 5
    """ The country table provides a fallback, when country data is missing
        in the OSM data.
    """

@dataclasses.dataclass
class AddressLine:
    """ The `AddressLine` may contain the following fields about a related place
        and its function as an address object. Most fields are optional.
        Their presence depends on the kind and function of the address part.
    """
    category: Tuple[str, str]
    """ Main category of the place, described by a key-value pair.
    """
    names: Dict[str, str]
    """ All available names for the place including references, alternative
        names and translations.
    """
    fromarea: bool
    """ If true, then the exact area of the place is known. Without area
        information, Nominatim has to make an educated guess if an address
        belongs to one place or another.
    """
    isaddress: bool
    """ If true, this place should be considered for the final address display.
        Nominatim will sometimes include more than one candidate for
        the address in the list when it cannot reliably determine where the
        place belongs. It will consider names of all candidates when searching
        but when displaying the result, only the most likely candidate should
        be shown.
    """
    rank_address: int
    """ [Address rank](../customize/Ranking.md#address-rank) of the place.
    """
    distance: float
    """ Distance in degrees between the result place and this address part.
    """
    place_id: Optional[int] = None
    """ Internal ID of the place.
    """
    osm_object: Optional[Tuple[str, int]] = None
    """ OSM type and ID of the place, if such an object exists.
    """
    extratags: Optional[Dict[str, str]] = None
    """ Any extra information available about the place. This is a dictionary
        that usually contains OSM tag key-value pairs.
    """

    admin_level: Optional[int] = None
    """ The administrative level of a boundary as tagged in the input data.
        This field is only meaningful for places of the category
        (boundary, administrative).
    """

    local_name: Optional[str] = None
    """ Place holder for localization of this address part. See
        [Localization](Result-Handling.md#localization) below.
    """

    @property
    def display_name(self) -> Optional[str]:
        """ Dynamically compute the display name for the Address Line component.

            Preference order: the localized name, the 'name' entry of the
            names, any first available name. Returns None when the
            address line has no name at all.
        """
        if self.local_name:
            return self.local_name
        elif 'name' in self.names:
            return self.names['name']
        elif self.names:
            return next(iter(self.names.values()), None)
        return None

class AddressLines(List[AddressLine]):
    """ A wrapper around a list of AddressLine objects."""


@dataclasses.dataclass
class WordInfo:
    """ Each entry in the list of search terms contains the
        following detailed information.
    """
    word_id: int
    """ Internal identifier for the word.
    """
    word_token: str
    """ Normalised and transliterated form of the word.
        This form is used for searching.
    """
    word: Optional[str] = None
    """ Untransliterated form, if available.
    """


WordInfos = Sequence[WordInfo]


@dataclasses.dataclass
class BaseResult:
    """ Data class collecting information common to all
        types of search results.
    """
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15

    locale_name: Optional[str] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    entrances: Optional[List[EntranceDetails]] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Get the latitude (or y) of the center point of the place.
        """
        return self.centroid[1]

    @property
    def lon(self) -> float:
        """ Get the longitude (or x) of the center point of the place.
        """
        return self.centroid[0]

    @property
    def display_name(self) -> Optional[str]:
        """ Dynamically compute the display name for the result place
            and, if available, its address information.

            When address rows are available, the display names of all
            rows marked as address parts are joined with ', ', skipping
            directly repeated names. Otherwise the first available of
            localized name, 'name' entry of the names, any name and the
            house number is used. Returns None when nothing is available.
        """
        if self.address_rows:
            label_parts: List[str] = []

            # Each line computes its own display name; a localized name
            # must already have been set on the line by the caller.
            for line in self.address_rows:
                if line.isaddress and line.names:
                    address_name = line.display_name

                    if address_name and (not label_parts or label_parts[-1] != address_name):
                        label_parts.append(address_name)

            if label_parts:
                return ', '.join(label_parts)

        # No usable address rows: fall back to the names of the place itself.
        if self.locale_name:
            return self.locale_name
        elif self.names and 'name' in self.names:
            return self.names['name']
        elif self.names:
            return next(iter(self.names.values()))
        elif self.housenumber:
            return self.housenumber
        return None

    def calculated_importance(self) -> float:
        """ Get a valid importance value. This is either the stored importance
            of the value or an artificial value computed from the place's
            search rank.
        """
        return self.importance or (0.40001 - (self.rank_search/75.0))


BaseResultT = TypeVar('BaseResultT', bound=BaseResult)


@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ A search result with more internal information from the database
        added.
    """
    indexed_date: Optional[dt.datetime] = None


@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ A search result for reverse geocoding.
    """
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None


class ReverseResults(List[ReverseResult]):
    """ Sequence of reverse lookup results ordered by distance.
        May be empty when no result was found.
    """


@dataclasses.dataclass
class SearchResult(BaseResult):
    """ A search result for forward geocoding.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0

    @property
    def ranking(self) -> float:
        """ Return the ranking, a combined measure of accuracy and importance.

            The value is the accuracy (1 if no accuracy is set) minus
            the calculated importance of the result.
        """
        return (self.accuracy if self.accuracy is not None else 1) \
            - self.calculated_importance()


class SearchResults(List[SearchResult]):
    """ Sequence of forward lookup results ordered by relevance.
        May be empty when no result was found.
    """


def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect all columns of the row whose name starts with 'geometry_'.
        The returned dictionary uses the column names without that prefix
        as keys.
    """
    # len('geometry_') == 9
    return {k[9:]: v for k, v in row._mapping.items()
            if k.startswith('geometry_')}


def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the placex table. 'class_type' defines the type of result
        to return. Returns None if the row is None.

        The columns 'linked_place_id' and 'admin_level' are optional
        in the row. The defaults None and 15 are used when they are missing.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.PLACEX,
                      place_id=row.place_id,
                      osm_object=(row.osm_type, row.osm_id),
                      category=(row.class_, row.type),
                      parent_place_id=row.parent_place_id,
                      linked_place_id=getattr(row, 'linked_place_id', None),
                      admin_level=getattr(row, 'admin_level', 15),
                      names=_mingle_name_tags(row.name),
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))


def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the address interpolation table osmline. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        If the row contains a housenumber, then the housenumber is filled out.
        Otherwise the result contains the interpolation information in extratags.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)

    res = class_type(source_table=SourceTable.OSMLINE,
                     place_id=row.place_id,
                     parent_place_id=row.parent_place_id,
                     osm_object=('W', row.osm_id),
                     category=('place', 'houses' if hnr is None else 'house'),
                     # NOTE(review): this argument was unreadable in the
                     # reviewed text - confirm against the table definition.
                     address=row.address,
                     postcode=row.postcode,
                     country_code=row.country_code,
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if hnr is None:
        # No single house number: describe the whole interpolation instead.
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res


def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT],
                          osm_type: Optional[str] = None,
                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the Tiger data interpolation table. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        'osm_type' and 'osm_id' override the OSM object of the result.
        When they are not given (or falsy), the values from the row are used.

        If the row contains a housenumber, then the housenumber is filled out.
        Otherwise the result contains the interpolation information in extratags.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)

    res = class_type(source_table=SourceTable.TIGER,
                     place_id=row.place_id,
                     parent_place_id=row.parent_place_id,
                     osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
                     category=('place', 'houses' if hnr is None else 'house'),
                     postcode=row.postcode,
                     # NOTE(review): this argument was unreadable in the
                     # reviewed text; TIGER data only covers the US - confirm.
                     country_code='us',
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if hnr is None:
        # No single house number: describe the whole interpolation instead.
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res


def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the postcode table. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        The postcode is saved as the 'ref' name of the result.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.POSTCODE,
                      place_id=row.place_id,
                      parent_place_id=row.parent_place_id,
                      category=('place', 'postcode'),
                      names={'ref': row.postcode},
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))


def create_from_country_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the fallback country tables. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        The result always gets the address and search rank 4.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.COUNTRY,
                      category=('place', 'country'),
                      centroid=Point.from_wkb(row.centroid),
                      # NOTE(review): this argument was unreadable in the
                      # reviewed text - confirm against the query.
                      names=row.name,
                      rank_address=4, rank_search=4,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))


async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Retrieve more details from the database according to the
        parameters specified in 'details'.

        The results are amended in place. Address and entrance details
        are looked up for all results at once, the other details
        are queried result by result.
    """
    if results:
        log().section('Query details for result')
        if details.address_details:
            log().comment('Query address details')
            await complete_address_details(conn, results)
        if details.linked_places:
            log().comment('Query linked places')
            for result in results:
                await complete_linked_places(conn, result)
        if details.parented_places:
            log().comment('Query parent places')
            for result in results:
                await complete_parented_places(conn, result)
        if details.entrances:
            log().comment('Query entrances details')
            await complete_entrances_details(conn, results)
        if details.keywords:
            log().comment('Query keywords')
            for result in results:
                await complete_keywords(conn, result)


def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        When 'isaddress' is None, the value is taken from the column
        'isaddress' of the row, defaulting to True if there is no
        such column.
    """
    extratags: Dict[str, str] = getattr(row, 'extratags', {}) or {}
    if 'linked_place' in extratags:
        # The place type of a linked place takes precedence.
        extratags['place'] = extratags['linked_place']

    names = _mingle_name_tags(row.name) or {}
    if getattr(row, 'housenumber', None) is not None:
        names['housenumber'] = row.housenumber

    if isaddress is None:
        isaddress = getattr(row, 'isaddress', True)

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=isaddress,
                       rank_address=row.rank_address,
                       distance=row.distance)


def _get_address_lookup_id(result: BaseResultT) -> int:
    """ Return the ID of the place whose address lines should be used
        for the result.

        Results that do not come from placex or that have a search rank
        above 27 use their parent place. All other results use their
        linked place. In both cases the place ID of the result itself
        is the fallback.
    """
    assert result.place_id
    if result.source_table != SourceTable.PLACEX or result.rank_search > 27:
        return result.parent_place_id or result.place_id

    return result.linked_place_id or result.place_id


async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
    """ Append the address lines for postcode, country and country code
        to the address rows of the result. The address rows of
        the result must already be initialised.
    """
    assert result.address_rows is not None
    # Results that are themselves a postcode get no extra postcode line.
    if result.category[0] not in ('boundary', 'place')\
       or result.category[1] not in ('postal_code', 'postcode'):
        postcode = result.postcode
        if not postcode and result.address:
            postcode = result.address.get('postcode')
        # Skip postcode lists.
        if postcode and ',' not in postcode and ';' not in postcode:
            result.address_rows.append(AddressLine(
                category=('place', 'postcode'),
                names={'ref': postcode},
                fromarea=False, isaddress=True, rank_address=5,
                distance=0.0))
    if result.country_code:
        async def _get_country_names() -> Optional[Dict[str, str]]:
            t = conn.t.country_name
            sql = sa.select(t.c.name, t.c.derived_name)\
                    .where(t.c.country_code == result.country_code)
            # Only the first row is used.
            for cres in await conn.execute(sql):
                names = cast(Dict[str, str], cres[0])
                if cres[1]:
                    names.update(cast(Dict[str, str], cres[1]))
                return names
            return None

        country_names = await conn.get_cached_value('COUNTRY_NAME',
                                                    result.country_code,
                                                    _get_country_names)
        if country_names:
            result.address_rows.append(AddressLine(
                category=('place', 'country'),
                names=country_names,
                fromarea=False, isaddress=True, rank_address=4,
                distance=0.0))
        result.address_rows.append(AddressLine(
            category=('place', 'country_code'),
            names={'ref': result.country_code}, extratags={},
            fromarea=True, isaddress=False, rank_address=4,
            distance=0.0))


def _setup_address_details(result: BaseResultT) -> None:
    """ Initialise the address rows of the result with the address lines
        that can be derived from the result itself: the place itself
        (when it has names), its house number and an unlisted place
        from the address tags.
    """
    result.address_rows = AddressLines()
    if result.names:
        result.address_rows.append(AddressLine(
            place_id=result.place_id,
            osm_object=result.osm_object,
            category=result.category,
            names=result.names,
            extratags=result.extratags or {},
            admin_level=result.admin_level,
            fromarea=True, isaddress=True,
            rank_address=result.rank_address, distance=0.0))
    if result.source_table == SourceTable.PLACEX and result.address:
        housenumber = result.address.get('housenumber')\
                      or result.address.get('streetnumber')\
                      or result.address.get('conscriptionnumber')
    elif result.housenumber:
        housenumber = result.housenumber
    else:
        housenumber = None
    if housenumber:
        result.address_rows.append(AddressLine(
            category=('place', 'house_number'),
            names={'ref': housenumber},
            fromarea=True, isaddress=True, rank_address=28, distance=0))
    if result.address and '_unlisted_place' in result.address:
        result.address_rows.append(AddressLine(
            category=('place', 'locality'),
            names={'name': result.address['_unlisted_place']},
            fromarea=False, isaddress=True, rank_address=25, distance=0))


async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about places that make up the address of the result.

        The address rows of all results are replaced. Results without
        a place ID only get the address lines that can be derived from
        the result itself.
    """
    for result in results:
        _setup_address_details(result)

    # Lookup entries from place_address line

    lookup_ids = [{'pid': r.place_id,
                   'lid': _get_address_lookup_id(r),
                   'names': list(r.address.values()) if r.address else [],
                   'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else ''}
                  for r in results if r.place_id]

    if not lookup_ids:
        return

    ltab = sa.func.JsonArrayEach(sa.type_coerce(lookup_ids, sa.JSON))\
             .table_valued(sa.column('value', type_=sa.JSON))

    t = conn.t.placex
    taddr = conn.t.addressline

    # NOTE(review): the 'else_' value of the ordering case below was
    # unreadable in the reviewed text - confirm.
    sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
                    t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                    t.c.class_, t.c.type, t.c.extratags,
                    t.c.admin_level, taddr.c.fromarea,
                    sa.case((t.c.rank_address == 11, 5),
                            else_=t.c.rank_address).label('rank_address'),
                    taddr.c.distance, t.c.country_code, t.c.postcode)\
            .join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
                                taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
            .join(t, taddr.c.address_place_id == t.c.place_id)\
            .order_by('src_place_id')\
            .order_by(sa.column('rank_address').desc())\
            .order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
            .order_by(sa.case((sa.func.CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
                              (taddr.c.isaddress, 0),
                              (sa.and_(taddr.c.fromarea,
                                       t.c.geometry.ST_Contains(
                                           sa.func.ST_GeomFromEWKT(
                                               ltab.c.value['c'].as_string()))), 1),
                              else_=-1).desc())\
            .order_by(taddr.c.fromarea.desc())\
            .order_by(taddr.c.distance.desc())\
            .order_by(t.c.rank_search.desc())

    current_result = None
    current_rank_address = -1
    for row in await conn.execute(sql):
        # Rows come grouped by source place, so the result only needs
        # to be looked up when the source place changes.
        if current_result is None or row.src_place_id != current_result.place_id:
            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
            assert current_result is not None
            current_rank_address = -1

        # Only the first row of each address rank becomes part of the address.
        location_isaddress = row.rank_address != current_rank_address

        if current_result.country_code is None and row.country_code:
            current_result.country_code = row.country_code

        if row.type in ('postcode', 'postal_code') and location_isaddress:
            if not row.fromarea or \
               (current_result.address and 'postcode' in current_result.address):
                location_isaddress = False
            else:
                current_result.postcode = None

        assert current_result.address_rows is not None
        current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
        current_rank_address = row.rank_address

    for result in results:
        await _finalize_entry(conn, result)

    # Finally add the record for the parent entry where necessary.

    parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
    if parent_lookup_ids:
        ltab = sa.func.JsonArrayEach(sa.type_coerce(parent_lookup_ids, sa.JSON))\
                 .table_valued(sa.column('value', type_=sa.JSON))
        sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
                        t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                        t.c.class_, t.c.type, t.c.extratags,
                        t.c.admin_level,
                        t.c.rank_address)\
                .where(t.c.place_id == ltab.c.value['lid'].as_integer())

        for row in await conn.execute(sql):
            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
            assert current_result is not None
            assert current_result.address_rows is not None

            current_result.address_rows.append(AddressLine(
                place_id=row.place_id,
                osm_object=(row.osm_type, row.osm_id),
                category=(row.class_, row.type),
                names=row.name, extratags=row.extratags or {},
                admin_level=row.admin_level,
                fromarea=True, isaddress=True,
                rank_address=row.rank_address, distance=0.0))

    # Now sort everything
    def mk_sort_key(place_id: Optional[int]) -> Callable[[AddressLine], Tuple[bool, int, bool]]:
        # The line of the result itself comes first, then the lines
        # by descending address rank.
        return lambda a: (a.place_id != place_id, -a.rank_address, a.isaddress)

    for result in results:
        assert result.address_rows is not None
        result.address_rows.sort(key=mk_sort_key(result.place_id))


def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create a select on the placex table with the columns needed
        to create an address line. The distance column is computed
        against the given centroid. The caller must add the filter
        conditions.
    """
    t = conn.t.placex
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     t.c.geometry.is_area().label('fromarea'),
                     t.c.rank_address,
                     t.c.geometry.distance_spheroid(
                       sa.bindparam('centroid', value=centroid, type_=Geometry)).label('distance'))


async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about places that link to the result.

        The linked rows of the result are always reset. They remain
        empty for results that do not come from the placex table.
    """
    result.linked_rows = AddressLines()
    if result.source_table != SourceTable.PLACEX:
        return

    sql = _placex_select_address_row(conn, result.centroid)\
        .where(conn.t.placex.c.linked_place_id == result.place_id)

    for row in await conn.execute(sql):
        result.linked_rows.append(_result_row_to_address_row(row))


async def complete_entrances_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about tagged entrances for the given results.

        Only results from the placex table are looked up. The entrances
        of a result remain unchanged when no entrance is found for it.
    """
    place_ids = (r.place_id for r in results if r.source_table == SourceTable.PLACEX)

    t = conn.t.placex_entrance
    sql = sa.select(t.c.place_id, t.c.osm_id, t.c.type, t.c.location, t.c.extratags)\
            .where(t.c.place_id.in_(place_ids))

    current_result = None
    for row in await conn.execute(sql):
        # Avoid a new lookup while the rows belong to the same place.
        if current_result is None or row.place_id != current_result.place_id:
            current_result = next((r for r in results if r.place_id == row.place_id), None)
            assert current_result is not None
        if current_result.entrances is None:
            current_result.entrances = []
        current_result.entrances.append(EntranceDetails(
            osm_id=row.osm_id,
            type=row.type,
            location=Point.from_wkb(row.location),
            extratags=row.extratags,
            ))


async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about the search terms used for this place.

        Requires that the query analyzer was initialised to get access to
        the word table.
    """
    t = conn.t.search_name
    sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
            .where(t.c.place_id == result.place_id)

    result.name_keywords = []
    result.address_keywords = []

    t = conn.t.meta.tables['word']
    sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)

    for name_tokens, address_tokens in await conn.execute(sql):
        for row in await conn.execute(sel.where(t.c.word_id.in_(name_tokens))):
            result.name_keywords.append(WordInfo(*row))

        for row in await conn.execute(sel.where(t.c.word_id.in_(address_tokens))):
            result.address_keywords.append(WordInfo(*row))


async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about places that the result provides the
        address for.

        The parented rows of the result are always reset. They remain
        empty for results that do not come from the placex table.
        Only places with search rank 30 are considered.
    """
    result.parented_rows = AddressLines()
    if result.source_table != SourceTable.PLACEX:
        return

    sql = _placex_select_address_row(conn, result.centroid)\
        .where(conn.t.placex.c.parent_place_id == result.place_id)\
        .where(conn.t.placex.c.rank_search == 30)

    for row in await conn.execute(sql):
        result.parented_rows.append(_result_row_to_address_row(row))