1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Dataclasses for search results and helper functions to fill them.
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
15 Optional, Tuple, Dict, Sequence, TypeVar, Type, List,
22 import sqlalchemy as sa
24 from .typing import SaSelect, SaRow
25 from .sql.sqlalchemy_types import Geometry
26 from .types import Point, Bbox, LookupDetails, EntranceDetails
27 from .connection import SearchConnection
28 from .logging import log
30 # This file defines complex result data classes.
33 def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
34 """ Mix-in names from linked places, so that they show up
35 as standard names where necessary.
41 for k, v in names.items():
42 if k.startswith('_place_'):
44 out[k if outkey in names else outkey] = v
51 class SourceTable(enum.Enum):
52 """ The `SourceTable` type lists the possible sources a result can have.
55 """ The placex table is the main source for result usually containing
59 """ The osmline table contains address interpolations from OSM data.
60 Interpolation addresses are always approximate. The OSM id in the
61 result refers to the OSM way with the interpolation line object.
64 """ TIGER address data contains US addresses imported on the side,
65 see [Installing TIGER data](../customize/Tiger.md).
66 TIGER address are also interpolations. The addresses always refer
67 to a street from OSM data. The OSM id in the result refers to
71 """ The postcode table contains artificial centroids for postcodes,
72 computed from the postcodes available with address points. Results
73 are always approximate.
76 """ The country table provides a fallback, when country data is missing
@dataclasses.dataclass
class AddressLine:
    """ The `AddressLine` may contain the following fields about a related place
        and its function as an address object. Most fields are optional.
        Their presence depends on the kind and function of the address part.
    """
    category: Tuple[str, str]
    """ Main category of the place, described by a key-value pair.
    """
    names: Dict[str, str]
    """ All available names for the place including references, alternative
        names and translations.
    """
    fromarea: bool
    """ If true, then the exact area of the place is known. Without area
        information, Nominatim has to make an educated guess if an address
        belongs to one place or another.
    """
    isaddress: bool
    """ If true, this place should be considered for the final address display.
        Nominatim will sometimes include more than one candidate for
        the address in the list when it cannot reliably determine where the
        place belongs. It will consider names of all candidates when searching
        but when displaying the result, only the most likely candidate should
        be shown.
    """
    rank_address: int
    """ [Address rank](../customize/Ranking.md#address-rank) of the place.
    """
    distance: float
    """ Distance in degrees between the result place and this address part.
    """
    place_id: Optional[int] = None
    """ Internal ID of the place.
    """
    osm_object: Optional[Tuple[str, int]] = None
    """ OSM type and ID of the place, if such an object exists.
    """
    extratags: Optional[Dict[str, str]] = None
    """ Any extra information available about the place. This is a dictionary
        that usually contains OSM tag key-value pairs.
    """
    admin_level: Optional[int] = None
    """ The administrative level of a boundary as tagged in the input data.
        This field is only meaningful for places of the category
        (boundary, administrative).
    """
    local_name: Optional[str] = None
    """ Place holder for localization of this address part. See
        [Localization](Result-Handling.md#localization) below.
    """

    @property
    def display_name(self) -> Optional[str]:
        """ Dynamically compute the display name for the Address Line component.

            The localized name is preferred when it is set, then the plain
            'name' tag, then whichever name comes first in `names`.
            Returns None when the line carries no names at all.
        """
        if self.local_name:
            return self.local_name
        # 'names' is filled directly from database columns in some places,
        # so guard against a missing value instead of failing on the
        # membership test below.
        if not self.names:
            return None
        if 'name' in self.names:
            return self.names['name']
        return next(iter(self.names.values()), None)
class AddressLines(List[AddressLine]):
    """ List type holding the `AddressLine` entries that belong to a result.
    """
@dataclasses.dataclass
class WordInfo:
    """ Detailed information about a single entry in the list of
        search terms of a place.
    """
    word_id: int
    """ Internal identifier for the word.
    """
    word_token: str
    """ Normalised and transliterated form of the word.
        This is the form that is used for searching.
    """
    word: Optional[str] = None
    """ Untransliterated form of the word, when one is available.
    """
169 WordInfos = Sequence[WordInfo]
@dataclasses.dataclass
class BaseResult:
    """ Base data class with the fields that every kind of
        search result has in common.
    """
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15

    locale_name: Optional[str] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    entrances: Optional[List[EntranceDetails]] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Latitude (the y coordinate) of the center point of the place.
        """
        return self.centroid[1]

    @property
    def lon(self) -> float:
        """ Longitude (the x coordinate) of the center point of the place.
        """
        return self.centroid[0]

    @property
    def display_name(self) -> Optional[str]:
        """ Dynamically compute the display name for the result place
            and, if available, its address information.

            When address rows are present, the display names of all rows
            that are marked as address parts and carry names are joined
            with ', '. Directly repeated names are emitted only once.
            Otherwise the name falls back to the locale name, the 'name'
            tag, the first available name and finally the house number.
            Returns None when none of these are available.
        """
        if self.address_rows:
            parts: List[str] = []
            for line in self.address_rows:
                if not (line.isaddress and line.names):
                    continue
                name = line.display_name
                # Skip empty names and immediate repetitions.
                if name and (not parts or parts[-1] != name):
                    parts.append(name)

            if parts:
                return ', '.join(parts)

        # No usable address rows: fall back to the data of the place itself.
        if self.locale_name:
            return self.locale_name
        if self.names:
            if 'name' in self.names:
                return self.names['name']
            return next(iter(self.names.values()))
        return self.housenumber or None

    def calculated_importance(self) -> float:
        """ Get a valid importance value. This is the stored importance
            when one is set (and not zero), otherwise an artificial value
            computed from the search rank of the place.
        """
        if self.importance:
            return self.importance
        return 0.40001 - (self.rank_search / 75.0)
262 BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ Search result that additionally carries internal information
        from the database.
    """
    indexed_date: Optional[dt.datetime] = None
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ Result type returned by reverse geocoding.
    """
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
class ReverseResults(List[ReverseResult]):
    """ List of reverse lookup results, ordered by distance.
        The list is empty when nothing was found.
    """
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ Result type returned by forward geocoding.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0

    @property
    def ranking(self) -> float:
        """ Return the ranking, a combined measure of accuracy and importance.

            The value is the accuracy (1 when no accuracy is set) minus
            the calculated importance of the result.
        """
        accuracy = 1 if self.accuracy is None else self.accuracy
        return accuracy - self.calculated_importance()
class SearchResults(List[SearchResult]):
    """ List of forward lookup results, ordered by relevance.
        The list is empty when nothing was found.
    """
308 def _filter_geometries(row: SaRow) -> Dict[str, str]:
309 return {k[9:]: v for k, v in row._mapping.items()
310 if k.startswith('geometry_')}
313 def create_from_placex_row(row: Optional[SaRow],
314 class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
315 """ Construct a new result and add the data from the result row
316 from the placex table. 'class_type' defines the type of result
317 to return. Returns None if the row is None.
322 return class_type(source_table=SourceTable.PLACEX,
323 place_id=row.place_id,
324 osm_object=(row.osm_type, row.osm_id),
325 category=(row.class_, row.type),
326 parent_place_id=row.parent_place_id,
327 linked_place_id=getattr(row, 'linked_place_id', None),
328 admin_level=getattr(row, 'admin_level', 15),
329 names=_mingle_name_tags(row.name),
331 extratags=row.extratags,
332 housenumber=row.housenumber,
333 postcode=row.postcode,
334 wikipedia=row.wikipedia,
335 rank_address=row.rank_address,
336 rank_search=row.rank_search,
337 importance=row.importance,
338 country_code=row.country_code,
339 centroid=Point.from_wkb(row.centroid),
340 geometry=_filter_geometries(row))
343 def create_from_osmline_row(row: Optional[SaRow],
344 class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
345 """ Construct a new result and add the data from the result row
346 from the address interpolation table osmline. 'class_type' defines
347 the type of result to return. Returns None if the row is None.
349 If the row contains a housenumber, then the housenumber is filled out.
350 Otherwise the result contains the interpolation information in extratags.
355 hnr = getattr(row, 'housenumber', None)
357 res = class_type(source_table=SourceTable.OSMLINE,
358 place_id=row.place_id,
359 parent_place_id=row.parent_place_id,
360 osm_object=('W', row.osm_id),
361 category=('place', 'houses' if hnr is None else 'house'),
363 postcode=row.postcode,
364 country_code=row.country_code,
365 centroid=Point.from_wkb(row.centroid),
366 geometry=_filter_geometries(row))
369 res.extratags = {'startnumber': str(row.startnumber),
370 'endnumber': str(row.endnumber),
371 'step': str(row.step)}
373 res.housenumber = str(hnr)
378 def create_from_tiger_row(row: Optional[SaRow],
379 class_type: Type[BaseResultT],
380 osm_type: Optional[str] = None,
381 osm_id: Optional[int] = None) -> Optional[BaseResultT]:
382 """ Construct a new result and add the data from the result row
383 from the Tiger data interpolation table. 'class_type' defines
384 the type of result to return. Returns None if the row is None.
386 If the row contains a housenumber, then the housenumber is filled out.
387 Otherwise the result contains the interpolation information in extratags.
392 hnr = getattr(row, 'housenumber', None)
394 res = class_type(source_table=SourceTable.TIGER,
395 place_id=row.place_id,
396 parent_place_id=row.parent_place_id,
397 osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
398 category=('place', 'houses' if hnr is None else 'house'),
399 postcode=row.postcode,
401 centroid=Point.from_wkb(row.centroid),
402 geometry=_filter_geometries(row))
405 res.extratags = {'startnumber': str(row.startnumber),
406 'endnumber': str(row.endnumber),
407 'step': str(row.step)}
409 res.housenumber = str(hnr)
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a result of type 'class_type' from a row of the
        postcode table.

        The postcode itself becomes the 'ref' name of the result and the
        category is always ('place', 'postcode').
        Returns None if the row is None.
    """
    if row is None:
        return None

    postcode_names = {'ref': row.postcode}

    return class_type(
        source_table=SourceTable.POSTCODE,
        place_id=row.place_id,
        parent_place_id=row.parent_place_id,
        category=('place', 'postcode'),
        names=postcode_names,
        rank_search=row.rank_search,
        rank_address=row.rank_address,
        country_code=row.country_code,
        centroid=Point.from_wkb(row.centroid),
        geometry=_filter_geometries(row),
    )
435 def create_from_country_row(row: Optional[SaRow],
436 class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
437 """ Construct a new result and add the data from the result row
438 from the fallback country tables. 'class_type' defines
439 the type of result to return. Returns None if the row is None.
444 return class_type(source_table=SourceTable.COUNTRY,
445 category=('place', 'country'),
446 centroid=Point.from_wkb(row.centroid),
448 rank_address=4, rank_search=4,
449 country_code=row.country_code,
450 geometry=_filter_geometries(row))
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Retrieve more details from the database according to the
        parameters specified in 'details'.

        Address details are queried for all results in one go. Linked
        places, parented places, entrances and keywords are then queried
        result by result, one kind of detail after the other.
        Nothing is done for an empty result list.
    """
    if not results:
        return

    log().section('Query details for result')
    if details.address_details:
        log().comment('Query address details')
        await complete_address_details(conn, results)

    # Remaining details are filled in per result; the order of the table
    # is the order in which the database is queried.
    per_result_steps = (
        (details.linked_places, 'Query linked places', complete_linked_places),
        (details.parented_places, 'Query parent places', complete_parented_places),
        (details.entrances, 'Query entrances details', complete_entrances_details),
        (details.keywords, 'Query keywords', complete_keywords),
    )
    for requested, message, complete_func in per_result_steps:
        if requested:
            log().comment(message)
            for result in results:
                await complete_func(conn, result)
def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        'extratags', 'housenumber' and 'isaddress' are optional columns
        of the row. When 'isaddress' is not given as a parameter, it is
        taken from the row and defaults to True when the row has no such
        column.
    """
    extratags: Dict[str, str] = getattr(row, 'extratags', None) or {}
    try:
        # The tag of a linked place takes over the 'place' tag.
        extratags['place'] = extratags['linked_place']
    except KeyError:
        pass

    names = _mingle_name_tags(row.name) or {}
    housenumber = getattr(row, 'housenumber', None)
    if housenumber is not None:
        names['housenumber'] = housenumber

    if isaddress is None:
        isaddress = getattr(row, 'isaddress', True)

    osm_object = None
    if row.osm_type is not None:
        osm_object = (row.osm_type, row.osm_id)

    return AddressLine(place_id=row.place_id,
                       osm_object=osm_object,
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=isaddress,
                       rank_address=row.rank_address,
                       distance=row.distance)
def _get_address_lookup_id(result: BaseResultT) -> int:
    """ Return the place ID under which the address of the result
        should be looked up.

        Results that do not come from the placex table or that have a
        search rank above 27 use the ID of their parent place. All other
        results use the ID of the place they are linked to. When that ID
        is not set, the place ID of the result itself is returned.
    """
    assert result.place_id

    use_linked = result.source_table == SourceTable.PLACEX and result.rank_search <= 27
    preferred = result.linked_place_id if use_linked else result.parent_place_id

    return preferred or result.place_id
515 async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
516 assert result.address_rows is not None
517 if result.category[0] not in ('boundary', 'place')\
518 or result.category[1] not in ('postal_code', 'postcode'):
519 postcode = result.postcode
520 if not postcode and result.address:
521 postcode = result.address.get('postcode')
522 if postcode and ',' not in postcode and ';' not in postcode:
523 result.address_rows.append(AddressLine(
524 category=('place', 'postcode'),
525 names={'ref': postcode},
526 fromarea=False, isaddress=True, rank_address=5,
528 if result.country_code:
529 async def _get_country_names() -> Optional[Dict[str, str]]:
530 t = conn.t.country_name
531 sql = sa.select(t.c.name, t.c.derived_name)\
532 .where(t.c.country_code == result.country_code)
533 for cres in await conn.execute(sql):
534 names = cast(Dict[str, str], cres[0])
536 names.update(cast(Dict[str, str], cres[1]))
540 country_names = await conn.get_cached_value('COUNTRY_NAME',
544 result.address_rows.append(AddressLine(
545 category=('place', 'country'),
547 fromarea=False, isaddress=True, rank_address=4,
549 result.address_rows.append(AddressLine(
550 category=('place', 'country_code'),
551 names={'ref': result.country_code}, extratags={},
552 fromarea=True, isaddress=False, rank_address=4,
def _setup_address_details(result: BaseResultT) -> None:
    """ Initialise the address rows of the result with the entries that
        can be derived from the result itself.

        The list always starts with a line for the result place. A line
        for the house number is added when one is available and a
        locality line is added when the address tags of the result
        contain '_unlisted_place'.
    """
    rows = result.address_rows = AddressLines()

    rows.append(AddressLine(
        place_id=result.place_id,
        osm_object=result.osm_object,
        category=result.category,
        names=result.names,
        extratags=result.extratags or {},
        admin_level=result.admin_level,
        fromarea=True, isaddress=True,
        rank_address=result.rank_address, distance=0.0))

    housenumber: Optional[str] = None
    if result.source_table == SourceTable.PLACEX and result.address:
        # For placex results only the address tags are consulted,
        # the first non-empty tag wins.
        for key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
            housenumber = result.address.get(key)
            if housenumber:
                break
    elif result.housenumber:
        housenumber = result.housenumber

    if housenumber:
        rows.append(AddressLine(
            category=('place', 'house_number'),
            names={'ref': housenumber},
            fromarea=True, isaddress=True, rank_address=28, distance=0))

    if result.address and '_unlisted_place' in result.address:
        rows.append(AddressLine(
            category=('place', 'locality'),
            names={'name': result.address['_unlisted_place']},
            fromarea=False, isaddress=True, rank_address=25, distance=0))
590 async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
591 """ Retrieve information about places that make up the address of the result.
593 for result in results:
594 _setup_address_details(result)
596 # Lookup entries from place_address line
598 lookup_ids = [{'pid': r.place_id,
599 'lid': _get_address_lookup_id(r),
600 'names': list(r.address.values()) if r.address else [],
601 'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else ''}
602 for r in results if r.place_id]
607 ltab = sa.func.JsonArrayEach(sa.type_coerce(lookup_ids, sa.JSON))\
608 .table_valued(sa.column('value', type_=sa.JSON))
611 taddr = conn.t.addressline
613 sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
614 t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
615 t.c.class_, t.c.type, t.c.extratags,
616 t.c.admin_level, taddr.c.fromarea,
617 sa.case((t.c.rank_address == 11, 5),
618 else_=t.c.rank_address).label('rank_address'),
619 taddr.c.distance, t.c.country_code, t.c.postcode)\
620 .join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
621 taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
622 .join(t, taddr.c.address_place_id == t.c.place_id)\
623 .order_by('src_place_id')\
624 .order_by(sa.column('rank_address').desc())\
625 .order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
626 .order_by(sa.case((sa.func.CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
627 (taddr.c.isaddress, 0),
628 (sa.and_(taddr.c.fromarea,
629 t.c.geometry.ST_Contains(
630 sa.func.ST_GeomFromEWKT(
631 ltab.c.value['c'].as_string()))), 1),
633 .order_by(taddr.c.fromarea.desc())\
634 .order_by(taddr.c.distance.desc())\
635 .order_by(t.c.rank_search.desc())
637 current_result = None
638 current_rank_address = -1
639 for row in await conn.execute(sql):
640 if current_result is None or row.src_place_id != current_result.place_id:
641 current_result = next((r for r in results if r.place_id == row.src_place_id), None)
642 assert current_result is not None
643 current_rank_address = -1
645 location_isaddress = row.rank_address != current_rank_address
647 if current_result.country_code is None and row.country_code:
648 current_result.country_code = row.country_code
650 if row.type in ('postcode', 'postal_code') and location_isaddress:
651 if not row.fromarea or \
652 (current_result.address and 'postcode' in current_result.address):
653 location_isaddress = False
655 current_result.postcode = None
657 assert current_result.address_rows is not None
658 current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
659 current_rank_address = row.rank_address
661 for result in results:
662 await _finalize_entry(conn, result)
664 # Finally add the record for the parent entry where necessary.
666 parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
667 if parent_lookup_ids:
668 ltab = sa.func.JsonArrayEach(sa.type_coerce(parent_lookup_ids, sa.JSON))\
669 .table_valued(sa.column('value', type_=sa.JSON))
670 sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
671 t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
672 t.c.class_, t.c.type, t.c.extratags,
675 .where(t.c.place_id == ltab.c.value['lid'].as_integer())
677 for row in await conn.execute(sql):
678 current_result = next((r for r in results if r.place_id == row.src_place_id), None)
679 assert current_result is not None
680 assert current_result.address_rows is not None
682 current_result.address_rows.append(AddressLine(
683 place_id=row.place_id,
684 osm_object=(row.osm_type, row.osm_id),
685 category=(row.class_, row.type),
686 names=row.name, extratags=row.extratags or {},
687 admin_level=row.admin_level,
688 fromarea=True, isaddress=True,
689 rank_address=row.rank_address, distance=0.0))
691 # Now sort everything
692 def mk_sort_key(place_id: Optional[int]) -> Callable[[AddressLine], Tuple[bool, int, bool]]:
693 return lambda a: (a.place_id != place_id, -a.rank_address, a.isaddress)
695 for result in results:
696 assert result.address_rows is not None
697 result.address_rows.sort(key=mk_sort_key(result.place_id))
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create the base query on the placex table that selects the
        columns needed for converting a row into an address line.

        The 'distance' column is computed between the place geometry
        and the given centroid.
    """
    t = conn.t.placex

    centroid_param = sa.bindparam('centroid', value=centroid, type_=Geometry)
    columns = (
        t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
        t.c.class_.label('class'), t.c.type,
        t.c.admin_level, t.c.housenumber,
        t.c.geometry.is_area().label('fromarea'),
        t.c.rank_address,
        t.c.geometry.distance_spheroid(centroid_param).label('distance'),
    )

    return sa.select(*columns)
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the places that are linked to the result into
        'result.linked_rows'.

        The list is always initialised. It is only filled for results
        from the placex table.
    """
    result.linked_rows = AddressLines()

    if result.source_table == SourceTable.PLACEX:
        placex = conn.t.placex
        sql = _placex_select_address_row(conn, result.centroid)
        sql = sql.where(placex.c.linked_place_id == result.place_id)

        rows = await conn.execute(sql)
        result.linked_rows.extend(map(_result_row_to_address_row, rows))
async def complete_entrances_details(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the tagged entrances of the place into 'result.entrances'.

        Only results from the placex table are looked up. The field is
        left untouched when the query yields no row.
    """
    if result.source_table == SourceTable.PLACEX:
        t = conn.t.place_entrance
        sql = sa.select(t.c.entrances).where(t.c.place_id == result.place_id)

        for row in await conn.execute(sql):
            # The single selected column holds the list of entrances.
            result.entrances = [EntranceDetails(**entrance) for entrance in row[0]]
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the search terms that are used for the place into
        'result.name_keywords' and 'result.address_keywords'.

        The word IDs are read from the search_name table and resolved
        through the 'word' table, so the query analyzer must have been
        initialised to make that table accessible.
    """
    search_name = conn.t.search_name
    vector_sql = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                   .where(search_name.c.place_id == result.place_id)

    name_keywords: List[WordInfo] = []
    address_keywords: List[WordInfo] = []
    result.name_keywords = name_keywords
    result.address_keywords = address_keywords

    word = conn.t.meta.tables['word']
    word_sql = sa.select(word.c.word_id, word.c.word_token, word.c.word)

    for name_tokens, address_tokens in await conn.execute(vector_sql):
        # Resolve the name terms first, then the address terms.
        for target, tokens in ((name_keywords, name_tokens),
                               (address_keywords, address_tokens)):
            for row in await conn.execute(word_sql.where(word.c.word_id.in_(tokens))):
                target.append(WordInfo(*row))
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the places that have the result as their address parent
        into 'result.parented_rows'.

        The list is always initialised. It is only filled for results
        from the placex table, and only places with a search rank of 30
        are included.
    """
    result.parented_rows = AddressLines()

    if result.source_table == SourceTable.PLACEX:
        placex = conn.t.placex
        sql = _placex_select_address_row(conn, result.centroid)
        sql = sql.where(placex.c.parent_place_id == result.place_id)
        sql = sql.where(placex.c.rank_search == 30)

        rows = await conn.execute(sql)
        result.parented_rows.extend(map(_result_row_to_address_row, rows))