1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Dataclasses for search results and helper functions to fill them.
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
15 Optional, Tuple, Dict, Sequence, TypeVar, Type, List,
22 import sqlalchemy as sa
24 from .typing import SaSelect, SaRow
25 from .sql.sqlalchemy_types import Geometry
26 from .types import Point, Bbox, LookupDetails
27 from .connection import SearchConnection
28 from .logging import log
30 # This file defines complex result data classes.
# Purpose (from the visible lines): walk over all entries of `names`; for keys
# starting with '_place_' the value is stored in `out` under `outkey`, unless
# `outkey` already exists in `names`, in which case the original key is kept.
# NOTE(review): extraction residue - every line carries an embedded line number
# and has lost its indentation. Embedded lines 36-40, 43 and 45-48 are absent,
# so the docstring terminator, the creation of `out`, the assignment of
# `outkey` (presumably the key without its '_place_' prefix), the handling of
# other keys and the return statement cannot be seen. Restore them from the
# original file before relying on this block.
33 def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
34 """ Mix-in names from linked places, so that they show up
35 as standard names where necessary.
41 for k, v in names.items():
42 if k.startswith('_place_'):
44 out[k if outkey in names else outkey] = v
# Enumeration of the sources a result can come from.
# NOTE(review): extraction residue - embedded line numbers, no indentation, and
# every member definition is absent (only the member docstrings survive, each
# of them without its closing lines). Other code in this file refers to the
# members PLACEX, OSMLINE, TIGER, POSTCODE and COUNTRY; their values are not
# visible here and must be taken from the original file.
51 class SourceTable(enum.Enum):
52 """ The `SourceTable` type lists the possible sources a result can have.
55 """ The placex table is the main source for result usually containing
59 """ The osmline table contains address interpolations from OSM data.
60 Interpolation addresses are always approximate. The OSM id in the
61 result refers to the OSM way with the interpolation line object.
64 """ TIGER address data contains US addresses imported on the side,
65 see [Installing TIGER data](../customize/Tiger.md).
66 TIGER address are also interpolations. The addresses always refer
67 to a street from OSM data. The OSM id in the result refers to
71 """ The postcode table contains artificial centroids for postcodes,
72 computed from the postcodes available with address points. Results
73 are always approximate.
76 """ The country table provides a fallback, when country data is missing
# Dataclass describing one part of an address (see the class docstring below).
# `display_name` returns `local_name` first, then names['name'], and finally
# the first value found in `names` (None when `names` is empty).
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# The `class` statement (embedded line 82) is absent, and so are the field
# declarations that belong to the docstrings about names, area, address
# display, rank and distance. Callers in this file pass the keywords `names`,
# `fromarea`, `isaddress`, `rank_address` and `distance`, so those are
# presumably the missing fields - confirm names, types and order against the
# original file. The two lines before `display_name` (a decorator may be among
# them) and the guard in front of `return self.local_name` are absent as well.
81 @dataclasses.dataclass
83 """ The `AddressLine` may contain the following fields about a related place
84 and its function as an address object. Most fields are optional.
85 Their presence depends on the kind and function of the address part.
87 category: Tuple[str, str]
88 """ Main category of the place, described by a key-value pair.
91 """ All available names for the place including references, alternative
92 names and translations.
95 """ If true, then the exact area of the place is known. Without area
96 information, Nominatim has to make an educated guess if an address
97 belongs to one place or another.
100 """ If true, this place should be considered for the final address display.
101 Nominatim will sometimes include more than one candidate for
102 the address in the list when it cannot reliably determine where the
103 place belongs. It will consider names of all candidates when searching
104 but when displaying the result, only the most likely candidate should
108 """ [Address rank](../customize/Ranking.md#address-rank) of the place.
111 """ Distance in degrees between the result place and this address part.
113 place_id: Optional[int] = None
114 """ Internal ID of the place.
116 osm_object: Optional[Tuple[str, int]] = None
117 """ OSM type and ID of the place, if such an object exists.
119 extratags: Optional[Dict[str, str]] = None
120 """ Any extra information available about the place. This is a dictionary
121 that usually contains OSM tag key-value pairs.
124 admin_level: Optional[int] = None
125 """ The administrative level of a boundary as tagged in the input data.
126 This field is only meaningful for places of the category
127 (boundary, administrative).
130 local_name: Optional[str] = None
131 """ Place holder for localization of this address part. See
132 [Localization](Result-Handling.md#localization) below.
136 def display_name(self) -> Optional[str]:
137 """ Dynamically compute the display name for the Address Line component
140 return self.local_name
141 elif 'name' in self.names:
142 return self.names['name']
144 return next(iter(self.names.values()), None)
class AddressLines(List[AddressLine]):
    """ A wrapper around a list of AddressLine objects.

        The class adds no members of its own. It only gives lists of
        address parts a dedicated type name.
    """
# Dataclass describing a single search term (see the class docstring below).
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# The `class` statement (embedded line 153) and the declarations of the two
# fields documented as "internal identifier" and "normalised and
# transliterated form" are absent. complete_keywords() fills instances
# positionally from the columns word_id, word_token and word, so there are
# presumably two fields ahead of `word` - confirm their names and types
# against the original file.
152 @dataclasses.dataclass
154 """ Each entry in the list of search terms contains the
155 following detailed information.
158 """ Internal identifier for the word.
161 """ Normalised and transliterated form of the word.
162 This form is used for searching.
164 word: Optional[str] = None
165 """ Untransliterated form, if available.
# Type used for the keyword lists of a result: any sequence of WordInfo.
WordInfos = Sequence[WordInfo]
# Dataclass with the fields shared by all result types (see docstring below).
# Visible behaviour:
#  * lat / lon return element 1 / element 0 of `centroid`.
#  * display_name joins the display names of all address rows that are flagged
#    `isaddress` and have names, skipping a name that equals the one added
#    directly before it. Otherwise it falls back to `locale_name`,
#    names['name'], the first value in `names` and finally `housenumber`.
#  * calculated_importance returns `importance` when that is truthy, else
#    0.40001 - rank_search / 75.0. A stored importance of 0.0 is falsy and is
#    therefore replaced by the computed value as well.
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# The `class` statement (embedded line 173), the declaration of `centroid`,
# the lines directly in front of lat, lon and display_name (decorators may be
# among them), several guards inside display_name (embedded 238, 242, 246),
# its final return and the docstring terminators are absent. Restore them from
# the original file before relying on this block.
172 @dataclasses.dataclass
174 """ Data class collecting information common to all
175 types of search results.
177 source_table: SourceTable
178 category: Tuple[str, str]
181 place_id: Optional[int] = None
182 osm_object: Optional[Tuple[str, int]] = None
183 parent_place_id: Optional[int] = None
184 linked_place_id: Optional[int] = None
185 admin_level: int = 15
187 locale_name: Optional[str] = None
189 names: Optional[Dict[str, str]] = None
190 address: Optional[Dict[str, str]] = None
191 extratags: Optional[Dict[str, str]] = None
193 housenumber: Optional[str] = None
194 postcode: Optional[str] = None
195 wikipedia: Optional[str] = None
197 rank_address: int = 30
198 rank_search: int = 30
199 importance: Optional[float] = None
201 country_code: Optional[str] = None
203 address_rows: Optional[AddressLines] = None
204 linked_rows: Optional[AddressLines] = None
205 parented_rows: Optional[AddressLines] = None
206 name_keywords: Optional[WordInfos] = None
207 address_keywords: Optional[WordInfos] = None
209 geometry: Dict[str, str] = dataclasses.field(default_factory=dict)
212 def lat(self) -> float:
213 """ Get the latitude (or y) of the center point of the place.
215 return self.centroid[1]
218 def lon(self) -> float:
219 """ Get the longitude (or x) of the center point of the place.
221 return self.centroid[0]
224 def display_name(self) -> Optional[str]:
225 """ Dynamically compute the display name for the result place
226 and, if available, its address information..
228 if self.address_rows: # if this is true we need additional processing
229 label_parts: List[str] = []
231 for line in self.address_rows: # assume locale_name is set by external formatter
232 if line.isaddress and line.names:
233 address_name = line.display_name
235 if address_name and (not label_parts or label_parts[-1] != address_name):
236 label_parts.append(address_name)
239 return ', '.join(label_parts)
241 # Now adding additional information for reranking
243 return self.locale_name
244 elif self.names and 'name' in self.names:
245 return self.names['name']
247 return next(iter(self.names.values()))
248 elif self.housenumber:
249 return self.housenumber
252 def calculated_importance(self) -> float:
253 """ Get a valid importance value. This is either the stored importance
254 of the value or an artificial value computed from the place's
257 return self.importance or (0.40001 - (self.rank_search/75.0))
# Type variable for helpers that accept or create any subclass of BaseResult.
BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ A search result with more internal information from the database.
    """
    # Optional timestamp, None when not supplied.
    # NOTE(review): presumably the time the place was last indexed - confirm
    # against the code that fills this field.
    indexed_date: Optional[dt.datetime] = None
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ A search result for reverse geocoding.
    """
    # Both fields are optional and default to None.
    # NOTE(review): presumably the distance between the query position and
    # the place; the unit is not visible in this file - confirm.
    distance: Optional[float] = None
    # NOTE(review): presumably the bounding box of the place - confirm.
    bbox: Optional[Bbox] = None
class ReverseResults(List[ReverseResult]):
    """ Sequence of reverse lookup results ordered by distance.
        May be empty when no result was found.
    """
# Result type for forward geocoding. `ranking` is `accuracy` minus
# calculated_importance(); when `accuracy` is None, 1 is used in its place
# (the field is annotated as plain float with a default of 0.0, so the None
# branch only triggers if a caller assigns None explicitly).
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Embedded lines 291-292 in front of `ranking` are absent; one of them may be
# a decorator. Check the original file before restoring this block, because
# that decides whether callers use `ranking` with or without parentheses.
285 @dataclasses.dataclass
286 class SearchResult(BaseResult):
287 """ A search result for forward geocoding.
289 bbox: Optional[Bbox] = None
290 accuracy: float = 0.0
293 def ranking(self) -> float:
294 """ Return the ranking, a combined measure of accuracy and importance.
296 return (self.accuracy if self.accuracy is not None else 1) \
297 - self.calculated_importance()
class SearchResults(List[SearchResult]):
    """ Sequence of forward lookup results ordered by relevance.
        May be empty when no result was found.
    """
306 def _filter_geometries(row: SaRow) -> Dict[str, str]:
307 return {k[9:]: v for k, v in row._mapping.items()
308 if k.startswith('geometry_')}
# Builds a `class_type` instance with source table PLACEX from a placex row.
# linked_place_id and admin_level are read with getattr() and fall back to
# None / 15 when the row has no such column; names go through
# _mingle_name_tags(), the centroid is decoded with Point.from_wkb() and the
# 'geometry_*' columns are collected by _filter_geometries().
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Embedded lines 316-319 (docstring end and presumably the None guard the
# docstring promises) and 328 (one keyword argument between `names` and
# `extratags`) are absent. Restore them from the original file.
311 def create_from_placex_row(row: Optional[SaRow],
312 class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
313 """ Construct a new result and add the data from the result row
314 from the placex table. 'class_type' defines the type of result
315 to return. Returns None if the row is None.
320 return class_type(source_table=SourceTable.PLACEX,
321 place_id=row.place_id,
322 osm_object=(row.osm_type, row.osm_id),
323 category=(row.class_, row.type),
324 parent_place_id=row.parent_place_id,
325 linked_place_id=getattr(row, 'linked_place_id', None),
326 admin_level=getattr(row, 'admin_level', 15),
327 names=_mingle_name_tags(row.name),
329 extratags=row.extratags,
330 housenumber=row.housenumber,
331 postcode=row.postcode,
332 wikipedia=row.wikipedia,
333 rank_address=row.rank_address,
334 rank_search=row.rank_search,
335 importance=row.importance,
336 country_code=row.country_code,
337 centroid=Point.from_wkb(row.centroid),
338 geometry=_filter_geometries(row))
# Builds a `class_type` instance with source table OSMLINE. The OSM object is
# always a way ('W'). The category is ('place', 'houses') when the row has no
# housenumber and ('place', 'house') otherwise. Either extratags receives
# startnumber/endnumber/step as strings, or housenumber receives str(hnr).
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Absent: the docstring end and the lines up to embedded 352 (presumably the
# None guard the docstring promises), one keyword argument at embedded 360,
# the branch lines around the two assignments (embedded 365-366, 370) and the
# tail of the function including its return. Restore from the original file.
341 def create_from_osmline_row(row: Optional[SaRow],
342 class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
343 """ Construct a new result and add the data from the result row
344 from the address interpolation table osmline. 'class_type' defines
345 the type of result to return. Returns None if the row is None.
347 If the row contains a housenumber, then the housenumber is filled out.
348 Otherwise the result contains the interpolation information in extratags.
353 hnr = getattr(row, 'housenumber', None)
355 res = class_type(source_table=SourceTable.OSMLINE,
356 place_id=row.place_id,
357 parent_place_id=row.parent_place_id,
358 osm_object=('W', row.osm_id),
359 category=('place', 'houses' if hnr is None else 'house'),
361 postcode=row.postcode,
362 country_code=row.country_code,
363 centroid=Point.from_wkb(row.centroid),
364 geometry=_filter_geometries(row))
367 res.extratags = {'startnumber': str(row.startnumber),
368 'endnumber': str(row.endnumber),
369 'step': str(row.step)}
371 res.housenumber = str(hnr)
# Builds a `class_type` instance with source table TIGER. The optional
# `osm_type` / `osm_id` arguments take precedence over the row's own values;
# because they are combined with `or`, a falsy argument (None, '' or 0) falls
# back to the row. The category and the housenumber/extratags handling are
# the same as in create_from_osmline_row.
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Absent: the docstring end and the lines up to embedded 389 (presumably the
# None guard the docstring promises), one keyword argument at embedded 398,
# the branch lines around the two assignments (embedded 401-402, 406) and the
# tail of the function including its return. Restore from the original file.
376 def create_from_tiger_row(row: Optional[SaRow],
377 class_type: Type[BaseResultT],
378 osm_type: Optional[str] = None,
379 osm_id: Optional[int] = None) -> Optional[BaseResultT]:
380 """ Construct a new result and add the data from the result row
381 from the Tiger data interpolation table. 'class_type' defines
382 the type of result to return. Returns None if the row is None.
384 If the row contains a housenumber, then the housenumber is filled out.
385 Otherwise the result contains the interpolation information in extratags.
390 hnr = getattr(row, 'housenumber', None)
392 res = class_type(source_table=SourceTable.TIGER,
393 place_id=row.place_id,
394 parent_place_id=row.parent_place_id,
395 osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
396 category=('place', 'houses' if hnr is None else 'house'),
397 postcode=row.postcode,
399 centroid=Point.from_wkb(row.centroid),
400 geometry=_filter_geometries(row))
403 res.extratags = {'startnumber': str(row.startnumber),
404 'endnumber': str(row.endnumber),
405 'step': str(row.step)}
407 res.housenumber = str(hnr)
# Builds a `class_type` instance with source table POSTCODE. The category is
# fixed to ('place', 'postcode') and the postcode itself is stored as the
# only name, under the key 'ref'.
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Embedded lines 417-420 (docstring end and presumably the None guard the
# docstring promises) are absent. Restore them from the original file.
412 def create_from_postcode_row(row: Optional[SaRow],
413 class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
414 """ Construct a new result and add the data from the result row
415 from the postcode table. 'class_type' defines
416 the type of result to return. Returns None if the row is None.
421 return class_type(source_table=SourceTable.POSTCODE,
422 place_id=row.place_id,
423 parent_place_id=row.parent_place_id,
424 category=('place', 'postcode'),
425 names={'ref': row.postcode},
426 rank_search=row.rank_search,
427 rank_address=row.rank_address,
428 country_code=row.country_code,
429 centroid=Point.from_wkb(row.centroid),
430 geometry=_filter_geometries(row))
# Builds a `class_type` instance with source table COUNTRY. The category is
# fixed to ('place', 'country') and both ranks are fixed to 4.
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Embedded lines 438-441 (docstring end and presumably the None guard the
# docstring promises) and 445 (one keyword argument between `centroid` and
# the ranks) are absent. Restore them from the original file.
433 def create_from_country_row(row: Optional[SaRow],
434 class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
435 """ Construct a new result and add the data from the result row
436 from the fallback country tables. 'class_type' defines
437 the type of result to return. Returns None if the row is None.
442 return class_type(source_table=SourceTable.COUNTRY,
443 category=('place', 'country'),
444 centroid=Point.from_wkb(row.centroid),
446 rank_address=4, rank_search=4,
447 country_code=row.country_code,
448 geometry=_filter_geometries(row))
# Dispatches to the complete_*() helpers according to the flags in `details`.
# Address details are resolved for the whole list in a single call, while
# linked places, parented places and keywords are queried result by result.
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Embedded lines 455-456 (docstring end and possibly a guard around the whole
# body) and 469 (presumably the condition in front of the keyword lookup) are
# absent. Restore them from the original file.
451 async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
452 details: LookupDetails) -> None:
453 """ Retrieve more details from the database according to the
454 parameters specified in 'details'.
457 log().section('Query details for result')
458 if details.address_details:
459 log().comment('Query address details')
460 await complete_address_details(conn, results)
461 if details.linked_places:
462 log().comment('Query linked places')
463 for result in results:
464 await complete_linked_places(conn, result)
465 if details.parented_places:
466 log().comment('Query parent places')
467 for result in results:
468 await complete_parented_places(conn, result)
470 log().comment('Query keywords')
471 for result in results:
472 await complete_keywords(conn, result)
# Converts a query row into an AddressLine.
#  * extratags: taken from the row when present and non-empty. If it holds
#    'linked_place', that value is copied to the key 'place' in the same
#    dictionary object, i.e. the row's own dictionary is modified.
#  * names: _mingle_name_tags(row.name), plus 'housenumber' when the row has
#    a housenumber that is not None.
#  * isaddress: when the argument is None, the row's `isaddress` column is
#    used, defaulting to True if the row has no such column.
#  * The category is read with getattr(row, 'class'), so the query must expose
#    the class column under the label 'class', as _placex_select_address_row()
#    does.
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Embedded lines 477 (docstring end), 492-493 and 496 (three keyword
# arguments, presumably names, extratags and isaddress) are absent. Restore
# them from the original file.
475 def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
476 """ Create a new AddressLine from the results of a database query.
478 extratags: Dict[str, str] = getattr(row, 'extratags', {}) or {}
479 if 'linked_place' in extratags:
480 extratags['place'] = extratags['linked_place']
482 names = _mingle_name_tags(row.name) or {}
483 if getattr(row, 'housenumber', None) is not None:
484 names['housenumber'] = row.housenumber
486 if isaddress is None:
487 isaddress = getattr(row, 'isaddress', True)
489 return AddressLine(place_id=row.place_id,
490 osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
491 category=(getattr(row, 'class'), row.type),
494 admin_level=row.admin_level,
495 fromarea=row.fromarea,
497 rank_address=row.rank_address,
498 distance=row.distance)
def _get_address_lookup_id(result: BaseResultT) -> int:
    """ Return the place ID under which the address lines of `result`
        are looked up.

        Results that do not come from the placex table, or whose search
        rank is greater than 27, use their parent place when one is set.
        All other results use their linked place when one is set. In both
        cases the place ID of the result itself is the fallback.

        The result must have a place ID.
    """
    assert result.place_id
    if result.source_table != SourceTable.PLACEX or result.rank_search > 27:
        return result.parent_place_id or result.place_id

    return result.linked_place_id or result.place_id
# Appends the synthetic trailing address lines to a result whose
# `address_rows` already exist:
#  * a ('place', 'postcode') line, unless the result itself is a postcode
#    object (first category element 'boundary' or 'place' AND second element
#    'postal_code' or 'postcode'). The postcode comes from `result.postcode`
#    or else from the 'postcode' address tag, and is skipped when it contains
#    ',' or ';'.
#  * when a country code is set: a ('place', 'country') line using names read
#    from the country_name table through the connection's value cache (key
#    'COUNTRY_NAME'), and a ('place', 'country_code') line that is not flagged
#    as part of the address.
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Absent: the closing arguments of all three AddressLine calls (embedded 521,
# 542, 547), the condition and return lines inside _get_country_names()
# (embedded 529, 531-533), the remaining arguments of get_cached_value()
# (embedded 535-537) and the `names` argument of the country line (embedded
# 540). Restore them from the original file.
509 async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
510 assert result.address_rows is not None
511 if result.category[0] not in ('boundary', 'place')\
512 or result.category[1] not in ('postal_code', 'postcode'):
513 postcode = result.postcode
514 if not postcode and result.address:
515 postcode = result.address.get('postcode')
516 if postcode and ',' not in postcode and ';' not in postcode:
517 result.address_rows.append(AddressLine(
518 category=('place', 'postcode'),
519 names={'ref': postcode},
520 fromarea=False, isaddress=True, rank_address=5,
522 if result.country_code:
523 async def _get_country_names() -> Optional[Dict[str, str]]:
524 t = conn.t.country_name
525 sql = sa.select(t.c.name, t.c.derived_name)\
526 .where(t.c.country_code == result.country_code)
527 for cres in await conn.execute(sql):
528 names = cast(Dict[str, str], cres[0])
530 names.update(cast(Dict[str, str], cres[1]))
534 country_names = await conn.get_cached_value('COUNTRY_NAME',
538 result.address_rows.append(AddressLine(
539 category=('place', 'country'),
541 fromarea=False, isaddress=True, rank_address=4,
543 result.address_rows.append(AddressLine(
544 category=('place', 'country_code'),
545 names={'ref': result.country_code}, extratags={},
546 fromarea=True, isaddress=False, rank_address=4,
# Resets `result.address_rows` to a fresh AddressLines list and seeds it with
# lines derived from the result itself, without touching the database:
#  * a line for the result place (distance 0.0, flagged as address part),
#  * a ('place', 'house_number') line with rank 28. For placex results that
#    have address tags the number is the first non-empty of 'housenumber',
#    'streetnumber' and 'conscriptionnumber'; otherwise `result.housenumber`
#    is used when set,
#  * a ('place', 'locality') line with rank 25 when the address tags contain
#    '_unlisted_place'.
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Absent: the docstring end, the `names` argument of the first line (embedded
# 559) and the lines between the housenumber selection and the house_number
# line (embedded 570-572, presumably a fallback branch and a guard). Restore
# them from the original file.
550 def _setup_address_details(result: BaseResultT) -> None:
551 """ Retrieve information about places that make up the address of the result.
553 result.address_rows = AddressLines()
555 result.address_rows.append(AddressLine(
556 place_id=result.place_id,
557 osm_object=result.osm_object,
558 category=result.category,
560 extratags=result.extratags or {},
561 admin_level=result.admin_level,
562 fromarea=True, isaddress=True,
563 rank_address=result.rank_address, distance=0.0))
564 if result.source_table == SourceTable.PLACEX and result.address:
565 housenumber = result.address.get('housenumber')\
566 or result.address.get('streetnumber')\
567 or result.address.get('conscriptionnumber')
568 elif result.housenumber:
569 housenumber = result.housenumber
573 result.address_rows.append(AddressLine(
574 category=('place', 'house_number'),
575 names={'ref': housenumber},
576 fromarea=True, isaddress=True, rank_address=28, distance=0))
577 if result.address and '_unlisted_place' in result.address:
578 result.address_rows.append(AddressLine(
579 category=('place', 'locality'),
580 names={'name': result.address['_unlisted_place']},
581 fromarea=False, isaddress=True, rank_address=25, distance=0))
# Fills `address_rows` for every result in `results`. Visible steps:
#  1. _setup_address_details() seeds each result.
#  2. One lookup record per result with a place_id is collected ('pid' = own
#     place ID, 'lid' = ID from _get_address_lookup_id(), 'names' = values of
#     the address tags, 'c' = centroid as EWKT) and handed to the database as
#     a JSON array.
#  3. A single query joins the address lines stored for 'pid' or 'lid' with
#     the place table and orders them per result.
#  4. The rows are appended to their results as AddressLine entries.
#  5. _finalize_entry() runs for every result.
#  6. For records whose 'pid' differs from 'lid', the place behind 'lid' is
#     appended as one more address line.
#  7. Every address list is sorted.
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Absent code lines include embedded 597-600 (unknown statements after the
# lookup list), 604 (the assignment of `t`, presumably the placex table), 626
# (the end of the ordering `case`), 648 (presumably the `else:` in front of
# the postcode reset) and 667-668 (the remaining columns of the parent
# query). Restore them from the original file before relying on this block.
584 async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
585 """ Retrieve information about places that make up the address of the result.
587 for result in results:
588 _setup_address_details(result)
590 # Lookup entries from place_address line
592 lookup_ids = [{'pid': r.place_id,
593 'lid': _get_address_lookup_id(r),
594 'names': list(r.address.values()) if r.address else [],
595 'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else ''}
596 for r in results if r.place_id]
601 ltab = sa.func.JsonArrayEach(sa.type_coerce(lookup_ids, sa.JSON))\
602 .table_valued(sa.column('value', type_=sa.JSON))
605 taddr = conn.t.addressline
# Rows with address rank 11 are reported with rank 5 (see the `case` below).
607 sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
608 t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
609 t.c.class_, t.c.type, t.c.extratags,
610 t.c.admin_level, taddr.c.fromarea,
611 sa.case((t.c.rank_address == 11, 5),
612 else_=t.c.rank_address).label('rank_address'),
613 taddr.c.distance, t.c.country_code, t.c.postcode)\
614 .join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
615 taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
616 .join(t, taddr.c.address_place_id == t.c.place_id)\
617 .order_by('src_place_id')\
618 .order_by(sa.column('rank_address').desc())\
619 .order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
620 .order_by(sa.case((sa.func.CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
621 (taddr.c.isaddress, 0),
622 (sa.and_(taddr.c.fromarea,
623 t.c.geometry.ST_Contains(
624 sa.func.ST_GeomFromEWKT(
625 ltab.c.value['c'].as_string()))), 1),
627 .order_by(taddr.c.fromarea.desc())\
628 .order_by(taddr.c.distance.desc())\
629 .order_by(t.c.rank_search.desc())
# The query is ordered by src_place_id, so the target result only has to be
# looked up again when that ID changes.
631 current_result = None
632 current_rank_address = -1
633 for row in await conn.execute(sql):
634 if current_result is None or row.src_place_id != current_result.place_id:
635 current_result = next((r for r in results if r.place_id == row.src_place_id), None)
636 assert current_result is not None
637 current_rank_address = -1
# A row is only a candidate for the address when its rank differs from the
# rank of the row handled directly before it for the same result.
639 location_isaddress = row.rank_address != current_rank_address
641 if current_result.country_code is None and row.country_code:
642 current_result.country_code = row.country_code
644 if row.type in ('postcode', 'postal_code') and location_isaddress:
645 if not row.fromarea or \
646 (current_result.address and 'postcode' in current_result.address):
647 location_isaddress = False
649 current_result.postcode = None
651 assert current_result.address_rows is not None
652 current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
653 current_rank_address = row.rank_address
655 for result in results:
656 await _finalize_entry(conn, result)
658 # Finally add the record for the parent entry where necessary.
660 parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
661 if parent_lookup_ids:
662 ltab = sa.func.JsonArrayEach(sa.type_coerce(parent_lookup_ids, sa.JSON))\
663 .table_valued(sa.column('value', type_=sa.JSON))
664 sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
665 t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
666 t.c.class_, t.c.type, t.c.extratags,
669 .where(t.c.place_id == ltab.c.value['lid'].as_integer())
671 for row in await conn.execute(sql):
672 current_result = next((r for r in results if r.place_id == row.src_place_id), None)
673 assert current_result is not None
674 assert current_result.address_rows is not None
676 current_result.address_rows.append(AddressLine(
677 place_id=row.place_id,
678 osm_object=(row.osm_type, row.osm_id),
679 category=(row.class_, row.type),
680 names=row.name, extratags=row.extratags or {},
681 admin_level=row.admin_level,
682 fromarea=True, isaddress=True,
683 rank_address=row.rank_address, distance=0.0))
685 # Now sort everything
# Sort order: the line of the result place itself first, then descending
# address rank; within one rank, lines not flagged as address come first.
686 def mk_sort_key(place_id: Optional[int]) -> Callable[[AddressLine], Tuple[bool, int, bool]]:
687 return lambda a: (a.place_id != place_id, -a.rank_address, a.isaddress)
689 for result in results:
690 assert result.address_rows is not None
691 result.address_rows.sort(key=mk_sort_key(result.place_id))
# Builds the base SELECT used by complete_linked_places() and
# complete_parented_places(). The class column is exposed under the label
# 'class', `fromarea` is derived from the geometry with is_area(), and
# `distance` is the spheroid distance between the geometry and `centroid`,
# which is passed as a bound parameter.
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Embedded lines 696 (the assignment of `t`, presumably conn.t.placex) and
# 701 (one more selected column) are absent. Restore them from the original
# file.
694 def _placex_select_address_row(conn: SearchConnection,
695 centroid: Point) -> SaSelect:
697 return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
698 t.c.class_.label('class'), t.c.type,
699 t.c.admin_level, t.c.housenumber,
700 t.c.geometry.is_area().label('fromarea'),
702 t.c.geometry.distance_spheroid(
703 sa.bindparam('centroid', value=centroid, type_=Geometry)).label('distance'))
# Resets `result.linked_rows` to an empty AddressLines list, then appends one
# AddressLine for every placex row whose linked_place_id equals the place ID
# of the result.
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Embedded lines 708 (docstring end) and 711-712 are absent, so the body of
# the check for non-placex results cannot be seen (presumably an early
# return). Restore them from the original file.
706 async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
707 """ Retrieve information about places that link to the result.
709 result.linked_rows = AddressLines()
710 if result.source_table != SourceTable.PLACEX:
713 sql = _placex_select_address_row(conn, result.centroid)\
714 .where(conn.t.placex.c.linked_place_id == result.place_id)
716 for row in await conn.execute(sql):
717 result.linked_rows.append(_result_row_to_address_row(row))
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about the search terms used for this place.

        The token IDs stored for the place in the 'search_name' table are
        resolved against the 'word' table and saved as `WordInfo` entries
        in `result.name_keywords` and `result.address_keywords`. Both
        lists are reset first, so they stay empty when the place has no
        entry in 'search_name'.

        Requires that the query analyzer was initialised, so that the
        'word' table can be found in the metadata of the connection.
    """
    tsearch = conn.t.search_name
    sql = sa.select(tsearch.c.name_vector, tsearch.c.nameaddress_vector)\
            .where(tsearch.c.place_id == result.place_id)

    result.name_keywords = []
    result.address_keywords = []

    # The word table is not part of the static table set, so it is fetched
    # from the metadata by name.
    tword = conn.t.meta.tables['word']
    sel = sa.select(tword.c.word_id, tword.c.word_token, tword.c.word)

    for name_tokens, address_tokens in await conn.execute(sql):
        for row in await conn.execute(sel.where(tword.c.word_id.in_(name_tokens))):
            result.name_keywords.append(WordInfo(*row))

        for row in await conn.execute(sel.where(tword.c.word_id.in_(address_tokens))):
            result.address_keywords.append(WordInfo(*row))
# Resets `result.parented_rows` to an empty AddressLines list, then appends
# one AddressLine for every placex row that has the result as its parent
# (parent_place_id equals the place ID of the result) and a search rank of 30.
# NOTE(review): extraction residue - embedded line numbers, no indentation.
# Embedded lines 746-747 (rest of the docstring) and 750-751 are absent, so
# the body of the check for non-placex results cannot be seen (presumably an
# early return). Restore them from the original file.
744 async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
745 """ Retrieve information about places that the result provides the
748 result.parented_rows = AddressLines()
749 if result.source_table != SourceTable.PLACEX:
752 sql = _placex_select_address_row(conn, result.centroid)\
753 .where(conn.t.placex.c.parent_place_id == result.place_id)\
754 .where(conn.t.placex.c.rank_search == 30)
756 for row in await conn.execute(sql):
757 result.parented_rows.append(_result_row_to_address_row(row))