nominatim/api/search/geocoder.py

   1 # SPDX-License-Identifier: GPL-3.0-or-later
   2 #
   3 # This file is part of Nominatim. (https://nominatim.org)
   4 #
   5 # Copyright (C) 2023 by the Nominatim developer community.
   6 # For a full list of authors see the git log.
   7 """
   8 Public interface to the search code.
   9 """
  10 from typing import List, Any, Optional, Iterator, Tuple
  11 import itertools
  12
  13 from nominatim.api.connection import SearchConnection
  14 from nominatim.api.types import SearchDetails
  15 from nominatim.api.results import SearchResults, add_result_details
  16 from nominatim.api.search.token_assignment import yield_token_assignments
  17 from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
  18 from nominatim.api.search.db_searches import AbstractSearch
  19 from nominatim.api.search.query_analyzer_factory import make_query_analyzer, AbstractQueryAnalyzer
  20 from nominatim.api.search.query import Phrase, QueryStruct
  21 from nominatim.api.logging import log
  22
  23 class ForwardGeocoder:
  24     """ Main class responsible for place search.
  25     """
  26
  27     def __init__(self, conn: SearchConnection, params: SearchDetails) -> None:
  28         self.conn = conn
  29         self.params = params
  30         self.query_analyzer: Optional[AbstractQueryAnalyzer] = None
  31
  32
  33     @property
  34     def limit(self) -> int:
  35         """ Return the configured maximum number of search results.
  36         """
  37         return self.params.max_results
  38
  39
  40     async def build_searches(self,
  41                              phrases: List[Phrase]) -> Tuple[QueryStruct, List[AbstractSearch]]:
  42         """ Analyse the query and return the tokenized query and list of
  43             possible searches over it.
  44         """
  45         if self.query_analyzer is None:
  46             self.query_analyzer = await make_query_analyzer(self.conn)
  47
  48         query = await self.query_analyzer.analyze_query(phrases)
  49
  50         searches: List[AbstractSearch] = []
  51         if query.num_token_slots() > 0:
  52             # 2. Compute all possible search interpretations
  53             log().section('Compute abstract searches')
  54             search_builder = SearchBuilder(query, self.params)
  55             num_searches = 0
  56             for assignment in yield_token_assignments(query):
  57                 searches.extend(search_builder.build(assignment))
  58                 if num_searches < len(searches):
  59                     log().table_dump('Searches for assignment',
  60                                      _dump_searches(searches, query, num_searches))
  61                 num_searches = len(searches)
  62             searches.sort(key=lambda s: s.penalty)
  63
  64         return query, searches
  65
  66
  67     async def execute_searches(self, query: QueryStruct,
  68                                searches: List[AbstractSearch]) -> SearchResults:
  69         """ Run the abstract searches against the database until a result
  70             is found.
  71         """
  72         log().section('Execute database searches')
  73         results = SearchResults()
  74
  75         num_results = 0
  76         min_ranking = 1000.0
  77         prev_penalty = 0.0
  78         for i, search in enumerate(searches):
  79             if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
  80                 break
  81             log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
  82             for result in await search.lookup(self.conn, self.params):
  83                 results.append(result)
  84                 min_ranking = min(min_ranking, result.ranking + 0.5, search.penalty + 0.3)
  85             log().result_dump('Results', ((r.accuracy, r) for r in results[num_results:]))
  86             num_results = len(results)
  87             prev_penalty = search.penalty
  88
  89         if results:
  90             min_ranking = min(r.ranking for r in results)
  91             results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5)
  92
  93         if results:
  94             min_rank = min(r.rank_search for r in results)
  95
  96             results = SearchResults(r for r in results
  97                                     if r.ranking + 0.05 * (r.rank_search - min_rank)
  98                                        < min_ranking + 0.5)
  99
 100             results.sort(key=lambda r: r.accuracy - r.calculated_importance())
 101             results = SearchResults(results[:self.limit])
 102
 103         return results
 104
 105
 106     async def lookup_pois(self, categories: List[Tuple[str, str]],
 107                           phrases: List[Phrase]) -> SearchResults:
 108         """ Look up places by category. If phrase is given, a place search
 109             over the phrase will be executed first and places close to the
 110             results returned.
 111         """
 112         log().function('forward_lookup_pois', categories=categories, params=self.params)
 113
 114         if phrases:
 115             query, searches = await self.build_searches(phrases)
 116
 117             if query:
 118                 searches = [wrap_near_search(categories, s) for s in searches[:50]]
 119                 results = await self.execute_searches(query, searches)
 120             else:
 121                 results = SearchResults()
 122         else:
 123             search = build_poi_search(categories, self.params.countries)
 124             results = await search.lookup(self.conn, self.params)
 125
 126         await add_result_details(self.conn, results, self.params)
 127         log().result_dump('Final Results', ((r.accuracy, r) for r in results))
 128
 129         return results
 130
 131
 132     async def lookup(self, phrases: List[Phrase]) -> SearchResults:
 133         """ Look up a single free-text query.
 134         """
 135         log().function('forward_lookup', phrases=phrases, params=self.params)
 136         results = SearchResults()
 137
 138         if self.params.is_impossible():
 139             return results
 140
 141         query, searches = await self.build_searches(phrases)
 142
 143         if searches:
 144             # Execute SQL until an appropriate result is found.
 145             results = await self.execute_searches(query, searches[:50])
 146             await add_result_details(self.conn, results, self.params)
 147             log().result_dump('Final Results', ((r.accuracy, r) for r in results))
 148
 149         return results
 150
 151
 152 # pylint: disable=invalid-name,too-many-locals
 153 def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
 154                    start: int = 0) -> Iterator[Optional[List[Any]]]:
 155     yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries', 'Qualifier', 'Rankings']
 156
 157     def tk(tl: List[int]) -> str:
 158         tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
 159
 160         return f"[{','.join(tstr)}]"
 161
 162     def fmt_ranking(f: Any) -> str:
 163         if not f:
 164             return ''
 165         ranks = ','.join((f"{tk(r.tokens)}^{r.penalty:.3g}" for r in f.rankings))
 166         if len(ranks) > 100:
 167             ranks = ranks[:100] + '...'
 168         return f"{f.column}({ranks},def={f.default:.3g})"
 169
 170     def fmt_lookup(l: Any) -> str:
 171         if not l:
 172             return ''
 173
 174         return f"{l.lookup_type}({l.column}{tk(l.tokens)})"
 175
 176
 177     def fmt_cstr(c: Any) -> str:
 178         if not c:
 179             return ''
 180
 181         return f'{c[0]}^{c[1]}'
 182
 183     for search in searches[start:]:
 184         fields = ('lookups', 'rankings', 'countries', 'housenumbers',
 185                   'postcodes', 'qualifier')
 186         iters = itertools.zip_longest([f"{search.penalty:.3g}"],
 187                                       *(getattr(search, attr, []) for attr in fields),
 188                                       fillvalue= '')
 189         for penalty, lookup, rank, cc, hnr, pc, qual in iters:
 190             yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
 191                    fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_ranking(rank)]
 192         yield None