nominatim/api/search/geocoder.py

   1 # SPDX-License-Identifier: GPL-3.0-or-later
   2 #
   3 # This file is part of Nominatim. (https://nominatim.org)
   4 #
   5 # Copyright (C) 2023 by the Nominatim developer community.
   6 # For a full list of authors see the git log.
   7 """
   8 Public interface to the search code.
   9 """
from typing import List, Any, Optional, Iterator, Tuple
import itertools
import datetime as dt
import time

from nominatim.api.connection import SearchConnection
from nominatim.api.types import SearchDetails
from nominatim.api.results import SearchResults, add_result_details
from nominatim.api.search.token_assignment import yield_token_assignments
from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
from nominatim.api.search.db_searches import AbstractSearch
from nominatim.api.search.query_analyzer_factory import make_query_analyzer, AbstractQueryAnalyzer
from nominatim.api.search.query import Phrase, QueryStruct
from nominatim.api.logging import log
  23
  24 class ForwardGeocoder:
  25     """ Main class responsible for place search.
  26     """
  27
  28     def __init__(self, conn: SearchConnection,
  29                  params: SearchDetails, timeout: Optional[int]) -> None:
  30         self.conn = conn
  31         self.params = params
  32         self.timeout = dt.timedelta(seconds=timeout or 1000000)
  33         self.query_analyzer: Optional[AbstractQueryAnalyzer] = None
  34
  35
  36     @property
  37     def limit(self) -> int:
  38         """ Return the configured maximum number of search results.
  39         """
  40         return self.params.max_results
  41
  42
  43     async def build_searches(self,
  44                              phrases: List[Phrase]) -> Tuple[QueryStruct, List[AbstractSearch]]:
  45         """ Analyse the query and return the tokenized query and list of
  46             possible searches over it.
  47         """
  48         if self.query_analyzer is None:
  49             self.query_analyzer = await make_query_analyzer(self.conn)
  50
  51         query = await self.query_analyzer.analyze_query(phrases)
  52
  53         searches: List[AbstractSearch] = []
  54         if query.num_token_slots() > 0:
  55             # 2. Compute all possible search interpretations
  56             log().section('Compute abstract searches')
  57             search_builder = SearchBuilder(query, self.params)
  58             num_searches = 0
  59             for assignment in yield_token_assignments(query):
  60                 searches.extend(search_builder.build(assignment))
  61                 if num_searches < len(searches):
  62                     log().table_dump('Searches for assignment',
  63                                      _dump_searches(searches, query, num_searches))
  64                 num_searches = len(searches)
  65             searches.sort(key=lambda s: s.penalty)
  66
  67         return query, searches
  68
  69
  70     async def execute_searches(self, query: QueryStruct,
  71                                searches: List[AbstractSearch]) -> SearchResults:
  72         """ Run the abstract searches against the database until a result
  73             is found.
  74         """
  75         log().section('Execute database searches')
  76         results = SearchResults()
  77         end_time = dt.datetime.now() + self.timeout
  78
  79         num_results = 0
  80         min_ranking = 1000.0
  81         prev_penalty = 0.0
  82         for i, search in enumerate(searches):
  83             if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
  84                 break
  85             log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
  86             for result in await search.lookup(self.conn, self.params):
  87                 results.append(result)
  88                 min_ranking = min(min_ranking, result.ranking + 0.5, search.penalty + 0.3)
  89             log().result_dump('Results', ((r.accuracy, r) for r in results[num_results:]))
  90             num_results = len(results)
  91             prev_penalty = search.penalty
  92             if dt.datetime.now() >= end_time:
  93                 break
  94
  95         if results:
  96             min_ranking = min(r.ranking for r in results)
  97             results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5)
  98
  99         if results:
 100             min_rank = min(r.rank_search for r in results)
 101
 102             results = SearchResults(r for r in results
 103                                     if r.ranking + 0.05 * (r.rank_search - min_rank)
 104                                        < min_ranking + 0.5)
 105
 106             results.sort(key=lambda r: r.accuracy - r.calculated_importance())
 107             results = SearchResults(results[:self.limit])
 108
 109         return results
 110
 111
 112     async def lookup_pois(self, categories: List[Tuple[str, str]],
 113                           phrases: List[Phrase]) -> SearchResults:
 114         """ Look up places by category. If phrase is given, a place search
 115             over the phrase will be executed first and places close to the
 116             results returned.
 117         """
 118         log().function('forward_lookup_pois', categories=categories, params=self.params)
 119
 120         if phrases:
 121             query, searches = await self.build_searches(phrases)
 122
 123             if query:
 124                 searches = [wrap_near_search(categories, s) for s in searches[:50]]
 125                 results = await self.execute_searches(query, searches)
 126             else:
 127                 results = SearchResults()
 128         else:
 129             search = build_poi_search(categories, self.params.countries)
 130             results = await search.lookup(self.conn, self.params)
 131
 132         await add_result_details(self.conn, results, self.params)
 133         log().result_dump('Final Results', ((r.accuracy, r) for r in results))
 134
 135         return results
 136
 137
 138     async def lookup(self, phrases: List[Phrase]) -> SearchResults:
 139         """ Look up a single free-text query.
 140         """
 141         log().function('forward_lookup', phrases=phrases, params=self.params)
 142         results = SearchResults()
 143
 144         if self.params.is_impossible():
 145             return results
 146
 147         query, searches = await self.build_searches(phrases)
 148
 149         if searches:
 150             # Execute SQL until an appropriate result is found.
 151             results = await self.execute_searches(query, searches[:50])
 152             await add_result_details(self.conn, results, self.params)
 153             log().result_dump('Final Results', ((r.accuracy, r) for r in results))
 154
 155         return results
 156
 157
 158 # pylint: disable=invalid-name,too-many-locals
 159 def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
 160                    start: int = 0) -> Iterator[Optional[List[Any]]]:
 161     yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
 162            'Qualifier', 'Catgeory', 'Rankings']
 163
 164     def tk(tl: List[int]) -> str:
 165         tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
 166
 167         return f"[{','.join(tstr)}]"
 168
 169     def fmt_ranking(f: Any) -> str:
 170         if not f:
 171             return ''
 172         ranks = ','.join((f"{tk(r.tokens)}^{r.penalty:.3g}" for r in f.rankings))
 173         if len(ranks) > 100:
 174             ranks = ranks[:100] + '...'
 175         return f"{f.column}({ranks},def={f.default:.3g})"
 176
 177     def fmt_lookup(l: Any) -> str:
 178         if not l:
 179             return ''
 180
 181         return f"{l.lookup_type}({l.column}{tk(l.tokens)})"
 182
 183
 184     def fmt_cstr(c: Any) -> str:
 185         if not c:
 186             return ''
 187
 188         return f'{c[0]}^{c[1]}'
 189
 190     for search in searches[start:]:
 191         fields = ('lookups', 'rankings', 'countries', 'housenumbers',
 192                   'postcodes', 'qualifiers')
 193         if hasattr(search, 'search'):
 194             iters = itertools.zip_longest([f"{search.penalty:.3g}"],
 195                                           *(getattr(search.search, attr, []) for attr in fields),
 196                                           getattr(search, 'categories', []),
 197                                           fillvalue='')
 198         else:
 199             iters = itertools.zip_longest([f"{search.penalty:.3g}"],
 200                                           *(getattr(search, attr, []) for attr in fields),
 201                                           [],
 202                                           fillvalue='')
 203         for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
 204             yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
 205                    fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
 206         yield None