git.openstreetmap.org Git - nominatim.git/commitdiff
prefilter bad results before adding details and reranking
author: Sarah Hoffmann <lonvia@denofr.de>
Tue, 6 Feb 2024 19:29:48 +0000 (20:29 +0100)
committer: Sarah Hoffmann <lonvia@denofr.de>
Tue, 6 Feb 2024 19:29:48 +0000 (20:29 +0100)
Move the first cut of the result list to before the reranking
by result match. This means that results with significantly
lower importance are removed early, regardless of how well
they match the original query.

Fixes #3266.

nominatim/api/search/geocoder.py

index 27e4d91ea6ae412a5b359bf06a65bdc165b8fdc1..711f83833f9408ff980c29f5eeca046b8baa28d6 100644 (file)
@@ -104,19 +104,27 @@ class ForwardGeocoder:
         return SearchResults(results.values())
 
 
         return SearchResults(results.values())
 
 
+    def pre_filter_results(self, results: SearchResults) -> SearchResults:
+        """ Remove results that are significantly worse than the
+            best match.
+        """
+        if results:
+            max_ranking = min(r.ranking for r in results) + 0.5
+            results = SearchResults(r for r in results if r.ranking < max_ranking)
+
+        return results
+
+
     def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
         """ Remove badly matching results, sort by ranking and
             limit to the configured number of results.
         """
         if results:
     def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
         """ Remove badly matching results, sort by ranking and
             limit to the configured number of results.
         """
         if results:
-            min_ranking = min(r.ranking for r in results)
-            results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5)
             results.sort(key=lambda r: r.ranking)
             results.sort(key=lambda r: r.ranking)
-
-        if results:
             min_rank = results[0].rank_search
             min_rank = results[0].rank_search
+            min_ranking = results[0].ranking
             results = SearchResults(r for r in results
             results = SearchResults(r for r in results
-                                    if r.ranking + 0.05 * (r.rank_search - min_rank)
+                                    if r.ranking + 0.03 * (r.rank_search - min_rank)
                                        < min_ranking + 0.5)
 
             results = SearchResults(results[:self.limit])
                                        < min_ranking + 0.5)
 
             results = SearchResults(results[:self.limit])
@@ -174,6 +182,7 @@ class ForwardGeocoder:
             if query:
                 searches = [wrap_near_search(categories, s) for s in searches[:50]]
                 results = await self.execute_searches(query, searches)
             if query:
                 searches = [wrap_near_search(categories, s) for s in searches[:50]]
                 results = await self.execute_searches(query, searches)
+                results = self.pre_filter_results(results)
                 await add_result_details(self.conn, results, self.params)
                 log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
                 results = self.sort_and_cut_results(results)
                 await add_result_details(self.conn, results, self.params)
                 log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
                 results = self.sort_and_cut_results(results)
@@ -203,6 +212,7 @@ class ForwardGeocoder:
         if searches:
             # Execute SQL until an appropriate result is found.
             results = await self.execute_searches(query, searches[:50])
         if searches:
             # Execute SQL until an appropriate result is found.
             results = await self.execute_searches(query, searches[:50])
+            results = self.pre_filter_results(results)
             await add_result_details(self.conn, results, self.params)
             log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
             self.rerank_by_query(query, results)
             await add_result_details(self.conn, results, self.params)
             log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
             self.rerank_by_query(query, results)