From: Sarah Hoffmann
Date: Sat, 12 Aug 2023 15:06:38 +0000 (+0200)
Subject: Merge remote-tracking branch 'upstream/master'
X-Git-Tag: deploy~56
X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/56f9535aa9cf3f9f500471603e5876cd3e0ddb0b?hp=e7ccd46a61f6fd4a2be17a983f02d3a13ce460e8

Merge remote-tracking branch 'upstream/master'
---

diff --git a/nominatim/api/search/db_search_builder.py b/nominatim/api/search/db_search_builder.py
index 8dd435d0..3ed66a42 100644
--- a/nominatim/api/search/db_search_builder.py
+++ b/nominatim/api/search/db_search_builder.py
@@ -111,9 +111,11 @@ class SearchBuilder:
             penalty = min(categories.penalties)
             categories.penalties = [p - penalty for p in categories.penalties]
             for search in builder:
-                yield dbs.NearSearch(penalty, categories, search)
+                yield dbs.NearSearch(penalty + assignment.penalty, categories, search)
         else:
-            yield from builder
+            for search in builder:
+                search.penalty += assignment.penalty
+                yield search
 
 
     def build_poi_search(self, sdata: dbf.SearchData) -> Iterator[dbs.AbstractSearch]:
@@ -210,8 +212,7 @@ class SearchBuilder:
             yield penalty, exp_count, dbf.lookup_by_names(name_tokens, addr_tokens)
             return
 
-        exp_count = min(exp_count, min(t.count for t in addr_partials)) \
-                    if addr_partials else exp_count
+        exp_count = exp_count / (2**len(addr_partials)) if addr_partials else exp_count
 
         # Partial term to frequent. Try looking up by rare full names first.
         name_fulls = self.query.get_tokens(name, TokenType.WORD)
diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py
index 3f402436..02a45e7c 100644
--- a/nominatim/api/search/db_searches.py
+++ b/nominatim/api/search/db_searches.py
@@ -317,7 +317,7 @@ class PoiSearch(AbstractSearch):
     """
     def __init__(self, sdata: SearchData) -> None:
         super().__init__(sdata.penalty)
-        self.categories = sdata.qualifiers
+        self.qualifiers = sdata.qualifiers
         self.countries = sdata.countries
 
 
@@ -339,7 +339,7 @@ class PoiSearch(AbstractSearch):
                            .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
                            .limit(LIMIT_PARAM)
 
-            classtype = self.categories.values
+            classtype = self.qualifiers.values
             if len(classtype) == 1:
                 cclass, ctype = classtype[0]
                 sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
@@ -358,7 +358,7 @@ class PoiSearch(AbstractSearch):
                 rows.extend(await conn.execute(sql, bind_params))
         else:
             # use the class type tables
-            for category in self.categories.values:
+            for category in self.qualifiers.values:
                 table = await conn.get_class_table(*category)
                 if table is not None:
                     sql = _select_placex(t)\
@@ -384,7 +384,7 @@ class PoiSearch(AbstractSearch):
         for row in rows:
             result = nres.create_from_placex_row(row, nres.SearchResult)
             assert result
-            result.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type))
+            result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
             result.bbox = Bbox.from_wkb(row.bbox)
             results.append(result)
 
@@ -622,7 +622,10 @@ class PlaceSearch(AbstractSearch):
 
         if details.viewbox is not None:
             if details.bounded_viewbox:
-                sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+                if details.viewbox.area < 0.2:
+                    sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+                else:
+                    sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
             else:
                 penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
                                    (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
@@ -630,7 +633,11 @@ class PlaceSearch(AbstractSearch):
 
         if details.near is not None:
             if details.near_radius is not None:
-                sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+                if details.near_radius < 0.1:
+                    sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+                else:
+                    sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
+                                                                           NEAR_RADIUS_PARAM))
             sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(NEAR_PARAM)
                                       .label('importance'))
             sql = sql.order_by(sa.desc(sa.text('importance')))
diff --git a/nominatim/api/search/geocoder.py b/nominatim/api/search/geocoder.py
index d341b6cd..564e3d8d 100644
--- a/nominatim/api/search/geocoder.py
+++ b/nominatim/api/search/geocoder.py
@@ -152,7 +152,8 @@ class ForwardGeocoder:
 # pylint: disable=invalid-name,too-many-locals
 def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
                    start: int = 0) -> Iterator[Optional[List[Any]]]:
-    yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries', 'Qualifier', 'Rankings']
+    yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
+           'Qualifier', 'Catgeory', 'Rankings']
 
     def tk(tl: List[int]) -> str:
         tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
@@ -182,11 +183,18 @@ def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
 
     for search in searches[start:]:
         fields = ('lookups', 'rankings', 'countries', 'housenumbers',
-                  'postcodes', 'qualifier')
-        iters = itertools.zip_longest([f"{search.penalty:.3g}"],
-                                      *(getattr(search, attr, []) for attr in fields),
-                                      fillvalue= '')
-        for penalty, lookup, rank, cc, hnr, pc, qual in iters:
+                  'postcodes', 'qualifiers')
+        if hasattr(search, 'search'):
+            iters = itertools.zip_longest([f"{search.penalty:.3g}"],
+                                          *(getattr(search.search, attr, []) for attr in fields),
+                                          getattr(search, 'categories', []),
+                                          fillvalue='')
+        else:
+            iters = itertools.zip_longest([f"{search.penalty:.3g}"],
+                                          *(getattr(search, attr, []) for attr in fields),
+                                          [],
+                                          fillvalue='')
+        for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
             yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
-                   fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_ranking(rank)]
+                   fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
         yield None
diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py
index ad08294e..d3e34537 100644
--- a/nominatim/api/search/icu_tokenizer.py
+++ b/nominatim/api/search/icu_tokenizer.py
@@ -83,7 +83,7 @@ class ICUToken(qmod.Token):
         seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
         distance = 0
         for tag, afrom, ato, bfrom, bto in seq.get_opcodes():
-            if tag == 'delete' and (afrom == 0 or ato == len(self.lookup_word)):
+            if tag in ('delete', 'insert') and (afrom == 0 or ato == len(self.lookup_word)):
                 distance += 1
             elif tag == 'replace':
                 distance += max((ato-afrom), (bto-bfrom))
diff --git a/nominatim/api/search/token_assignment.py b/nominatim/api/search/token_assignment.py
index 0ae2cd43..3f0e737b 100644
--- a/nominatim/api/search/token_assignment.py
+++ b/nominatim/api/search/token_assignment.py
@@ -253,6 +253,8 @@ class _TokenSequence:
             priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
             if not self._adapt_penalty_from_priors(priors, 1):
                 return False
+        if any(t.ttype == qmod.TokenType.CATEGORY for t in self.seq):
+            self.penalty += 1.0
 
         return True
 
diff --git a/nominatim/db/sqlalchemy_types.py b/nominatim/db/sqlalchemy_types.py
index 7d3789aa..9e1f9fce 100644
--- a/nominatim/db/sqlalchemy_types.py
+++ b/nominatim/db/sqlalchemy_types.py
@@ -66,7 +66,15 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg]
 
 
         def ST_DWithin(self, other: SaColumn, distance: SaColumn) -> SaColumn:
-            return sa.func.ST_DWithin(self, other, distance, type_=sa.Float)
+            return sa.func.ST_DWithin(self, other, distance, type_=sa.Boolean)
+
+
+        def ST_DWithin_no_index(self, other: SaColumn, distance: SaColumn) -> SaColumn:
+            return sa.func._ST_DWithin(self, other, distance, type_=sa.Boolean)
+
+
+        def ST_Intersects_no_index(self, other: SaColumn) -> SaColumn:
+            return sa.func._ST_Intersects(self, other, type_=sa.Boolean)
 
 
         def ST_Distance(self, other: SaColumn) -> SaColumn:
diff --git a/test/python/api/search/test_db_search_builder.py b/test/python/api/search/test_db_search_builder.py
index d1ad320b..c93b8ead 100644
--- a/test/python/api/search/test_db_search_builder.py
+++ b/test/python/api/search/test_db_search_builder.py
@@ -161,7 +161,7 @@ def test_category_only(kwargs):
     search = searches[0]
 
     assert isinstance(search, dbs.PoiSearch)
-    assert search.categories.values == [('this', 'that')]
+    assert search.qualifiers.values == [('this', 'that')]
 
 
 @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'},
@@ -384,7 +384,7 @@ def test_frequent_partials_in_name_and_address():
 
 
 def test_too_frequent_partials_in_name_and_address():
-    searches = make_counted_searches(10000, 1, 10000, 1)
+    searches = make_counted_searches(20000, 1, 10000, 1)
 
     assert len(searches) == 1