From: Sarah Hoffmann
Date: Sat, 12 Aug 2023 15:06:38 +0000 (+0200)
Subject: Merge remote-tracking branch 'upstream/master'
X-Git-Tag: deploy~56
X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/56f9535aa9cf3f9f500471603e5876cd3e0ddb0b?hp=e7ccd46a61f6fd4a2be17a983f02d3a13ce460e8

Merge remote-tracking branch 'upstream/master'
---

diff --git a/nominatim/api/search/db_search_builder.py b/nominatim/api/search/db_search_builder.py
index 8dd435d0..3ed66a42 100644
--- a/nominatim/api/search/db_search_builder.py
+++ b/nominatim/api/search/db_search_builder.py
@@ -111,9 +111,11 @@ class SearchBuilder:
             penalty = min(categories.penalties)
             categories.penalties = [p - penalty for p in categories.penalties]
             for search in builder:
-                yield dbs.NearSearch(penalty, categories, search)
+                yield dbs.NearSearch(penalty + assignment.penalty, categories, search)
         else:
-            yield from builder
+            for search in builder:
+                search.penalty += assignment.penalty
+                yield search
 
 
     def build_poi_search(self, sdata: dbf.SearchData) -> Iterator[dbs.AbstractSearch]:
@@ -210,8 +212,7 @@ class SearchBuilder:
             yield penalty, exp_count, dbf.lookup_by_names(name_tokens, addr_tokens)
             return
 
-        exp_count = min(exp_count, min(t.count for t in addr_partials)) \
-                    if addr_partials else exp_count
+        exp_count = exp_count / (2**len(addr_partials)) if addr_partials else exp_count
 
         # Partial term to frequent. Try looking up by rare full names first.
         name_fulls = self.query.get_tokens(name, TokenType.WORD)
diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py
index 3f402436..02a45e7c 100644
--- a/nominatim/api/search/db_searches.py
+++ b/nominatim/api/search/db_searches.py
@@ -317,7 +317,7 @@ class PoiSearch(AbstractSearch):
     """
     def __init__(self, sdata: SearchData) -> None:
         super().__init__(sdata.penalty)
-        self.categories = sdata.qualifiers
+        self.qualifiers = sdata.qualifiers
         self.countries = sdata.countries
 
 
@@ -339,7 +339,7 @@ class PoiSearch(AbstractSearch):
                            .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
                            .limit(LIMIT_PARAM)
 
-            classtype = self.categories.values
+            classtype = self.qualifiers.values
             if len(classtype) == 1:
                 cclass, ctype = classtype[0]
                 sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
@@ -358,7 +358,7 @@ class PoiSearch(AbstractSearch):
                 rows.extend(await conn.execute(sql, bind_params))
         else:
             # use the class type tables
-            for category in self.categories.values:
+            for category in self.qualifiers.values:
                 table = await conn.get_class_table(*category)
                 if table is not None:
                     sql = _select_placex(t)\
@@ -384,7 +384,7 @@ class PoiSearch(AbstractSearch):
         for row in rows:
             result = nres.create_from_placex_row(row, nres.SearchResult)
             assert result
-            result.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type))
+            result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
             result.bbox = Bbox.from_wkb(row.bbox)
             results.append(result)
 
@@ -622,7 +622,10 @@ class PlaceSearch(AbstractSearch):
 
         if details.viewbox is not None:
             if details.bounded_viewbox:
-                sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+                if details.viewbox.area < 0.2:
+                    sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+                else:
+                    sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
             else:
                 penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
                                    (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
@@ -630,7 +633,11 @@ class PlaceSearch(AbstractSearch):
 
         if details.near is not None:
             if details.near_radius is not None:
-                sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+                if details.near_radius < 0.1:
+                    sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+                else:
+                    sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
+                                                                           NEAR_RADIUS_PARAM))
             sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(NEAR_PARAM)
                                       .label('importance'))
             sql = sql.order_by(sa.desc(sa.text('importance')))
diff --git a/nominatim/api/search/geocoder.py b/nominatim/api/search/geocoder.py
index d341b6cd..564e3d8d 100644
--- a/nominatim/api/search/geocoder.py
+++ b/nominatim/api/search/geocoder.py
@@ -152,7 +152,8 @@ class ForwardGeocoder:
 # pylint: disable=invalid-name,too-many-locals
 def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
                    start: int = 0) -> Iterator[Optional[List[Any]]]:
-    yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries', 'Qualifier', 'Rankings']
+    yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
+           'Qualifier', 'Catgeory', 'Rankings']
 
     def tk(tl: List[int]) -> str:
         tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
@@ -182,11 +183,18 @@ def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
 
     for search in searches[start:]:
         fields = ('lookups', 'rankings', 'countries', 'housenumbers',
-                  'postcodes', 'qualifier')
-        iters = itertools.zip_longest([f"{search.penalty:.3g}"],
-                                      *(getattr(search, attr, []) for attr in fields),
-                                      fillvalue= '')
-        for penalty, lookup, rank, cc, hnr, pc, qual in iters:
+                  'postcodes', 'qualifiers')
+        if hasattr(search, 'search'):
+            iters = itertools.zip_longest([f"{search.penalty:.3g}"],
+                                          *(getattr(search.search, attr, []) for attr in fields),
+                                          getattr(search, 'categories', []),
+                                          fillvalue='')
+        else:
+            iters = itertools.zip_longest([f"{search.penalty:.3g}"],
+                                          *(getattr(search, attr, []) for attr in fields),
+                                          [],
+                                          fillvalue='')
+        for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
             yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
-                   fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_ranking(rank)]
+                   fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
         yield None
diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py
index ad08294e..d3e34537 100644
--- a/nominatim/api/search/icu_tokenizer.py
+++ b/nominatim/api/search/icu_tokenizer.py
@@ -83,7 +83,7 @@ class ICUToken(qmod.Token):
         seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
         distance = 0
         for tag, afrom, ato, bfrom, bto in seq.get_opcodes():
-            if tag == 'delete' and (afrom == 0 or ato == len(self.lookup_word)):
+            if tag in ('delete', 'insert') and (afrom == 0 or ato == len(self.lookup_word)):
                 distance += 1
             elif tag == 'replace':
                 distance += max((ato-afrom), (bto-bfrom))
diff --git a/nominatim/api/search/token_assignment.py b/nominatim/api/search/token_assignment.py
index 0ae2cd43..3f0e737b 100644
--- a/nominatim/api/search/token_assignment.py
+++ b/nominatim/api/search/token_assignment.py
@@ -253,6 +253,8 @@ class _TokenSequence:
             priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
             if not self._adapt_penalty_from_priors(priors, 1):
                 return False
+        if any(t.ttype == qmod.TokenType.CATEGORY for t in self.seq):
+            self.penalty += 1.0
 
         return True
 
diff --git a/nominatim/db/sqlalchemy_types.py b/nominatim/db/sqlalchemy_types.py
index 7d3789aa..9e1f9fce 100644
--- a/nominatim/db/sqlalchemy_types.py
+++ b/nominatim/db/sqlalchemy_types.py
@@ -66,7 +66,15 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg]
 
 
         def ST_DWithin(self, other: SaColumn, distance: SaColumn) -> SaColumn:
-            return sa.func.ST_DWithin(self, other, distance, type_=sa.Float)
+            return sa.func.ST_DWithin(self, other, distance, type_=sa.Boolean)
+
+
+        def ST_DWithin_no_index(self, other: SaColumn, distance: SaColumn) -> SaColumn:
+            return sa.func._ST_DWithin(self, other, distance, type_=sa.Boolean)
+
+
+        def ST_Intersects_no_index(self, other: SaColumn) -> SaColumn:
+            return sa.func._ST_Intersects(self, other, type_=sa.Boolean)
 
 
         def ST_Distance(self, other: SaColumn) -> SaColumn:
diff --git a/test/python/api/search/test_db_search_builder.py b/test/python/api/search/test_db_search_builder.py
index d1ad320b..c93b8ead 100644
--- a/test/python/api/search/test_db_search_builder.py
+++ b/test/python/api/search/test_db_search_builder.py
@@ -161,7 +161,7 @@ def test_category_only(kwargs):
     search = searches[0]
 
     assert isinstance(search, dbs.PoiSearch)
-    assert search.categories.values == [('this', 'that')]
+    assert search.qualifiers.values == [('this', 'that')]
 
 
 @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'},
@@ -384,7 +384,7 @@ def test_frequent_partials_in_name_and_address():
 
 
 def test_too_frequent_partials_in_name_and_address():
-    searches = make_counted_searches(10000, 1, 10000, 1)
+    searches = make_counted_searches(20000, 1, 10000, 1)
 
     assert len(searches) == 1