git.openstreetmap.org Git - nominatim.git/blob - test/python/api/search/test_db_search_builder.py
drop category tokens when they make up a full phrase
[nominatim.git] / test / python / api / search / test_db_search_builder.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Tests for creating abstract searches from token assignments.
9 """
10 import pytest
11
12 from nominatim.api.search.query import Token, TokenRange, BreakType, PhraseType, TokenType, QueryStruct, Phrase
13 from nominatim.api.search.db_search_builder import SearchBuilder
14 from nominatim.api.search.token_assignment import TokenAssignment
15 from nominatim.api.types import SearchDetails
16 import nominatim.api.search.db_searches as dbs
17
class MyToken(Token):
    """ Token stand-in for these tests whose category lookup always
        yields the same fixed pair.
    """

    def get_category(self):
        fixed_category = ('this', 'that')
        return fixed_category
21
22
def make_query(*args):
    """ Assemble a single-phrase QueryStruct filled with MyToken tokens.

        Each positional argument describes the tokens starting at the
        node whose index equals the argument's position. It is a list of
        `(end, token_type, [(token_id, word), ...])` tuples. PARTIAL
        tokens are created with a first argument of 0.5, all other
        types with 0.0.
    """
    query = QueryStruct([Phrase(PhraseType.NONE, '')])

    # One WORD node per position up to the largest end index in use,
    # followed by the closing END node.
    last_end = max(entry[0] for tokens in args for entry in tokens)
    for _ in range(last_end):
        query.add_node(BreakType.WORD, PhraseType.NONE)
    query.add_node(BreakType.END, PhraseType.NONE)

    for start, tokens in enumerate(args):
        for end, token_type, token_infos in tokens:
            for token_id, word in token_infos:
                first_arg = 0.5 if token_type == TokenType.PARTIAL else 0.0
                token = MyToken(first_arg, token_id, 1, word, True)
                query.add_token(TokenRange(start, end), token_type, token)

    return query
38
39
def test_country_search():
    """ A country-only assignment produces exactly one CountrySearch
        containing the words of all country tokens.
    """
    country_tokens = [(2, 'de'), (3, 'en')]
    query = make_query([(1, TokenType.COUNTRY, country_tokens)])
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(country=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1

    found = results[0]
    assert isinstance(found, dbs.CountrySearch)
    assert set(found.countries.values) == {'de', 'en'}
52
53
def test_country_search_with_country_restriction():
    """ With the search details restricted to countries 'en,fr', only
        the country token 'en' remains in the resulting CountrySearch.
    """
    country_tokens = [(2, 'de'), (3, 'en')]
    query = make_query([(1, TokenType.COUNTRY, country_tokens)])
    details = SearchDetails.from_kwargs({'countries': 'en,fr'})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(country=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1

    found = results[0]
    assert isinstance(found, dbs.CountrySearch)
    assert set(found.countries.values) == {'en'}
66
67
def test_country_search_with_conflicting_country_restriction():
    """ No search is built when the country restriction ('fr') matches
        none of the country tokens ('de', 'en').
    """
    country_tokens = [(2, 'de'), (3, 'en')]
    query = make_query([(1, TokenType.COUNTRY, country_tokens)])
    details = SearchDetails.from_kwargs({'countries': 'fr'})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(country=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert results == []
75
76
def test_postcode_search_simple():
    """ A lone postcode token yields one PostcodeSearch without
        countries, lookups or rankings.
    """
    query = make_query([(1, TokenType.POSTCODE, [(34, '2367')])])
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(postcode=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PostcodeSearch)
    assert found.postcodes.values == ['2367']
    assert not found.countries.values
    assert not found.lookups
    assert not found.rankings
91
92
def test_postcode_with_country():
    """ A postcode followed by a country yields one PostcodeSearch that
        carries the country but has no lookups or rankings.
    """
    postcode_part = [(1, TokenType.POSTCODE, [(34, '2367')])]
    country_part = [(2, TokenType.COUNTRY, [(1, 'xx')])]
    query = make_query(postcode_part, country_part)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(postcode=TokenRange(0, 1),
                                 country=TokenRange(1, 2))

    results = list(builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PostcodeSearch)
    assert found.postcodes.values == ['2367']
    assert found.countries.values == ['xx']
    assert not found.lookups
    assert not found.rankings
109
110
def test_postcode_with_address():
    """ A postcode followed by an address term that only has a partial
        token yields one PostcodeSearch with lookups but no rankings.
    """
    q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])],
                   [(2, TokenType.PARTIAL, [(100, 'word')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2)])))

    assert len(searches) == 1
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    # Check the value list explicitly, like the other postcode tests,
    # instead of relying on the truthiness of the container object.
    assert not search.countries.values
    assert search.lookups
    assert not search.rankings
127
128
def test_postcode_with_address_with_full_word():
    """ A postcode followed by an address term that has a partial and
        a full-word token yields one PostcodeSearch with lookups and
        exactly one ranking.
    """
    q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])],
                   [(2, TokenType.PARTIAL, [(100, 'word')]),
                    (2, TokenType.WORD, [(1, 'full')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2)])))

    assert len(searches) == 1
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    # Check the value list explicitly, like the other postcode tests,
    # instead of relying on the truthiness of the container object.
    assert not search.countries.values
    assert search.lookups
    assert len(search.rankings) == 1
146
147
@pytest.mark.parametrize('kwargs', [
    {'viewbox': '0,0,1,1', 'bounded_viewbox': True},
    {'near': '10,10'},
])
def test_category_only(kwargs):
    """ A category-only assignment yields a single PoiSearch when the
        details contain a bounded viewbox or a near point.
    """
    query = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])])
    details = SearchDetails.from_kwargs(kwargs)
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(category=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1

    found = results[0]
    assert isinstance(found, dbs.PoiSearch)
    assert found.qualifiers.values == [('this', 'that')]
162
163
@pytest.mark.parametrize('kwargs', [
    {'viewbox': '0,0,1,1'},
    {},
])
def test_category_skipped(kwargs):
    """ A category-only assignment yields no search when the details
        hold only an unbounded viewbox or nothing at all.
    """
    query = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])])
    details = SearchDetails.from_kwargs(kwargs)
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(category=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert results == []
173
174
def test_name_only_search():
    """ A single name term yields one PlaceSearch with one lookup and
        one ranking and no further restrictions.
    """
    name_part = [(1, TokenType.PARTIAL, [(1, 'a')]),
                 (1, TokenType.WORD, [(100, 'a')])]
    query = make_query(name_part)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert not found.postcodes.values
    assert not found.countries.values
    assert not found.housenumbers.values
    assert not found.qualifiers.values
    assert len(found.lookups) == 1
    assert len(found.rankings) == 1
192
193
def test_name_with_qualifier():
    """ A name term followed by a qualifier yields one PlaceSearch
        whose qualifiers hold the category of the qualifier token.
    """
    name_part = [(1, TokenType.PARTIAL, [(1, 'a')]),
                 (1, TokenType.WORD, [(100, 'a')])]
    qualifier_part = [(2, TokenType.QUALIFIER, [(55, 'hotel')])]
    query = make_query(name_part, qualifier_part)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 qualifier=TokenRange(1, 2))

    results = list(builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert not found.postcodes.values
    assert not found.countries.values
    assert not found.housenumbers.values
    assert found.qualifiers.values == [('this', 'that')]
    assert len(found.lookups) == 1
    assert len(found.rankings) == 1
213
214
def test_name_with_housenumber_search():
    """ A name term followed by a housenumber yields one PlaceSearch
        that carries the housenumber.
    """
    name_part = [(1, TokenType.PARTIAL, [(1, 'a')]),
                 (1, TokenType.WORD, [(100, 'a')])]
    housenumber_part = [(2, TokenType.HOUSENUMBER, [(66, '66')])]
    query = make_query(name_part, housenumber_part)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 housenumber=TokenRange(1, 2))

    results = list(builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert not found.postcodes.values
    assert not found.countries.values
    assert found.housenumbers.values == ['66']
    assert len(found.lookups) == 1
    assert len(found.rankings) == 1
233
234
def test_name_and_address():
    """ A name term with two single-word address terms yields one
        PlaceSearch with two lookups and three rankings.
    """
    term_a = [(1, TokenType.PARTIAL, [(1, 'a')]),
              (1, TokenType.WORD, [(100, 'a')])]
    term_b = [(2, TokenType.PARTIAL, [(2, 'b')]),
              (2, TokenType.WORD, [(101, 'b')])]
    term_c = [(3, TokenType.PARTIAL, [(3, 'c')]),
              (3, TokenType.WORD, [(102, 'c')])]
    query = make_query(term_a, term_b, term_c)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 2),
                                          TokenRange(2, 3)])

    results = list(builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert not found.postcodes.values
    assert not found.countries.values
    assert not found.housenumbers.values
    assert len(found.lookups) == 2
    assert len(found.rankings) == 3
258
259
def test_name_and_complex_address():
    """ A name term with address ranges (1, 2) and (2, 4), where the
        query also holds a full word spanning nodes 1 to 3, yields one
        PlaceSearch with two lookups and two rankings.
    """
    term_a = [(1, TokenType.PARTIAL, [(1, 'a')]),
              (1, TokenType.WORD, [(100, 'a')])]
    term_b = [(2, TokenType.PARTIAL, [(2, 'b')]),
              (3, TokenType.WORD, [(101, 'bc')])]
    term_c = [(3, TokenType.PARTIAL, [(3, 'c')])]
    term_d = [(4, TokenType.PARTIAL, [(4, 'd')]),
              (4, TokenType.WORD, [(103, 'd')])]
    query = make_query(term_a, term_b, term_c, term_d)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 2),
                                          TokenRange(2, 4)])

    results = list(builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert not found.postcodes.values
    assert not found.countries.values
    assert not found.housenumbers.values
    assert len(found.lookups) == 2
    assert len(found.rankings) == 2
284
285
def test_name_only_near_search():
    """ A category token followed by a name term yields one NearSearch
        that wraps a PlaceSearch.
    """
    category_part = [(1, TokenType.CATEGORY, [(88, 'g')])]
    name_part = [(2, TokenType.PARTIAL, [(1, 'a')]),
                 (2, TokenType.WORD, [(100, 'a')])]
    query = make_query(category_part, name_part)
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 category=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.NearSearch)
    assert isinstance(found.search, dbs.PlaceSearch)
300
301
def test_name_only_search_with_category():
    """ A name-only assignment with a category filter in the search
        details yields one NearSearch that wraps a PlaceSearch.
    """
    name_part = [(1, TokenType.PARTIAL, [(1, 'a')]),
                 (1, TokenType.WORD, [(100, 'a')])]
    query = make_query(name_part)
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.NearSearch)
    assert isinstance(found.search, dbs.PlaceSearch)
314
315
def test_name_only_search_with_countries():
    """ A name-only assignment with a country restriction in the search
        details yields one PlaceSearch carrying those countries.
    """
    name_part = [(1, TokenType.PARTIAL, [(1, 'a')]),
                 (1, TokenType.WORD, [(100, 'a')])]
    query = make_query(name_part)
    details = SearchDetails.from_kwargs({'countries': 'de,en'})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert not found.postcodes.values
    assert set(found.countries.values) == {'de', 'en'}
    assert not found.housenumbers.values
330
331
def make_counted_searches(name_part, name_full, address_part, address_full,
                          num_address_parts=1):
    """ Build the searches for a query made of one name term followed
        by `num_address_parts` address terms.

        Every term gets one PARTIAL and one WORD token. The four leading
        arguments are handed to the matching tokens as their third
        positional argument (presumably the word count - to be
        confirmed against `Token`). The first term is assigned to the
        name, all remaining terms to one single address range.

        Returns the list of searches produced by the builder.
    """
    query = QueryStruct([Phrase(PhraseType.NONE, '')])
    for _ in range(1 + num_address_parts):
        query.add_node(BreakType.WORD, PhraseType.NONE)
    query.add_node(BreakType.END, PhraseType.NONE)

    # Tokens of the name term.
    query.add_token(TokenRange(0, 1), TokenType.PARTIAL,
                    MyToken(0.5, 1, name_part, 'name_part', True))
    query.add_token(TokenRange(0, 1), TokenType.WORD,
                    MyToken(0, 101, name_full, 'name_full', True))

    # Tokens of the address terms, one node further along each time.
    for offset in range(num_address_parts):
        start = offset + 1
        query.add_token(TokenRange(start, start + 1), TokenType.PARTIAL,
                        MyToken(0.5, 2, address_part, 'address_part', True))
        query.add_token(TokenRange(start, start + 1), TokenType.WORD,
                        MyToken(0, 102, address_full, 'address_full', True))

    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 1 + num_address_parts)])

    return list(builder.build(assignment))
353
354
def test_infrequent_partials_in_name():
    """ With a value of 1 for all tokens, a single PlaceSearch is built
        that looks up all name terms and restricts by address.
    """
    results = make_counted_searches(1, 1, 1, 1)

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert len(found.lookups) == 2
    assert len(found.rankings) == 2

    lookup_kinds = {(lookup.column, lookup.lookup_type)
                    for lookup in found.lookups}
    assert lookup_kinds == {('name_vector', 'lookup_all'),
                            ('nameaddress_vector', 'restrict')}
367
368
def test_frequent_partials_in_name_and_address():
    """ With a value of 9999 for both partial tokens, two PlaceSearches
        are built. Ordered by penalty, the first uses 'lookup_any' on
        the name with an address restriction, the second 'lookup_all'
        on both vectors.
    """
    results = make_counted_searches(9999, 1, 9999, 1)

    assert len(results) == 2

    assert all(isinstance(item, dbs.PlaceSearch) for item in results)
    results.sort(key=lambda item: item.penalty)

    first_kinds = {(lookup.column, lookup.lookup_type)
                   for lookup in results[0].lookups}
    assert first_kinds == {('name_vector', 'lookup_any'),
                           ('nameaddress_vector', 'restrict')}

    second_kinds = {(lookup.column, lookup.lookup_type)
                    for lookup in results[1].lookups}
    assert second_kinds == {('nameaddress_vector', 'lookup_all'),
                            ('name_vector', 'lookup_all')}
381
382
def test_too_frequent_partials_in_name_and_address():
    """ With values of 20000 and 10000 for the name and address partial
        tokens, only one PlaceSearch is built: 'lookup_any' on the name
        with an address restriction.
    """
    searches = make_counted_searches(20000, 1, 10000, 1)

    assert len(searches) == 1

    assert all(isinstance(item, dbs.PlaceSearch) for item in searches)
    searches.sort(key=lambda item: item.penalty)

    lookup_kinds = {(lookup.column, lookup.lookup_type)
                    for lookup in searches[0].lookups}
    assert lookup_kinds == {('name_vector', 'lookup_any'),
                            ('nameaddress_vector', 'restrict')}