git.openstreetmap.org Git - nominatim.git/blob - test/python/api/search/test_token_assignment.py
replace BreakType enum with simple char constants
[nominatim.git] / test / python / api / search / test_token_assignment.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Test for creation of token assignments from tokenized queries.
9 """
10 import pytest
11
12 from nominatim_api.search.query import QueryStruct, Phrase, PhraseType, TokenType, TokenRange, Token
13 import nominatim_api.search.query as qmod
14 from nominatim_api.search.token_assignment import yield_token_assignments, TokenAssignment, PENALTY_TOKENCHANGE
15
class MyToken(Token):
    """Minimal concrete token used as a placeholder in the test queries."""

    def get_category(self):
        """Return a fixed dummy pair of strings."""
        category = ('this', 'that')
        return category
19
20
def make_query(*args):
    """Build a QueryStruct from a compact node description.

    Each positional argument is a tuple of
    `(break type, phrase type, [(end node, token type), ...])`.
    The phrase type of the first argument is used for the single phrase
    of the query; its break type is not used. Every following argument
    adds a node with its break and phrase type, and a final node with
    BREAK_END closes the query. The token lists are then attached with
    ranges starting at the position of their argument and ending at the
    given end node. All tokens share the same dummy token object.
    """
    query = QueryStruct([Phrase(args[0][1], '')])
    placeholder = MyToken(penalty=3.0, token=45, count=1, addr_count=1,
                          lookup_word='foo')

    # The first argument describes the start node, which already exists.
    for break_type, phrase_type, _ in args[1:]:
        query.add_node(break_type, phrase_type)
    query.add_node(qmod.BREAK_END, PhraseType.NONE)

    for first, spec in enumerate(args):
        for last, token_type in spec[2]:
            token_range = TokenRange(first, last)
            query.add_token(token_range, token_type, placeholder)

    return query
35
36
def check_assignments(actual, *expected):
    """Assert that `actual` produces exactly the `expected` items.

    `actual` is any iterable; the order of its items does not matter.
    Every item must match one not yet consumed entry of `expected`
    and every entry of `expected` must be consumed in the end.

    Raises an AssertionError naming the unexpected item or listing
    the entries that were never produced.
    """
    todo = list(expected)
    for assignment in actual:
        assert assignment in todo, f"Unexpected assignment: {assignment}"
        todo.remove(assignment)

    # Report only what is still outstanding, not the full expectation list.
    assert not todo, f"Missing assignments: {todo}"
44
45
def test_query_with_missing_tokens():
    """A query whose nodes carry no tokens yields no assignments."""
    query = QueryStruct([Phrase(PhraseType.NONE, '')])
    query.add_node(qmod.BREAK_END, PhraseType.NONE)

    assignments = list(yield_token_assignments(query))

    assert assignments == []
51
52
def test_one_word_query():
    """A single word with partial, word and house number tokens
    yields only one assignment, which uses the word as name.
    """
    token_types = (TokenType.PARTIAL, TokenType.WORD, TokenType.HOUSENUMBER)
    query = make_query((qmod.BREAK_START, PhraseType.NONE,
                        [(1, ttype) for ttype in token_types]))

    assignments = list(yield_token_assignments(query))

    assert assignments == [TokenAssignment(name=TokenRange(0, 1))]
61
62
def test_single_postcode():
    """A lone postcode token yields a single postcode assignment."""
    start_node = (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.POSTCODE)])
    query = make_query(start_node)

    assignments = list(yield_token_assignments(query))

    assert assignments == [TokenAssignment(postcode=TokenRange(0, 1))]
69
70
def test_single_country_name():
    """A lone country token yields a single country assignment."""
    start_node = (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.COUNTRY)])
    query = make_query(start_node)

    assignments = list(yield_token_assignments(query))

    assert assignments == [TokenAssignment(country=TokenRange(0, 1))]
77
78
def test_single_word_poi_search():
    """A single word with near-item and qualifier tokens yields
    only the near-item assignment.
    """
    token_types = (TokenType.NEAR_ITEM, TokenType.QUALIFIER)
    query = make_query((qmod.BREAK_START, PhraseType.NONE,
                        [(1, ttype) for ttype in token_types]))

    assignments = list(yield_token_assignments(query))

    assert assignments == [TokenAssignment(near_item=TokenRange(0, 1))]
86
87
@pytest.mark.parametrize('btype', [qmod.BREAK_WORD, qmod.BREAK_PART, qmod.BREAK_TOKEN])
def test_multiple_simple_words(btype):
    """Three partial words joined by the given break type are either used
    together as name or split into a name and an address part. Every
    split carries the penalty listed for the break type in
    PENALTY_TOKENCHANGE.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (btype, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                       (btype, PhraseType.NONE, [(3, TokenType.PARTIAL)]))

    split_penalty = PENALTY_TOKENCHANGE[btype]

    # (name range, address range) of each expected split
    splits = [((0, 2), (2, 3)),
              ((0, 1), (1, 3)),
              ((1, 3), (0, 1)),
              ((2, 3), (0, 2))]

    expected = [TokenAssignment(name=TokenRange(0, 3))]
    for name, address in splits:
        expected.append(TokenAssignment(penalty=split_penalty,
                                        name=TokenRange(*name),
                                        address=[TokenRange(*address)]))

    check_assignments(yield_token_assignments(query), *expected)
107
108
def test_multiple_words_respect_phrase_break():
    """Two partial words separated by a phrase break are only ever
    split at that break: one becomes the name, the other the address.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]))

    first, second = TokenRange(0, 1), TokenRange(1, 2)

    check_assignments(yield_token_assignments(query),
                      TokenAssignment(name=first, address=[second]),
                      TokenAssignment(name=second, address=[first]))
118
119
def test_housenumber_and_street():
    """A house number followed by a partial word in the next phrase:
    the word is used either as name or as address.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
                       (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]))

    number, street = TokenRange(0, 1), TokenRange(1, 2)

    check_assignments(yield_token_assignments(query),
                      TokenAssignment(name=street, housenumber=number),
                      TokenAssignment(address=[street], housenumber=number))
129
130
def test_housenumber_and_street_backwards():
    """A partial word followed by a house number in the next phrase:
    the word is used either as name or as address.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]))

    street, number = TokenRange(0, 1), TokenRange(1, 2)

    check_assignments(yield_token_assignments(query),
                      TokenAssignment(name=street, housenumber=number),
                      TokenAssignment(address=[street], housenumber=number))
140
141
def test_housenumber_and_postcode():
    """Word, house number, word, postcode: the first word is used as
    name or as address, the second word always as address.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)]))

    # Parts that are identical in both expected assignments.
    common = dict(penalty=pytest.approx(0.3),
                  housenumber=TokenRange(1, 2),
                  postcode=TokenRange(3, 4))

    check_assignments(yield_token_assignments(query),
                      TokenAssignment(name=TokenRange(0, 1),
                                      address=[TokenRange(2, 3)],
                                      **common),
                      TokenAssignment(address=[TokenRange(0, 1), TokenRange(2, 3)],
                                      **common))
158
def test_postcode_and_housenumber():
    """Word, postcode, word, house number: the second word is used as
    name or as address, the first word always as address.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.POSTCODE)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]))

    # Parts that are identical in both expected assignments.
    common = dict(penalty=pytest.approx(0.3),
                  housenumber=TokenRange(3, 4),
                  postcode=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query),
                      TokenAssignment(name=TokenRange(2, 3),
                                      address=[TokenRange(0, 1)],
                                      **common),
                      TokenAssignment(address=[TokenRange(0, 1), TokenRange(2, 3)],
                                      **common))
175
176
def test_country_housenumber_postcode():
    """Country, word, house number and postcode in this order
    yield no assignment at all.
    """
    nodes = [(qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.COUNTRY)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.HOUSENUMBER)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)])]
    query = make_query(*nodes)

    # Without expected items any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
184
185
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY,
                                   TokenType.NEAR_ITEM, TokenType.QUALIFIER])
def test_housenumber_with_only_special_terms(ttype):
    """A house number followed only by a token of the given special
    type yields no assignment at all.
    """
    nodes = [(qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(2, ttype)])]
    query = make_query(*nodes)

    # Without expected items any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
193
194
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.HOUSENUMBER, TokenType.COUNTRY])
def test_multiple_special_tokens(ttype):
    """The same special token type before and after a partial word
    yields no assignment at all.
    """
    nodes = [(qmod.BREAK_START, PhraseType.NONE, [(1, ttype)]),
             (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
             (qmod.BREAK_PHRASE, PhraseType.NONE, [(3, ttype)])]
    query = make_query(*nodes)

    # Without expected items any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
202
203
def test_housenumber_many_phrases():
    """Three single-word phrases followed by a phrase with a house number
    and a word: the leading words are always separate address parts, the
    trailing word is used either as name or as a further address part.
    """
    q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                   (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                   (qmod.BREAK_PHRASE, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
                   (qmod.BREAK_PHRASE, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]),
                   (qmod.BREAK_WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)]))

    # No backslash continuations needed: the open parenthesis already
    # continues the expression across lines.
    check_assignments(yield_token_assignments(q),
                      TokenAssignment(penalty=0.1,
                                      name=TokenRange(4, 5),
                                      housenumber=TokenRange(3, 4),
                                      address=[TokenRange(0, 1), TokenRange(1, 2),
                                               TokenRange(2, 3)]),
                      TokenAssignment(penalty=0.1,
                                      housenumber=TokenRange(3, 4),
                                      address=[TokenRange(0, 1), TokenRange(1, 2),
                                               TokenRange(2, 3), TokenRange(4, 5)]))
221
222
def test_country_at_beginning():
    """A country followed by a word yields one assignment with the
    word as name and a penalty of 0.1.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.COUNTRY)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]))

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(1, 2),
                               country=TokenRange(0, 1))

    check_assignments(yield_token_assignments(query), expected)
230
231
def test_country_at_end():
    """A word followed by a country yields one assignment with the
    word as name and a penalty of 0.1.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]))

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(0, 1),
                               country=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), expected)
239
240
def test_country_in_middle():
    """A country enclosed by two words yields no assignment at all."""
    nodes = [(qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])]
    query = make_query(*nodes)

    # Without expected items any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
247
248
def test_postcode_with_designation():
    """A postcode followed by a word in the next phrase: using the word
    as name costs a penalty of 0.1, using it as address is free.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.POSTCODE)]),
                       (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]))

    postcode, word = TokenRange(0, 1), TokenRange(1, 2)

    check_assignments(yield_token_assignments(query),
                      TokenAssignment(penalty=0.1, name=word, postcode=postcode),
                      TokenAssignment(postcode=postcode, address=[word]))
258
259
def test_postcode_with_designation_backwards():
    """A word followed by a postcode in the next phrase: using the word
    as name is free, using it as address costs a penalty of 0.1.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.POSTCODE)]))

    word, postcode = TokenRange(0, 1), TokenRange(1, 2)

    check_assignments(yield_token_assignments(query),
                      TokenAssignment(name=word, postcode=postcode),
                      TokenAssignment(penalty=0.1, postcode=postcode, address=[word]))
269
270
def test_near_item_at_beginning():
    """A near item followed by a word yields one assignment with the
    word as name and a penalty of 0.1.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]))

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(1, 2),
                               near_item=TokenRange(0, 1))

    check_assignments(yield_token_assignments(query), expected)
278
279
def test_near_item_at_end():
    """A word followed by a near item yields one assignment with the
    word as name and a penalty of 0.1.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]))

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(0, 1),
                               near_item=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), expected)
287
288
def test_near_item_in_middle():
    """A near item enclosed by two words yields no assignment at all."""
    nodes = [(qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])]
    query = make_query(*nodes)

    # Without expected items any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
295
296
def test_qualifier_at_beginning():
    """A qualifier followed by two words: both words form the name, or
    the first is the name and the second the address at a higher penalty.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))

    qualifier = TokenRange(0, 1)
    full_name = TokenAssignment(penalty=0.1,
                                name=TokenRange(1, 3),
                                qualifier=qualifier)
    name_and_address = TokenAssignment(penalty=0.2,
                                       name=TokenRange(1, 2),
                                       qualifier=qualifier,
                                       address=[TokenRange(2, 3)])

    check_assignments(yield_token_assignments(query), full_name, name_and_address)
309
310
def test_qualifier_after_name():
    """Two words, a qualifier and two more words: the word pair on one
    side of the qualifier is the name, the pair on the other side the
    address.
    """
    query = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]),
                       (qmod.BREAK_WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)]))

    before, qualifier, after = TokenRange(0, 2), TokenRange(2, 3), TokenRange(3, 5)

    check_assignments(yield_token_assignments(query),
                      TokenAssignment(penalty=0.2, name=before,
                                      qualifier=qualifier, address=[after]),
                      TokenAssignment(penalty=0.2, name=after,
                                      qualifier=qualifier, address=[before]))
326
327
def test_qualifier_before_housenumber():
    """Qualifier, house number and word in this order yield no
    assignment at all.
    """
    nodes = [(qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])]
    query = make_query(*nodes)

    # Without expected items any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
334
335
def test_qualifier_after_housenumber():
    """House number, qualifier and word in this order yield no
    assignment at all.
    """
    nodes = [(qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.QUALIFIER)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])]
    query = make_query(*nodes)

    # Without expected items any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
342
343
def test_qualifier_in_middle_of_phrase():
    """A qualifier between two words of the same phrase, with further
    phrases before and after, yields no assignment at all.
    """
    nodes = [(qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
             (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]),
             (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]),
             (qmod.BREAK_PHRASE, PhraseType.NONE, [(5, TokenType.PARTIAL)])]
    query = make_query(*nodes)

    # Without expected items any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
352