1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Test for creation of token assignments from tokenized queries.
12 from nominatim_api.search.query import QueryStruct, Phrase, PhraseType, TokenType, TokenRange, Token
13 import nominatim_api.search.query as qmod
14 from nominatim_api.search.token_assignment import yield_token_assignments, TokenAssignment, PENALTY_TOKENCHANGE
17 def get_category(self):
def make_query(*args):
    """Assemble a QueryStruct from a compact description of nodes and tokens.

    Each positional argument is a triple ``(break_type, phrase_type,
    tokens)`` describing one query position. ``tokens`` is a list of
    ``(end_node, token_type)`` pairs; each pair becomes a token that starts
    at the position of its triple and ends at ``end_node``.

    The phrase type of the first triple is used for the query's single
    phrase; the break type of the first triple is not read. A closing node
    with ``BREAK_END`` is always appended after the described nodes.

    Returns the populated query.
    """
    q = QueryStruct([Phrase(args[0][1], '')])
    # Every token added below shares this one dummy instance.
    # NOTE(review): the trailing `lookup_word` argument is assumed, the end
    # of this call was not available for review - confirm against Token.
    dummy = MyToken(penalty=3.0, token=45, count=1, addr_count=1,
                    lookup_word='foo')

    # The first triple needs no add_node() call, so only the remaining
    # triples contribute a node.
    for btype, ptype, _ in args[1:]:
        q.add_node(btype, ptype)
    q.add_node(qmod.BREAK_END, PhraseType.NONE)

    for start, t in enumerate(args):
        for end, ttype in t[2]:
            q.add_token(TokenRange(start, end), ttype, dummy)

    # Callers assign the result, so the query must be handed back.
    return q
def check_assignments(actual, *expected):
    """Assert that `actual` yields exactly the `expected` items, in any order.

    `actual` is any iterable (a generator is fine, it is consumed once).
    Every item it produces must match one not-yet-matched entry of
    `expected`; duplicates therefore have to appear the same number of
    times on both sides.

    Raises AssertionError on the first unexpected item, or when expected
    items are left over after `actual` is exhausted.
    """
    # Work on a private list so matched entries can be ticked off.
    todo = list(expected)
    for assignment in actual:
        assert assignment in todo, f"Unexpected assignment: {assignment}"
        todo.remove(assignment)

    # Report only what is really missing, not the full expectation.
    assert not todo, f"Missing assignments: {todo}"
def test_query_with_missing_tokens():
    """A query to which no tokens were added yields no assignments."""
    query = QueryStruct([Phrase(PhraseType.NONE, '')])
    query.add_node(qmod.BREAK_END, PhraseType.NONE)

    assignments = list(yield_token_assignments(query))

    assert assignments == []
def test_one_word_query():
    """One position carrying several token types yields only a name."""
    token_types = [(1, TokenType.PARTIAL),
                   (1, TokenType.HOUSENUMBER)]
    query = make_query((qmod.BREAK_START, PhraseType.NONE, token_types))

    assignments = list(yield_token_assignments(query))

    expected = [TokenAssignment(name=TokenRange(0, 1))]
    assert assignments == expected
def test_single_postcode():
    """A lone POSTCODE token is assigned as the postcode."""
    start_node = (qmod.BREAK_START, PhraseType.NONE,
                  [(1, TokenType.POSTCODE)])
    query = make_query(start_node)

    assignments = list(yield_token_assignments(query))

    expected = [TokenAssignment(postcode=TokenRange(0, 1))]
    assert assignments == expected
def test_single_country_name():
    """A lone COUNTRY token is assigned as the country."""
    start_node = (qmod.BREAK_START, PhraseType.NONE,
                  [(1, TokenType.COUNTRY)])
    query = make_query(start_node)

    assignments = list(yield_token_assignments(query))

    expected = [TokenAssignment(country=TokenRange(0, 1))]
    assert assignments == expected
def test_single_word_poi_search():
    """A word that is both NEAR_ITEM and QUALIFIER yields only a near item."""
    token_types = [(1, TokenType.NEAR_ITEM),
                   (1, TokenType.QUALIFIER)]
    query = make_query((qmod.BREAK_START, PhraseType.NONE, token_types))

    assignments = list(yield_token_assignments(query))

    expected = [TokenAssignment(near_item=TokenRange(0, 1))]
    assert assignments == expected
@pytest.mark.parametrize('btype', [qmod.BREAK_WORD, qmod.BREAK_PART, qmod.BREAK_TOKEN])
def test_multiple_simple_words(btype):
    """Three partial words can be split into name and address at any break.

    Using all three words as the name carries no extra penalty; every
    split into a name part and an address part is expected with the
    PENALTY_TOKENCHANGE value of the break type under test.
    """
    q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                   (btype, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                   (btype, PhraseType.NONE, [(3, TokenType.PARTIAL)]))

    penalty = PENALTY_TOKENCHANGE[btype]

    check_assignments(yield_token_assignments(q),
                      TokenAssignment(name=TokenRange(0, 3)),
                      TokenAssignment(penalty=penalty, name=TokenRange(0, 2),
                                      address=[TokenRange(2, 3)]),
                      TokenAssignment(penalty=penalty, name=TokenRange(0, 1),
                                      address=[TokenRange(1, 3)]),
                      TokenAssignment(penalty=penalty, name=TokenRange(1, 3),
                                      address=[TokenRange(0, 1)]),
                      TokenAssignment(penalty=penalty, name=TokenRange(2, 3),
                                      address=[TokenRange(0, 2)])
                      )
def test_multiple_words_respect_phrase_break():
    """Words separated by a phrase break are never merged into one name."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
    )

    expected = [
        TokenAssignment(name=TokenRange(0, 1), address=[TokenRange(1, 2)]),
        TokenAssignment(name=TokenRange(1, 2), address=[TokenRange(0, 1)]),
    ]
    check_assignments(yield_token_assignments(query), *expected)
def test_housenumber_and_street():
    """A house number followed by a word: the word is name or address."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
        (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
    )

    number = TokenRange(0, 1)
    word = TokenRange(1, 2)
    expected = [
        TokenAssignment(name=word, housenumber=number),
        TokenAssignment(address=[word], housenumber=number),
    ]
    check_assignments(yield_token_assignments(query), *expected)
def test_housenumber_and_street_backwards():
    """A word followed by a house number: the word is name or address."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
    )

    word = TokenRange(0, 1)
    number = TokenRange(1, 2)
    expected = [
        TokenAssignment(name=word, housenumber=number),
        TokenAssignment(address=[word], housenumber=number),
    ]
    check_assignments(yield_token_assignments(query), *expected)
def test_housenumber_and_postcode():
    """Word, house number, word, postcode: the first word may be the name."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)]),
    )

    # Variant where the leading word is taken as the name.
    with_name = TokenAssignment(penalty=pytest.approx(0.3),
                                name=TokenRange(0, 1),
                                housenumber=TokenRange(1, 2),
                                address=[TokenRange(2, 3)],
                                postcode=TokenRange(3, 4))
    # Variant where both words are address parts.
    address_only = TokenAssignment(penalty=pytest.approx(0.3),
                                   housenumber=TokenRange(1, 2),
                                   address=[TokenRange(0, 1), TokenRange(2, 3)],
                                   postcode=TokenRange(3, 4))

    check_assignments(yield_token_assignments(query), with_name, address_only)
def test_postcode_and_housenumber():
    """Word, postcode, word, house number: the second word may be the name."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.POSTCODE)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]),
    )

    # Variant where the word next to the house number is the name.
    with_name = TokenAssignment(penalty=pytest.approx(0.3),
                                name=TokenRange(2, 3),
                                housenumber=TokenRange(3, 4),
                                address=[TokenRange(0, 1)],
                                postcode=TokenRange(1, 2))
    # Variant where both words are address parts.
    address_only = TokenAssignment(penalty=pytest.approx(0.3),
                                   housenumber=TokenRange(3, 4),
                                   address=[TokenRange(0, 1), TokenRange(2, 3)],
                                   postcode=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), with_name, address_only)
def test_country_housenumber_postcode():
    """Country, word, house number, postcode in a row yields nothing."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.COUNTRY)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.HOUSENUMBER)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)]),
    )

    # No expected items: any yielded assignment fails the check.
    assignments = yield_token_assignments(query)
    check_assignments(assignments)
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY,
                                   TokenType.NEAR_ITEM, TokenType.QUALIFIER])
def test_housenumber_with_only_special_terms(ttype):
    """A house number followed only by a special term yields nothing."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, ttype)]),
    )

    # No expected items: any yielded assignment fails the check.
    assignments = yield_token_assignments(query)
    check_assignments(assignments)
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.HOUSENUMBER,
                                   TokenType.COUNTRY])
def test_multiple_special_tokens(ttype):
    """The same special token type on both sides of a word yields nothing."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, ttype)]),
        (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (qmod.BREAK_PHRASE, PhraseType.NONE, [(3, ttype)]),
    )

    # No expected items: any yielded assignment fails the check.
    assignments = yield_token_assignments(query)
    check_assignments(assignments)
def test_housenumber_many_phrases():
    """A house number after three phrases, followed by one more word.

    The word after the house number is expected either as the name or as
    an additional address part; the three leading phrases are always
    separate address parts.
    """
    q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                   (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                   (qmod.BREAK_PHRASE, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
                   (qmod.BREAK_PHRASE, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]),
                   (qmod.BREAK_WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)]))

    # NOTE(review): the third address range of the first expectation is
    # restored by analogy with the second expectation - confirm.
    check_assignments(yield_token_assignments(q),
                      TokenAssignment(penalty=0.1,
                                      name=TokenRange(4, 5),
                                      housenumber=TokenRange(3, 4),
                                      address=[TokenRange(0, 1), TokenRange(1, 2),
                                               TokenRange(2, 3)]),
                      TokenAssignment(penalty=0.1,
                                      housenumber=TokenRange(3, 4),
                                      address=[TokenRange(0, 1), TokenRange(1, 2),
                                               TokenRange(2, 3), TokenRange(4, 5)]))
def test_country_at_beginning():
    """A country token in front of a word: the word becomes the name."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.COUNTRY)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
    )

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(1, 2),
                               country=TokenRange(0, 1))
    check_assignments(yield_token_assignments(query), expected)
def test_country_at_end():
    """A country token after a word: the word becomes the name."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]),
    )

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(0, 1),
                               country=TokenRange(1, 2))
    check_assignments(yield_token_assignments(query), expected)
def test_country_in_middle():
    """A country token between two words yields no assignment."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )

    # No expected items: any yielded assignment fails the check.
    assignments = yield_token_assignments(query)
    check_assignments(assignments)
def test_postcode_with_designation():
    """Postcode first, then a word: using the word as name is penalised."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.POSTCODE)]),
        (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
    )

    code = TokenRange(0, 1)
    word = TokenRange(1, 2)
    expected = [
        TokenAssignment(penalty=0.1, name=word, postcode=code),
        TokenAssignment(postcode=code, address=[word]),
    ]
    check_assignments(yield_token_assignments(query), *expected)
def test_postcode_with_designation_backwards():
    """Word first, then a postcode: using the word as address is penalised."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.POSTCODE)]),
    )

    word = TokenRange(0, 1)
    code = TokenRange(1, 2)
    expected = [
        TokenAssignment(name=word, postcode=code),
        TokenAssignment(penalty=0.1, postcode=code, address=[word]),
    ]
    check_assignments(yield_token_assignments(query), *expected)
def test_near_item_at_beginning():
    """A near-item token in front of a word: the word becomes the name."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
    )

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(1, 2),
                               near_item=TokenRange(0, 1))
    check_assignments(yield_token_assignments(query), expected)
def test_near_item_at_end():
    """A near-item token after a word: the word becomes the name."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]),
    )

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(0, 1),
                               near_item=TokenRange(1, 2))
    check_assignments(yield_token_assignments(query), expected)
def test_near_item_in_middle():
    """A near-item token between two words yields no assignment."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )

    # No expected items: any yielded assignment fails the check.
    assignments = yield_token_assignments(query)
    check_assignments(assignments)
def test_qualifier_at_beginning():
    """A leading qualifier followed by two words.

    Both words together may form the name, or the first word is the name
    and the second an address part (at a higher penalty).
    """
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )

    qualifier = TokenRange(0, 1)
    expected = [
        TokenAssignment(penalty=0.1, name=TokenRange(1, 3),
                        qualifier=qualifier),
        TokenAssignment(penalty=0.2, name=TokenRange(1, 2),
                        qualifier=qualifier,
                        address=[TokenRange(2, 3)]),
    ]
    check_assignments(yield_token_assignments(query), *expected)
def test_qualifier_after_name():
    """A qualifier with two words on either side.

    Either word pair may be the name; the other pair is then the address.
    """
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)]),
    )

    qualifier = TokenRange(2, 3)
    expected = [
        TokenAssignment(penalty=0.2, name=TokenRange(0, 2),
                        qualifier=qualifier,
                        address=[TokenRange(3, 5)]),
        TokenAssignment(penalty=0.2, name=TokenRange(3, 5),
                        qualifier=qualifier,
                        address=[TokenRange(0, 2)]),
    ]
    check_assignments(yield_token_assignments(query), *expected)
def test_qualifier_before_housenumber():
    """Qualifier, house number, word in that order yields no assignment."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )

    # No expected items: any yielded assignment fails the check.
    assignments = yield_token_assignments(query)
    check_assignments(assignments)
def test_qualifier_after_housenumber():
    """House number, qualifier, word in that order yields no assignment."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.QUALIFIER)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )

    # No expected items: any yielded assignment fails the check.
    assignments = yield_token_assignments(query)
    check_assignments(assignments)
def test_qualifier_in_middle_of_phrase():
    """A qualifier between two words of the same phrase yields nothing."""
    query = make_query(
        (qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]),
        (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]),
        (qmod.BREAK_PHRASE, PhraseType.NONE, [(5, TokenType.PARTIAL)]),
    )

    # No expected items: any yielded assignment fails the check.
    assignments = yield_token_assignments(query)
    check_assignments(assignments)