git.openstreetmap.org Git - nominatim.git/blob - settings/icu_tokenizer.yaml
make token analyzers configurable modules
[nominatim.git] / settings / icu_tokenizer.yaml
# Normalization rules, written in ICU transform rule syntax.
# Entry order is significant: for example the two-character numero rules
# ('n°', 'nº') are listed ahead of the single-character 'º' rule.
# NOTE(review): presumably applied to names and queries before the
# transliteration rules further down - confirm against the tokenizer code.
# NOTE(review): judging by its file name, the included rule file maps
# Unicode digits to plain decimal digits - confirm.
1 normalization:
2     - ":: lower ()"  # lowercase the whole input
3     - ":: Hans-Hant"  # simplified Chinese -> traditional Chinese
4     - !include icu-rules/unicode-digits-to-decimal.yaml
5     - "'№' > 'no'"  # numero sign and its two-character spellings -> 'no'
6     - "'n°' > 'no'"
7     - "'nº' > 'no'"
8     - "ª > a"  # feminine ordinal indicator
9     - "º > o"  # masculine ordinal indicator
10     - "[[:Punctuation:][:Symbol:]]  > ' '"  # punctuation/symbols become a space
11     - "ß > 'ss'" # German eszett is unambiguously equal to double ss
12     - "[^[:Letter:] [:Number:] [:Space:]] >"  # delete anything that is not a letter, number or space
13     - "[:Lm:] >"  # delete modifier letters (general category Lm)
14     - ":: [[:Number:]] Latin ()"  # the next three transforms are filtered to number characters only
15     - ":: [[:Number:]] Ascii ();"
16     - ":: [[:Number:]] NFD ();"
17     - "[[:Nonspacing Mark:] [:Cf:]] >;"  # delete nonspacing marks and format characters
18     - "[:Space:]+ > ' '"  # collapse each run of whitespace into one space
# Transliteration rules, written in ICU transform rule syntax.
# The visible steps convert to Latin script, then to ASCII, decompose (NFD),
# delete every character that is still non-ASCII, lowercase, and recompose
# (NFC).
# NOTE(review): presumably used to derive ASCII search tokens from the
# normalized names - confirm against the tokenizer code.
# NOTE(review): the included file name suggests extra Unicode-to-ASCII
# mappings beyond the built-in transform - confirm. The "asccii" spelling is
# part of the path and must match the file on disk.
19 transliteration:
20     - ":: Latin ()"  # any script -> Latin
21     - !include icu-rules/extended-unicode-to-asccii.yaml
22     - ":: Ascii ()"  # Latin -> ASCII where a mapping exists
23     - ":: NFD ()"  # canonical decomposition
24     - "[^[:Ascii:]] >"  # delete whatever is still outside ASCII
25     - ":: lower ()"
26     - ":: NFC ()"  # canonical recomposition
# Sanitizer steps, listed in the order given here.
# NOTE(review): going by the step names only, the first splits name values
# that hold several names and the second removes bracketed terms - confirm
# against the sanitizer implementations; neither is visible in this file.
27 sanitizers:
28     - step: split-name-list
29     - step: strip-brace-terms
# Token analysis: a single analyzer entry of type "generic" whose variant
# rules are pulled in from one include file per list item.
# NOTE(review): the two-letter file suffixes look like ISO 639-1 language
# codes (bg, ca, cs, ...) - confirm. The list is kept in alphabetical order
# of that suffix.
30 token-analysis:
31     - analyzer: generic
32       variants:
33           - !include icu-rules/variants-bg.yaml
34           - !include icu-rules/variants-ca.yaml
35           - !include icu-rules/variants-cs.yaml
36           - !include icu-rules/variants-da.yaml
37           - !include icu-rules/variants-de.yaml
38           - !include icu-rules/variants-el.yaml
39           - !include icu-rules/variants-en.yaml
40           - !include icu-rules/variants-es.yaml
41           - !include icu-rules/variants-et.yaml
42           - !include icu-rules/variants-eu.yaml
43           - !include icu-rules/variants-fi.yaml
44           - !include icu-rules/variants-fr.yaml
45           - !include icu-rules/variants-gl.yaml
46           - !include icu-rules/variants-hu.yaml
47           - !include icu-rules/variants-it.yaml
48           - !include icu-rules/variants-ja.yaml
49           - !include icu-rules/variants-mg.yaml
50           - !include icu-rules/variants-ms.yaml
51           - !include icu-rules/variants-nl.yaml
52           - !include icu-rules/variants-no.yaml
53           - !include icu-rules/variants-pl.yaml
54           - !include icu-rules/variants-pt.yaml
55           - !include icu-rules/variants-ro.yaml
56           - !include icu-rules/variants-ru.yaml
57           - !include icu-rules/variants-sk.yaml
58           - !include icu-rules/variants-sl.yaml
59           - !include icu-rules/variants-sv.yaml
60           - !include icu-rules/variants-tr.yaml
61           - !include icu-rules/variants-uk.yaml
62           - !include icu-rules/variants-vi.yaml