2 - step: split_japanese_phrases
5 - pattern: \b(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(?:\.(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}\b # Filter for IPv4 addresses
7 - pattern: \b(?:(?:[A-Fa-f0-9]{1,4}:){1,7}|:)(?:[A-Fa-f0-9]{1,4})?\b # Filter for IPv6 addresses
9 - pattern: https?://[^\s]* # Filter URLs starting with http or https
15 - !include icu-rules/unicode-digits-to-decimal.yaml
21 - "[[:Punctuation:][:Symbol:][\u02bc] - [-:]]+ > '-'"
22 - "ß > 'ss'" # German eszett (ß) is unambiguously equivalent to 'ss'
23 - "[^[:alnum:] [:Canonical_Combining_Class=Virama:] [:Space:] [-:]] >"
25 - ":: [[:Number:]] Latin ()"
26 - ":: [[:Number:]] Ascii ();"
27 - ":: [[:Number:]] NFD ();"
28 - "[[:Nonspacing Mark:] [:Cf:]] >;"
29 - "[-:]?[:Space:]+[-:]? > ' '"
33 - !include icu-rules/extended-unicode-to-asccii.yaml
37 - "[^a-z0-9[:Space:]] >"
41 - step: clean-housenumbers
47 - (\A|.*,)[^\d,]{3,}(,.*|\Z)
48 - step: clean-postcodes
49 convert-to-address: yes
50 default-pattern: "[A-Z0-9- ]{3,12}"
51 - step: clean-tiger-tags
52 - step: split-name-list
54 - step: strip-brace-terms
55 - step: tag-analyzer-by-language
56 filter-kind: [".*name.*"]
57 whitelist: [bg,ca,cs,da,de,el,en,es,et,eu,fi,fr,gl,hu,it,ja,mg,ms,nl,"no",pl,pt,ro,ru,sk,sl,sv,tr,uk,vi]
64 analyzer: housenumbers
71 - !include icu-rules/variants-bg.yaml
76 - !include icu-rules/variants-ca.yaml
81 - !include icu-rules/variants-cs.yaml
86 - !include icu-rules/variants-da.yaml
91 - !include icu-rules/variants-de.yaml
94 replacements: ["ä", "ae"]
96 replacements: ["ö", "oe"]
98 replacements: ["ü", "ue"]
103 - !include icu-rules/variants-el.yaml
108 - !include icu-rules/variants-en.yaml
113 - !include icu-rules/variants-es.yaml
118 - !include icu-rules/variants-et.yaml
123 - !include icu-rules/variants-eu.yaml
128 - !include icu-rules/variants-fi.yaml
133 - !include icu-rules/variants-fr.yaml
138 - !include icu-rules/variants-gl.yaml
143 - !include icu-rules/variants-hu.yaml
148 - !include icu-rules/variants-it.yaml
153 - !include icu-rules/variants-ja.yaml
158 - !include icu-rules/variants-mg.yaml
163 - !include icu-rules/variants-ms.yaml
168 - !include icu-rules/variants-nl.yaml
173 - !include icu-rules/variants-no.yaml
178 - !include icu-rules/variants-pl.yaml
183 - !include icu-rules/variants-pt.yaml
188 - !include icu-rules/variants-ro.yaml
193 - !include icu-rules/variants-ru.yaml
198 - !include icu-rules/variants-sk.yaml
203 - !include icu-rules/variants-sl.yaml
208 - !include icu-rules/variants-sv.yaml
213 - !include icu-rules/variants-tr.yaml
218 - !include icu-rules/variants-uk.yaml
223 - !include icu-rules/variants-vi.yaml