git.openstreetmap.org Git - nominatim.git/blob - nominatim/tokenizer/sanitizers/tag_japanese.py
Moved KANJI_MAP to global variable
[nominatim.git] / nominatim / tokenizer / sanitizers / tag_japanese.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 This sanitizer maps OSM data to Japanese block addresses.
9 It replaces blocknumber and housenumber with housenumber,
10 and quarter and neighbourhood with place.
11 """
12
13
14 from typing import Callable
15 from typing import List, Optional
16
17 from nominatim.tokenizer.sanitizers.base import ProcessInfo
18 from nominatim.tokenizer.sanitizers.config import SanitizerConfig
19 from nominatim.data.place_name import PlaceName
20
# Lookup table from the kanji digits zero through nine to the matching
# Arabic digit character. Each kanji's position in the string below is
# its numeric value.
KANJI_MAP = {
    kanji: str(digit) for digit, kanji in enumerate('零一二三四五六七八九')
}
33
def convert_kanji_sequence_to_number(sequence: str) -> str:
    """Replace every kanji digit in `sequence` with its Arabic digit.

    Each character that is a key of KANJI_MAP is substituted one-for-one
    by the mapped value; every other character is copied through
    unchanged at the same position. No positional arithmetic is done:
    each digit is mapped on its own.
    """
    return ''.join(KANJI_MAP.get(char, char) for char in sequence)
48
def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]:
    """Create the sanitizer function.

    The configuration argument is ignored; the handler returned is
    always `tag_japanese`.
    """
    handler = tag_japanese
    return handler
55
def reconbine_housenumber(
    new_address: List[PlaceName],
    tmp_housenumber: Optional[str],
    tmp_blocknumber: Optional[str]
) -> List[PlaceName]:
    """ Append one combined 'housenumber' entry to `new_address`.

        The entry's name is '<blocknumber>-<housenumber>' when both values
        are truthy, or whichever single value is truthy otherwise. When
        neither is truthy, nothing is appended. The list is modified in
        place and also returned.
    """
    # Block number goes first; falsy (None or empty) values are skipped.
    pieces = [
        f'{value}' for value in (tmp_blocknumber, tmp_housenumber) if value
    ]
    if pieces:
        combined = PlaceName(kind='housenumber', name='-'.join(pieces), suffix='')
        new_address.append(combined)
    return new_address
88
def reconbine_place(
    new_address: List[PlaceName],
    tmp_neighbourhood: Optional[str],
    tmp_quarter: Optional[str]
) -> List[PlaceName]:
    """ Append one combined 'place' entry to `new_address`.

        The entry's name is the quarter immediately followed by the
        neighbourhood (no separator) when both values are truthy, or
        whichever single value is truthy otherwise. When neither is
        truthy, nothing is appended. The list is modified in place and
        also returned.
    """
    # Quarter goes first; falsy (None or empty) values are skipped.
    pieces = [
        f'{value}' for value in (tmp_quarter, tmp_neighbourhood) if value
    ]
    if pieces:
        combined = PlaceName(kind='place', name=''.join(pieces), suffix='')
        new_address.append(combined)
    return new_address
def tag_japanese(obj: ProcessInfo) -> None:
    """Rewrite the names and address parts of a Japanese place.

    Does nothing unless the place's country code is 'jp'. Otherwise:

    * kanji digits in every name and every address part are converted
      to Arabic digits,
    * 'housenumber' and 'block_number' parts are merged into a single
      'housenumber' part,
    * 'neighbourhood' and 'quarter' parts are merged into a single
      'place' part.

    All other address parts are kept in their original order; the merged
    parts are appended after them. `obj.address` is replaced by the
    resulting list.
    """
    if obj.place.country_code != 'jp':
        return

    for name in obj.names:
        name.name = convert_kanji_sequence_to_number(name.name)

    # Most recent value seen for each address kind that gets merged.
    # If a kind occurs more than once, the last occurrence wins.
    merged = {
        'housenumber': None,
        'block_number': None,
        'neighbourhood': None,
        'quarter': None,
    }
    kept = []
    for part in obj.address:
        part.name = convert_kanji_sequence_to_number(part.name)
        if part.kind in merged:
            merged[part.kind] = part.name
        else:
            kept.append(part)

    kept = reconbine_housenumber(kept, merged['housenumber'], merged['block_number'])
    kept = reconbine_place(kept, merged['neighbourhood'], merged['quarter'])

    obj.address = [part for part in kept if part.name is not None]