# Origin: nominatim/tokenizer/sanitizers/tag_japanese.py as changed by the patch
#   848e5ac5de510c1a1ae1a01107453e61a0895b97..fac8c32cda12d6684ad2232ac255324bd9b85e43
#   (https://git.openstreetmap.org/nominatim.git, blob dffd9559 -> 723a6dfa).
# The patch removes
#   from nominatim.tokenizer.sanitizers.kanji_utils import convert_kanji_sequence_to_number
# and defines the helper inline in this module instead.
#
# Unchanged context lines shown around the added code in the patch:
#   from typing import List, Optional
#   from nominatim.tokenizer.sanitizers.base import ProcessInfo
#   from nominatim.tokenizer.sanitizers.config import SanitizerConfig
#   from nominatim.data.place_name import PlaceName
#   ...
#   def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]:
#   #def create(config: SanitizerConfig) -> Callable[[ProcessInfo],None]:

# Kanji digit characters and the Arabic digit each one is replaced with.
KANJI_MAP = {
    '零': '0',
    '一': '1',
    '二': '2',
    '三': '3',
    '四': '4',
    '五': '5',
    '六': '6',
    '七': '7',
    '八': '8',
    '九': '9',
}

# Built once at import time so each call is a single pass over the string.
_KANJI_DIGIT_TABLE = str.maketrans(KANJI_MAP)


def convert_kanji_sequence_to_number(sequence: str) -> str:
    """Replace every Kanji digit in *sequence* with its Arabic digit.

    The substitution is done character by character using KANJI_MAP:
    each of the ten characters listed there becomes the matching digit
    '0'-'9', and every other character is copied through unchanged, in
    its original position. Consecutive Kanji digits therefore turn into
    consecutive Arabic digits ('一二三' -> '123').

    Only the characters in KANJI_MAP are recognised. Characters such as
    '十' or '百' are not in the map and are left as they are, so '二十'
    becomes '2十' rather than '20'.

    Returns a new string; an empty input yields an empty string.
    """
    # str.translate performs the same per-character lookup the previous
    # hand-written loop did, without repeated string concatenation.
    return sequence.translate(_KANJI_DIGIT_TABLE)