From 848e5ac5de510c1a1ae1a01107453e61a0895b97 Mon Sep 17 00:00:00 2001 From: miku0 Date: Wed, 26 Jul 2023 09:50:25 +0000 Subject: [PATCH] Correction to PR's comment --- nominatim/tokenizer/sanitizers/kanji_utils.py | 36 ++++++++++++++++++ .../tokenizer/sanitizers/tag_japanese.py | 38 +++---------------- 2 files changed, 42 insertions(+), 32 deletions(-) create mode 100644 nominatim/tokenizer/sanitizers/kanji_utils.py diff --git a/nominatim/tokenizer/sanitizers/kanji_utils.py b/nominatim/tokenizer/sanitizers/kanji_utils.py new file mode 100644 index 00000000..69561523 --- /dev/null +++ b/nominatim/tokenizer/sanitizers/kanji_utils.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +This is a file for a function that converts Kanji (Japanese) numerals to Arabic numerals. +""" + +def convert_kanji_sequence_to_number(sequence: str) -> str: + """Converts Kanji numbers to Arabic numbers + """ + kanji_map = { + '零': '0', + '一': '1', + '二': '2', + '三': '3', + '四': '4', + '五': '5', + '六': '6', + '七': '7', + '八': '8', + '九': '9' + } + converted = '' + current_number = '' + for char in sequence: + if char in kanji_map: + current_number += kanji_map[char] + else: + converted += current_number + current_number = '' + converted += char + converted += current_number + return converted diff --git a/nominatim/tokenizer/sanitizers/tag_japanese.py b/nominatim/tokenizer/sanitizers/tag_japanese.py index 81d3d5b3..dffd9559 100644 --- a/nominatim/tokenizer/sanitizers/tag_japanese.py +++ b/nominatim/tokenizer/sanitizers/tag_japanese.py @@ -12,11 +12,12 @@ and quarter and neighbourhood with place. from typing import Callable -from typing import List +from typing import List, Optional from nominatim.tokenizer.sanitizers.base import ProcessInfo from nominatim.tokenizer.sanitizers.config import SanitizerConfig from nominatim.data.place_name import PlaceName +from nominatim.tokenizer.sanitizers.kanji_utils import convert_kanji_sequence_to_number def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]: #def create(config: SanitizerConfig) -> Callable[[ProcessInfo],None]: @@ -25,37 +26,10 @@ def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]: return tag_japanese #return tag_japanese(config) -def convert_kanji_sequence_to_number(sequence: str) -> str: - """Converts Kanji numbers to Arabic numbers - """ - kanji_map = { - '零': '0', - '一': '1', - '二': '2', - '三': '3', - '四': '4', - '五': '5', - '六': '6', - '七': '7', - '八': '8', - '九': '9' - } - converted = '' - current_number = '' - for char in sequence: - if char in kanji_map: - current_number += kanji_map[char] - else: - converted += current_number - current_number = '' - converted += char - converted += current_number - return converted - def reconbine_housenumber( new_address: List[PlaceName], - tmp_housenumber: str | None, - tmp_blocknumber: str | None + tmp_housenumber: Optional[str], + tmp_blocknumber: Optional[str] ) -> List[PlaceName]: """ Recombine the tag of housenumber by using housenumber and blocknumber """ @@ -87,8 +61,8 @@ def reconbine_housenumber( def reconbine_place( new_address: List[PlaceName], - tmp_neighbourhood: str | None, - tmp_quarter: str | None + tmp_neighbourhood: Optional[str], + tmp_quarter: Optional[str] ) -> List[PlaceName]: """ Recombine the tag of place by using neighbourhood and quarter """ -- 2.39.5