From fac8c32cda12d6684ad2232ac255324bd9b85e43 Mon Sep 17 00:00:00 2001 From: miku0 Date: Wed, 26 Jul 2023 21:43:22 +0000 Subject: [PATCH] Moved KANJI_MAP to global variable --- nominatim/tokenizer/sanitizers/kanji_utils.py | 36 ------------------- .../tokenizer/sanitizers/tag_japanese.py | 29 ++++++++++++++- 2 files changed, 28 insertions(+), 37 deletions(-) delete mode 100644 nominatim/tokenizer/sanitizers/kanji_utils.py diff --git a/nominatim/tokenizer/sanitizers/kanji_utils.py b/nominatim/tokenizer/sanitizers/kanji_utils.py deleted file mode 100644 index 69561523..00000000 --- a/nominatim/tokenizer/sanitizers/kanji_utils.py +++ /dev/null @@ -1,36 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later -# -# This file is part of Nominatim. (https://nominatim.org) -# -# Copyright (C) 2022 by the Nominatim developer community. -# For a full list of authors see the git log. -""" -This is a file for a function that converts Kanji (Japanese) numerals to Arabic numerals. -""" - -def convert_kanji_sequence_to_number(sequence: str) -> str: - """Converts Kanji numbers to Arabic numbers - """ - kanji_map = { - '零': '0', - '一': '1', - '二': '2', - '三': '3', - '四': '4', - '五': '5', - '六': '6', - '七': '7', - '八': '8', - '九': '9' - } - converted = '' - current_number = '' - for char in sequence: - if char in kanji_map: - current_number += kanji_map[char] - else: - converted += current_number - current_number = '' - converted += char - converted += current_number - return converted diff --git a/nominatim/tokenizer/sanitizers/tag_japanese.py b/nominatim/tokenizer/sanitizers/tag_japanese.py index dffd9559..723a6dfa 100644 --- a/nominatim/tokenizer/sanitizers/tag_japanese.py +++ b/nominatim/tokenizer/sanitizers/tag_japanese.py @@ -17,7 +17,34 @@ from typing import List, Optional from nominatim.tokenizer.sanitizers.base import ProcessInfo from nominatim.tokenizer.sanitizers.config import SanitizerConfig from nominatim.data.place_name import PlaceName -from nominatim.tokenizer.sanitizers.kanji_utils import convert_kanji_sequence_to_number + +KANJI_MAP = { + '零': '0', + '一': '1', + '二': '2', + '三': '3', + '四': '4', + '五': '5', + '六': '6', + '七': '7', + '八': '8', + '九': '9' + } + +def convert_kanji_sequence_to_number(sequence: str) -> str: + """Converts Kanji numbers to Arabic numbers + """ + converted = '' + current_number = '' + for char in sequence: + if char in KANJI_MAP: + current_number += KANJI_MAP[char] + else: + converted += current_number + current_number = '' + converted += char + converted += current_number + return converted def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]: #def create(config: SanitizerConfig) -> Callable[[ProcessInfo],None]: -- 2.45.1