\r
\r
"""\r
-from typing import Callable, List, Optional, Pattern, Tuple, Sequence\r
-import re\r
+from typing import Callable, List, Tuple, Sequence\r
\r
from nominatim.tokenizer.sanitizers.base import ProcessInfo\r
from nominatim.data.place_name import PlaceName\r
\r
def __init__(self, config: SanitizerConfig) -> None:\r
self.type = config.get('type', 'name')\r
- self.filter_kind = config.get_filter_kind()\r
+ self.filter_kind = config.get_filter('filter-kind')\r
self.country_codes = config.get_string_list('country_code', [])\r
- self.allowed_ranks = self._set_allowed_ranks( \\r
- config.get_string_list('rank_address', ['0-30']))\r
+ self.filter_suffix = config.get_filter('suffix')\r
+ self.filter_name = config.get_filter('name')\r
+ self.allowed_ranks = self._set_allowed_ranks(\r
+ config.get_string_list("rank_address", ["0-30"])\r
+ )\r
\r
self.has_country_code = config.get('country_code', None) is not None\r
\r
- suffixregexps = config.get_string_list('suffix', [r'[\s\S]*'])\r
- self.suffix_regexp = [re.compile(r) for r in suffixregexps]\r
-\r
- nameregexps = config.get_string_list('name', [r'[\s\S]*'])\r
- self.name_regexp = [re.compile(r) for r in nameregexps]\r
-\r
-\r
\r
def __call__(self, obj: ProcessInfo) -> None:\r
tags = obj.names if self.type == 'name' else obj.address\r
\r
- if (not tags or\r
- self.has_country_code and\r
- obj.place.country_code not in self.country_codes or\r
- not self.allowed_ranks[obj.place.rank_address]):\r
+ if not tags \\r
+ or not self.allowed_ranks[obj.place.rank_address] \\r
+ or self.has_country_code \\r
+ and obj.place.country_code not in self.country_codes:\r
return\r
\r
filtered_tags: List[PlaceName] = []\r
\r
for tag in tags:\r
\r
- if (not self.filter_kind(tag.kind) or\r
- not self._matches(tag.suffix, self.suffix_regexp) or\r
- not self._matches(tag.name, self.name_regexp)):\r
+ if not self.filter_kind(tag.kind) \\r
+ or not self.filter_suffix(tag.suffix or '') \\r
+ or not self.filter_name(tag.name):\r
filtered_tags.append(tag)\r
\r
\r
for rank in ranks:\r
intvl = [int(x) for x in rank.split('-')]\r
\r
- start, end = (intvl[0], intvl[0]) if len(intvl) == 1 else (intvl[0], intvl[1])\r
+ start, end = intvl[0], intvl[0] if len(intvl) == 1 else intvl[1]\r
\r
for i in range(start, end + 1):\r
allowed_ranks[i] = True\r
return tuple(allowed_ranks)\r
\r
\r
- def _matches(self, value: Optional[str], patterns: List[Pattern[str]]) -> bool:\r
- """ Returns True if the given value fully matches any of the regular\r
- expression pattern in the list. Otherwise, returns False.\r
-\r
- Note that if the value is None, it is taken as an empty string.\r
- """\r
- target = '' if value is None else value\r
- return any(r.fullmatch(target) is not None for r in patterns)\r
-\r
-\r
-\r
def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:\r
""" Create a function to process removal of certain tags.\r
"""\r