From 247afe1f56098c0fa0f7f15c5a7df9278ecaaad5 Mon Sep 17 00:00:00 2001 From: marc tobias Date: Sat, 23 Aug 2025 01:06:35 +0200 Subject: [PATCH] sanitizer no longer strips name parts in brackets when more parts follow --- src/nominatim_db/tokenizer/sanitizers/strip_brace_terms.py | 4 +++- test/python/tokenizer/sanitizers/test_strip_brace_terms.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/nominatim_db/tokenizer/sanitizers/strip_brace_terms.py b/src/nominatim_db/tokenizer/sanitizers/strip_brace_terms.py index 166ba865..7c3bb7e3 100644 --- a/src/nominatim_db/tokenizer/sanitizers/strip_brace_terms.py +++ b/src/nominatim_db/tokenizer/sanitizers/strip_brace_terms.py @@ -2,7 +2,7 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2024 by the Nominatim developer community. +# Copyright (C) 2025 by the Nominatim developer community. # For a full list of authors see the git log. """ This sanitizer creates additional name variants for names that have @@ -25,6 +25,8 @@ def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]: if obj.names: new_names = [] for name in (n for n in obj.names if '(' in n.name): + if ')' in name.name and not name.name.endswith(')'): + continue new_name = name.name.split('(')[0].strip() if new_name: new_names.append(name.clone(name=new_name)) diff --git a/test/python/tokenizer/sanitizers/test_strip_brace_terms.py b/test/python/tokenizer/sanitizers/test_strip_brace_terms.py index 6e891f9e..8aece57f 100644 --- a/test/python/tokenizer/sanitizers/test_strip_brace_terms.py +++ b/test/python/tokenizer/sanitizers/test_strip_brace_terms.py @@ -34,6 +34,8 @@ class TestStripBrace: == [('3', 'ref', None), ('Halle', 'name', None), ('Halle (Saale)', 'name', None)] assert self.run_sanitizer_on(name='ack ( bar') \ == [('ack', 'name', None), ('ack ( bar', 'name', None)] + assert self.run_sanitizer_on(name='Berlin (Ost) Hauptbahnhof') \ + == [('Berlin (Ost) Hauptbahnhof', 'name', None)] def 
test_only_braces(self): assert self.run_sanitizer_on(name='(maybe)') == [('(maybe)', 'name', None)] -- 2.39.5