From 47adb2a3fc0eec5f87e30e200be0c8f86bbcc1c7 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 12 Jul 2021 11:53:25 +0200 Subject: [PATCH] reorganise process_place function Move address processing into its own function as it is rather extensive. --- nominatim/tokenizer/legacy_icu_tokenizer.py | 47 ++++++++++--------- nominatim/tokenizer/legacy_tokenizer.py | 52 ++++++++++----------- 2 files changed, 51 insertions(+), 48 deletions(-) diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py index c585c5af..12ee0404 100644 --- a/nominatim/tokenizer/legacy_icu_tokenizer.py +++ b/nominatim/tokenizer/legacy_icu_tokenizer.py @@ -411,33 +411,36 @@ class LegacyICUNameAnalyzer: self.add_country_names(country_feature.lower(), names) address = place.get('address') - if address: - hnrs = [] - addr_terms = [] - for key, value in address.items(): - if key == 'postcode': - self._add_postcode(value) - elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'): - hnrs.append(value) - elif key == 'street': - token_info.add_street(*self._compute_name_tokens({'name': value})) - elif key == 'place': - token_info.add_place(*self._compute_name_tokens({'name': value})) - elif not key.startswith('_') and \ - key not in ('country', 'full'): - addr_terms.append((key, *self._compute_name_tokens({'name': value}))) - - if hnrs: - hnrs = self._split_housenumbers(hnrs) - token_info.add_housenumbers(self.conn, [self._make_standard_hnr(n) for n in hnrs]) - - if addr_terms: - token_info.add_address_terms(addr_terms) + self._process_place_address(token_info, address) return token_info.data + def _process_place_address(self, token_info, address): + hnrs = [] + addr_terms = [] + for key, value in address.items(): + if key == 'postcode': + self._add_postcode(value) + elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'): + hnrs.append(value) + elif key == 'street': + token_info.add_street(*self._compute_name_tokens({'name': value})) + elif key == 'place': + token_info.add_place(*self._compute_name_tokens({'name': value})) + elif not key.startswith('_') and \ + key not in ('country', 'full'): + addr_terms.append((key, *self._compute_name_tokens({'name': value}))) + + if hnrs: + hnrs = self._split_housenumbers(hnrs) + token_info.add_housenumbers(self.conn, [self._make_standard_hnr(n) for n in hnrs]) + + if addr_terms: + token_info.add_address_terms(addr_terms) + + def _compute_name_tokens(self, names): """ Computes the full name and partial name tokens for the given dictionary of names. diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index 6040f88f..24af1c3a 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -424,37 +424,37 @@ class LegacyNameAnalyzer: self.add_country_names(country_feature.lower(), names) address = place.get('address') - if address: - hnrs = [] - addr_terms = [] - for key, value in address.items(): - if key == 'postcode': - self._add_postcode(value) - elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'): - hnrs.append(value) - elif key == 'street': - token_info.add_street(self.conn, value) - elif key == 'place': - token_info.add_place(self.conn, value) - elif not key.startswith('_') and \ - key not in ('country', 'full'): - addr_terms.append((key, value)) - - if hnrs: - token_info.add_housenumbers(self.conn, hnrs) - - if addr_terms: - token_info.add_address_terms(self.conn, addr_terms) + self._process_place_address(token_info, address) return token_info.data - def _add_postcode(self, postcode): - """ Make sure the normalized postcode is present in the word table. - """ - if re.search(r'[:,;]', postcode) is None: - self._cache.add_postcode(self.conn, self.normalize_postcode(postcode)) + def _process_place_address(self, token_info, address): + hnrs = [] + addr_terms = [] + + for key, value in address.items(): + if key == 'postcode': + # Make sure the normalized postcode is present in the word table. + if re.search(r'[:,;]', value) is None: + self._cache.add_postcode(self.conn, + self.normalize_postcode(value)) + elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'): + hnrs.append(value) + elif key == 'street': + token_info.add_street(self.conn, value) + elif key == 'place': + token_info.add_place(self.conn, value) + elif not key.startswith('_') and key not in ('country', 'full'): + addr_terms.append((key, value)) + + if hnrs: + token_info.add_housenumbers(self.conn, hnrs) + + if addr_terms: + token_info.add_address_terms(self.conn, addr_terms) + class _TokenInfo: -- 2.45.1