1 # SPDX-License-Identifier: GPL-3.0-or-later
 
   3 # This file is part of Nominatim. (https://nominatim.org)
 
   5 # Copyright (C) 2025 by the Nominatim developer community.
 
   6 # For a full list of authors see the git log.
 
   8 Tests for special postcode analysis and variant generation.
 
  12 from icu import Transliterator
 
  14 import nominatim_db.tokenizer.token_analysis.postcodes as module
 
  15 from nominatim_db.data.place_name import PlaceName
 
  17 DEFAULT_NORMALIZATION = """ :: NFD ();
 
  19                             [[:Nonspacing Mark:] [:Cf:]] >;
 
  21                             [[:Punctuation:][:Space:]]+ > ' ';
 
  25 DEFAULT_TRANSLITERATION = """ ::  Latin ();
 
  32     rules = {'analyzer': 'postcodes'}
 
  33     config = module.configure(rules, DEFAULT_NORMALIZATION)
 
  35     trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
 
  36     norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
 
  38     return module.create(norm, trans, config)
 
  41 def get_normalized_variants(proc, name):
 
  42     norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
 
  43     return proc.compute_variants(norm.transliterate(name).strip())
 
  46 @pytest.mark.parametrize('name,norm', [('12', '12'),
 
  49 def test_get_canonical_id(analyser, name, norm):
 
  50     assert analyser.get_canonical_id(PlaceName(name=name, kind='', suffix='')) == norm
 
  53 @pytest.mark.parametrize('postcode,variants', [('12345', {'12345'}),
 
  54                                                ('AB-998', {'ab 998', 'ab998'}),
 
  55                                                ('23 FGH D3', {'23 fgh d3', '23fgh d3',
 
  56                                                               '23 fghd3', '23fghd3'})])
 
  57 def test_compute_variants(analyser, postcode, variants):
 
  58     out = analyser.compute_variants(postcode)
 
  60     assert len(out) == len(set(out))
 
  61     assert set(out) == variants