git.openstreetmap.org Git - nominatim.git/commitdiff
Resolve conflicts
author    AntoJvlt <antonin.jolivat@gmail.com>
Mon, 17 May 2021 11:52:35 +0000 (13:52 +0200)
committer AntoJvlt <antonin.jolivat@gmail.com>
Mon, 17 May 2021 11:52:35 +0000 (13:52 +0200)
docs/admin/Import.md
nominatim/tokenizer/legacy_icu_tokenizer.py
nominatim/tokenizer/legacy_tokenizer.py
test/bdd/steps/nominatim_environment.py
test/python/dummy_tokenizer.py
test/python/test_cli.py
test/python/test_tokenizer_legacy.py
test/python/test_tokenizer_legacy_icu.py

Simple merge
index e07602d90aea7192939d0abf2c0c36240a56c2b3,7205ddefab0c449ec33da6610fe98edb8cfb48ba..156e99ece67f156d463f5d7e84858dcb7b80c027
@@@ -285,28 -295,47 +295,47 @@@ class LegacyICUNameAnalyzer
  
          return self.transliterator.transliterate(hnr)
  
-     def add_postcodes_from_db(self):
-         """ Add postcodes from the location_postcode table to the word table.
+     def update_postcodes_from_db(self):
+         """ Update postcode tokens in the word table from the location_postcode
+             table.
          """
+         to_delete = []
          copystr = io.StringIO()
          with self.conn.cursor() as cur:
-             cur.execute("SELECT distinct(postcode) FROM location_postcode")
-             for (postcode, ) in cur:
-                 copystr.write(postcode)
-                 copystr.write('\t ')
-                 copystr.write(self.transliterator.transliterate(postcode))
-                 copystr.write('\tplace\tpostcode\t0\n')
-             copystr.seek(0)
-             cur.copy_from(copystr, 'word',
-                           columns=['word', 'word_token', 'class', 'type',
-                                    'search_name_count'])
-             # Don't really need an ID for postcodes....
-             # cur.execute("""UPDATE word SET word_id = nextval('seq_word')
-             #                WHERE word_id is null and type = 'postcode'""")
+             # This finds us the rows in location_postcode and word that are
+             # missing in the other table.
+             cur.execute("""SELECT * FROM
+                             (SELECT pc, word FROM
+                               (SELECT distinct(postcode) as pc FROM location_postcode) p
+                               FULL JOIN
+                               (SELECT word FROM word
+                                 WHERE class ='place' and type = 'postcode') w
+                               ON pc = word) x
+                            WHERE pc is null or word is null""")
+             for postcode, word in cur:
+                 if postcode is None:
+                     to_delete.append(word)
+                 else:
+                     copystr.write(postcode)
+                     copystr.write('\t ')
+                     copystr.write(self.transliterator.transliterate(postcode))
+                     copystr.write('\tplace\tpostcode\t0\n')
+             if to_delete:
+                 cur.execute("""DELETE FROM WORD
+                                WHERE class ='place' and type = 'postcode'
+                                      and word = any(%s)
+                             """, (to_delete, ))
+             if copystr.getvalue():
+                 copystr.seek(0)
+                 cur.copy_from(copystr, 'word',
+                               columns=['word', 'word_token', 'class', 'type',
+                                        'search_name_count'])
  
  
 -    def update_special_phrases(self, phrases):
 +    def update_special_phrases(self, phrases, should_replace):
          """ Replace the search index for special phrases with the new phrases.
          """
          norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
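
The hunk above (nominatim/tokenizer/legacy_icu_tokenizer.py) replaces the one-way add_postcodes_from_db() with a two-way sync against location_postcode. For readers skimming the diff, here is a minimal standalone sketch (not part of this commit) of that reconciliation pattern, assuming a psycopg2 connection and the table layout shown in the diff; the ICU transliterator is stubbed with a placeholder callable:

import io
import psycopg2


def sync_postcode_tokens(dsn, transliterate=lambda pc: pc.lower()):
    # 'transliterate' stands in for the analyzer's ICU transliterator; any
    # callable mapping a postcode to its token form will do for this sketch.
    with psycopg2.connect(dsn) as conn:
        with conn.cursor() as cur:
            # Find rows present in only one of location_postcode / word.
            cur.execute("""SELECT * FROM
                            (SELECT pc, word FROM
                              (SELECT distinct(postcode) as pc FROM location_postcode) p
                              FULL JOIN
                              (SELECT word FROM word
                                WHERE class = 'place' and type = 'postcode') w
                              ON pc = word) x
                           WHERE pc is null or word is null""")
            to_delete = []
            copystr = io.StringIO()
            for postcode, word in cur:
                if postcode is None:
                    to_delete.append(word)   # token no longer backed by a postcode
                else:
                    # columns: word, word_token, class, type, search_name_count
                    copystr.write('\t'.join((postcode, ' ' + transliterate(postcode),
                                             'place', 'postcode', '0')) + '\n')
            if to_delete:
                cur.execute("""DELETE FROM word
                               WHERE class = 'place' and type = 'postcode'
                                     and word = any(%s)""", (to_delete, ))
            if copystr.getvalue():
                copystr.seek(0)
                cur.copy_from(copystr, 'word',
                              columns=['word', 'word_token', 'class', 'type',
                                       'search_name_count'])

Obsolete tokens are removed with a single DELETE ... = any(%s), and new ones are bulk-loaded via COPY, mirroring the approach taken in the hunk.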
index 5bd45c51284f211ffc78b4fa4f25a5e169a19d2e,3808c68e069f3f00b7f76bb84847d7b43ccb4ba0..4c03678d12e0c95ab613d44f2c152febd2e6294c
@@@ -305,16 -305,54 +305,54 @@@ class LegacyNameAnalyzer
          return self.normalizer.transliterate(phrase)
  
  
-     def add_postcodes_from_db(self):
-         """ Add postcodes from the location_postcode table to the word table.
+     @staticmethod
+     def normalize_postcode(postcode):
+         """ Convert the postcode to a standardized form.
+             This function must yield exactly the same result as the SQL function
+             'token_normalized_postcode()'.
+         """
+         return postcode.strip().upper()
+ 
+ 
+     def update_postcodes_from_db(self):
+         """ Update postcode tokens in the word table from the location_postcode
+             table.
          """
          with self.conn.cursor() as cur:
-             cur.execute("""SELECT count(create_postcode_id(pc))
-                            FROM (SELECT distinct(postcode) as pc
-                                  FROM location_postcode) x""")
+             # This finds us the rows in location_postcode and word that are
+             # missing in the other table.
+             cur.execute("""SELECT * FROM
+                             (SELECT pc, word FROM
+                               (SELECT distinct(postcode) as pc FROM location_postcode) p
+                               FULL JOIN
+                               (SELECT word FROM word
+                                 WHERE class ='place' and type = 'postcode') w
+                               ON pc = word) x
+                            WHERE pc is null or word is null""")
+             to_delete = []
+             to_add = []
+             for postcode, word in cur:
+                 if postcode is None:
+                     to_delete.append(word)
+                 else:
+                     to_add.append(postcode)
+             if to_delete:
+                 cur.execute("""DELETE FROM WORD
+                                WHERE class ='place' and type = 'postcode'
+                                      and word = any(%s)
+                             """, (to_delete, ))
+             if to_add:
+                 cur.execute("""SELECT count(create_postcode_id(pc))
+                                FROM unnest(%s) as pc
+                             """, (to_add, ))
  
  
 -    def update_special_phrases(self, phrases):
 +    def update_special_phrases(self, phrases, should_replace):
          """ Replace the search index for special phrases with the new phrases.
          """
          norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
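
The new normalize_postcode() above is documented to yield exactly the same result as the SQL function token_normalized_postcode(). A small, hypothetical parity check (not part of this commit) that one could run against a database with that function installed, assuming a psycopg2-style connection; the sample postcodes are made up for illustration:

def check_postcode_normalization(conn, samples=(' ab1 2cd ', 'e1 6an', '75001')):
    # Per the docstring contract, the SQL result must equal strip().upper().
    with conn.cursor() as cur:
        for pc in samples:
            cur.execute("SELECT token_normalized_postcode(%s)", (pc, ))
            assert cur.fetchone()[0] == pc.strip().upper()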
index 2e61a24524c992e8cc41161fe7fffc5d6e11d6a4,0a86ba8d1598752a00af622e3466f4deede31c0e..18e322caef3d5642939698ed5d83c22726058150
@@@ -51,10 -51,13 +51,13 @@@ class DummyNameAnalyzer
      def close(self):
          pass
  
-     def add_postcodes_from_db(self):
+     def normalize_postcode(self, postcode):
+         return postcode
+ 
+     def update_postcodes_from_db(self):
          pass
  
 -    def update_special_phrases(self, phrases):
 +    def update_special_phrases(self, phrases, should_replace):
          self.analyser_cache['special_phrases'] = phrases
  
      def add_country_names(self, code, names):
Simple merge
index 801471723c4b72b5be4c4f6938c188b059639408,15ae50a4ce94175b78fd444d97c9bb0a4e5ab2e5..76b51f717e93e8ca08de78433cdd3d31d15a8dad
@@@ -209,10 -221,9 +221,9 @@@ def test_update_special_phrase_empty_ta
          ("König bei", "amenity", "royal", "near"),
          ("Könige", "amenity", "royal", "-"),
          ("strasse", "highway", "primary", "in")
 -    ])
 +    ], True)
  
-     assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
-                                      FROM word WHERE class != 'place'""") \
+     assert word_table.get_special() \
                 == set(((' könig bei', 'könig bei', 'amenity', 'royal', 'near'),
                         (' könige', 'könige', 'amenity', 'royal', None),
                         (' strasse', 'strasse', 'highway', 'primary', 'in')))
  
  def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor,
                                            make_standard_name):
-     temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
-                               VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
-                                      (' bar', 'bar', 'highway', 'road', null)""")
+     word_table.add_special(' foo', 'foo', 'amenity', 'prison', 'in')
+     word_table.add_special(' bar', 'bar', 'highway', 'road', None)
  
-     assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+     assert word_table.count_special() == 2
  
 -    analyzer.update_special_phrases([])
 +    analyzer.update_special_phrases([], True)
  
-     assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+     assert word_table.count_special() == 0
  
  
- def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
-                                       make_standard_name):
-     temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
-                               VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
-                                      (' bar', 'bar', 'highway', 'road', null)""")
 +def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,
 +                                          make_standard_name):
 +    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
 +                              VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
 +                                     (' bar', 'bar', 'highway', 'road', null)""")
 +
 +    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
 +
 +    analyzer.update_special_phrases([], False)
 +
 +    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
 +
 +
+ def test_update_special_phrase_modify(analyzer, word_table, make_standard_name):
+     word_table.add_special(' foo', 'foo', 'amenity', 'prison', 'in')
+     word_table.add_special(' bar', 'bar', 'highway', 'road', None)
  
-     assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+     assert word_table.count_special() == 2
  
      analyzer.update_special_phrases([
        ('prison', 'amenity', 'prison', 'in'),
        ('bar', 'highway', 'road', '-'),
        ('garden', 'leisure', 'garden', 'near')
 -    ])
 +    ], True)
  
-     assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
-                                      FROM word WHERE class != 'place'""") \
+     assert word_table.get_special() \
                 == set(((' prison', 'prison', 'amenity', 'prison', 'in'),
                         (' bar', 'bar', 'highway', 'road', None),
                         (' garden', 'garden', 'leisure', 'garden', 'near')))
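
The rewritten tests rely on a word_table fixture with add_special(), count_special() and get_special() helpers that live elsewhere in the test suite and are not part of this diff. A plausible sketch of such a helper, assuming a psycopg2 connection, purely to show what the assertions above operate on; the class name and exact signatures here are illustrative:

class WordTableHelper:
    def __init__(self, conn):
        self.conn = conn

    def add_special(self, word_token, word, cls, typ, op):
        # Insert one special-phrase row into the word table.
        with self.conn.cursor() as cur:
            cur.execute("""INSERT INTO word (word_token, word, class, type, operator)
                           VALUES (%s, %s, %s, %s, %s)""",
                        (word_token, word, cls, typ, op))
        self.conn.commit()

    def count_special(self):
        # Special phrases are all rows whose class is not 'place'.
        with self.conn.cursor() as cur:
            cur.execute("SELECT count(*) FROM word WHERE class != 'place'")
            return cur.fetchone()[0]

    def get_special(self):
        with self.conn.cursor() as cur:
            cur.execute("""SELECT word_token, word, class, type, operator
                           FROM word WHERE class != 'place'""")
            return set(cur)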