git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #2393 from lonvia/fix-flake8-issues
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 13 Jul 2021 14:46:12 +0000 (16:46 +0200)
committer GitHub <noreply@github.com>
Tue, 13 Jul 2021 14:46:12 +0000 (16:46 +0200)
Fix flake8 issues

24 files changed:
nominatim/cli.py
nominatim/clicmd/api.py
nominatim/clicmd/args.py
nominatim/clicmd/refresh.py
nominatim/clicmd/replication.py
nominatim/db/async_connection.py
nominatim/db/connection.py
nominatim/db/sql_preprocessor.py
nominatim/db/utils.py
nominatim/indexer/indexer.py
nominatim/indexer/progress.py
nominatim/indexer/runners.py
nominatim/tokenizer/legacy_icu_tokenizer.py
nominatim/tokenizer/legacy_tokenizer.py
nominatim/tools/check_database.py
nominatim/tools/database_import.py
nominatim/tools/exec_utils.py
nominatim/tools/freeze.py
nominatim/tools/migration.py
nominatim/tools/postcodes.py
nominatim/tools/refresh.py
nominatim/tools/special_phrases/sp_importer.py
nominatim/tools/special_phrases/sp_wiki_loader.py
nominatim/tools/special_phrases/special_phrase.py

index 533a920e07e5937d689be322e056167ca3135884..5626deb4b5aa6d503e9efd345086f85617ffd487 100644 (file)
@@ -103,7 +103,7 @@ class CommandlineParser:
         return 1
 
 
-##### Subcommand classes
+# Subcommand classes
 #
 # Each class needs to implement two functions: add_args() adds the CLI parameters
 # for the subfunction, run() executes the subcommand.
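
[Editor's note: the comment above spells out the whole subcommand contract, so a tiny sketch may help. Each class registers its options in add_args() and does its work in run(); the class and option names below are invented for illustration, only the two-method interface comes from the comment.]

    import argparse

    class DummyStatus:
        """ Print a short status message (hypothetical subcommand). """

        @staticmethod
        def add_args(parser):
            # Declare the CLI parameters this subcommand understands.
            parser.add_argument('--verbose', action='store_true')

        @staticmethod
        def run(args):
            # Execute the subcommand and return an exit code.
            print('ok (verbose)' if args.verbose else 'ok')
            return 0

    parser = argparse.ArgumentParser()
    DummyStatus.add_args(parser)
    exit_code = DummyStatus.run(parser.parse_args(['--verbose']))
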
index a555695224403fd3b647f9601f5888f78b506bfb..b99d37b87430631fe95fcdca4d59325dabc94c71 100644 (file)
@@ -90,7 +90,7 @@ class APISearch:
         if args.query:
             params = dict(q=args.query)
         else:
-            params = {k : getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)}
+            params = {k: getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)}
 
         for param, _ in EXTRADATA_PARAMS:
             if getattr(args, param):
index ee1941875d56b8c1007cf6bea1d222672080fd22..996f48f26dbffeae3306d6b4afd13853657ea02b 100644 (file)
@@ -24,4 +24,4 @@ class NominatimArgs:
                                      main_data=self.config.TABLESPACE_PLACE_DATA,
                                      main_index=self.config.TABLESPACE_PLACE_INDEX
                                     )
-                    )
+                   )
index fbc23350c7212478f8c8b0c9fa484ec154f9f070..969998ad5063d3233882c2db8084fc70f4b13194 100644 (file)
@@ -61,7 +61,7 @@ class UpdateRefresh:
                                   args.threads or 1)
                 indexer.index_postcodes()
             else:
-                LOG.error("The place table doesn\'t exist. " \
+                LOG.error("The place table doesn't exist. "
                           "Postcode updates on a frozen database are not possible.")
 
         if args.word_counts:
index 242b0f6a0b00c80bacc05ef01b97823e6bed2dfd..4c8cd44e2a77d351015600b3112bc4dca8d9f8ca 100644 (file)
@@ -93,7 +93,7 @@ class UpdateReplication:
                       indexed_only=not args.once)
 
         # Sanity check to not overwhelm the Geofabrik servers.
-        if 'download.geofabrik.de'in params['base_url']\
+        if 'download.geofabrik.de' in params['base_url']\
            and params['update_interval'] < 86400:
             LOG.fatal("Update interval too low for download.geofabrik.de.\n"
                       "Please check install documentation "
index f06f3159f7b392c30526e988ef86aed5ecd4035a..a86c5bdcee13eb37c4874f29be286d2b06beecc0 100644 (file)
@@ -85,7 +85,7 @@ class DBConnection:
 
         # Use a dict to hand in the parameters because async is a reserved
         # word in Python3.
-        self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
+        self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True})
         self.wait()
 
         self.cursor = self.conn.cursor(cursor_factory=cursor_factory)
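
[Editor's note: the dict unpacking is not cosmetic. As the comment says, `async` is reserved in Python 3 (a full keyword since 3.7), so `psycopg2.connect(dsn=..., async=True)` would be a SyntaxError, while keyword names passed through `**` are never parsed as identifiers. A minimal sketch; the DSN is a placeholder and needs a reachable database.]

    import psycopg2

    DSN = 'dbname=nominatim'  # hypothetical connection string

    # psycopg2.connect(dsn=DSN, async=True)   <- SyntaxError in Python 3
    conn = psycopg2.connect(**{'dsn': DSN, 'async': True})
    # Newer psycopg2 releases also accept the async_ alias:
    # conn = psycopg2.connect(dsn=DSN, async_=True)
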
index ac8d7c858090a725142fc9a3bf8546baf6caac8b..1319ac16ea21c6e3d9424ea3bd205dd369768b4d 100644 (file)
@@ -8,6 +8,7 @@ import os
 import psycopg2
 import psycopg2.extensions
 import psycopg2.extras
+from psycopg2 import sql as pysql
 
 from nominatim.errors import UsageError
 
@@ -25,6 +26,16 @@ class _Cursor(psycopg2.extras.DictCursor):
 
         super().execute(query, args)
 
+
+    def execute_values(self, sql, argslist, template=None):
+        """ Wrapper for the psycopg2 convenience function to execute
+            SQL for a list of values.
+        """
+        LOG.debug("SQL execute_values(%s, %s)", sql, argslist)
+
+        psycopg2.extras.execute_values(self, sql, argslist, template=template)
+
+
     def scalar(self, sql, args=None):
         """ Execute query that returns a single value. The value is returned.
             If the query yields more than one row, a ValueError is raised.
@@ -37,6 +48,22 @@ class _Cursor(psycopg2.extras.DictCursor):
         return self.fetchone()[0]
 
 
+    def drop_table(self, name, if_exists=True, cascade=False):
+        """ Drop the table with the given name.
+            Set `if_exists` to False if a non-existent table should raise
+            an exception instead of just being ignored. If `cascade` is set
+            to True then all dependent tables are deleted as well.
+        """
+        sql = 'DROP TABLE '
+        if if_exists:
+            sql += 'IF EXISTS '
+        sql += '{}'
+        if cascade:
+            sql += ' CASCADE'
+
+        self.execute(pysql.SQL(sql).format(pysql.Identifier(name)))
+
+
 class _Connection(psycopg2.extensions.connection):
     """ A connection that provides the specialised cursor by default and
         adds convenience functions for administrating the database.
@@ -75,14 +102,13 @@ class _Connection(psycopg2.extensions.connection):
         return True
 
 
-    def drop_table(self, name, if_exists=True):
+    def drop_table(self, name, if_exists=True, cascade=False):
         """ Drop the table with the given name.
             Set `if_exists` to False if a non-existent table should raise
             an exception instead of just being ignored.
         """
         with self.cursor() as cur:
-            cur.execute("""DROP TABLE {} "{}"
-                        """.format('IF EXISTS' if if_exists else '', name))
+            cur.drop_table(name, if_exists, cascade)
         self.commit()
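
[Editor's note: taken together, the two new cursor helpers replace the hand-formatted SQL in the callers further down in this commit. A hedged usage sketch, assuming a reachable database and that connect() yields a context-managed connection as it is used elsewhere in the code base; the table and rows are placeholders.]

    from nominatim.db.connection import connect

    with connect('dbname=nominatim') as conn:
        with conn.cursor() as cur:
            cur.execute('CREATE TABLE dummy_table (id INT, name TEXT)')
            # execute_values() logs the statement, then delegates to
            # psycopg2.extras.execute_values().
            cur.execute_values(
                'INSERT INTO dummy_table (id, name) VALUES %s',
                [(1, 'one'), (2, 'two')])
            # drop_table() builds the statement with psycopg2.sql, so the
            # table name is quoted as an identifier rather than spliced in.
            cur.drop_table('dummy_table', if_exists=True, cascade=False)
        conn.commit()
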
 
 
index dafc5de434bb3bf69a69014d7d7e20a2059f9313..d756a215618d316499af1261c4f48a35c801b20c 100644 (file)
@@ -61,7 +61,7 @@ def _setup_postgresql_features(conn):
     """
     pg_version = conn.server_version_tuple()
     return {
-        'has_index_non_key_column' : pg_version >= (11, 0, 0)
+        'has_index_non_key_column': pg_version >= (11, 0, 0)
     }
 
 class SQLPreprocessor:
index 4d4305e7d67ff74c93119bbc67ef4acfa7036e2c..9a4a41a581661ced3048797b7ae1ff98d613dad0 100644 (file)
@@ -61,9 +61,9 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None)
 
 
 # List of characters that need to be quoted for the copy command.
-_SQL_TRANSLATION = {ord(u'\\') : u'\\\\',
-                    ord(u'\t') : u'\\t',
-                    ord(u'\n') : u'\\n'}
+_SQL_TRANSLATION = {ord(u'\\'): u'\\\\',
+                    ord(u'\t'): u'\\t',
+                    ord(u'\n'): u'\\n'}
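
[Editor's note: a mapping keyed on code points like this is meant for str.translate(), which is presumably how CopyBuffer escapes values for PostgreSQL's text COPY format; the exact call site is outside this hunk. A standalone sketch:]

    _SQL_TRANSLATION = {ord('\\'): '\\\\',
                        ord('\t'): '\\t',
                        ord('\n'): '\\n'}

    value = 'a\tb\nc'
    # The tab and newline come out as literal \t and \n on a single line.
    print(value.translate(_SQL_TRANSLATION))
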
 
 class CopyBuffer:
     """ Data collector for the copy_from command.
index 76883500b7a23469b1332d0ca0305551349b4dc6..d0cfb391c4dbdf7a63c875af6ec1b2d98ca88d0c 100644 (file)
@@ -203,7 +203,7 @@ class Indexer:
 
                                 # And insert the current batch
                                 for idx in range(0, len(places), batch):
-                                    part = places[idx:idx+batch]
+                                    part = places[idx:idx + batch]
                                     LOG.debug("Processing places: %s", str(part))
                                     runner.index_places(pool.next_free_worker(), part)
                                     progress.add(len(part))
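
[Editor's note: the slice expression is the whole batching mechanism here. range() steps through the fetched rows in increments of `batch`, and the final slice is simply shorter when the row count is not a multiple of the batch size. A standalone sketch:]

    places = list(range(7))   # stand-in for the fetched place ids
    batch = 3
    for idx in range(0, len(places), batch):
        part = places[idx:idx + batch]
        print(part)           # [0, 1, 2], then [3, 4, 5], then [6]
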
index 177e67b812aef0ea05116928c214ed5434f5a622..634b1fae703670d838bf1633a2d222b427dd43fa 100644 (file)
@@ -63,7 +63,7 @@ class ProgressLogger:
             places_per_sec = self.done_places
         else:
             diff_seconds = (rank_end_time - self.rank_start_time).total_seconds()
-            places_per_sec = self.done_places/diff_seconds
+            places_per_sec = self.done_places / diff_seconds
 
         LOG.warning("Done %d/%d in %d @ %.3f per second - FINISHED %s\n",
                     self.done_places, self.total_places, int(diff_seconds),
index aa607faae3f3d48988ebddd738a49c90ba4bb607..068d7d0fa63edba9a45fe7aef9195d06d164996a 100644 (file)
@@ -5,13 +5,17 @@ tasks.
 import functools
 
 import psycopg2.extras
+from psycopg2 import sql as pysql
 
 # pylint: disable=C0111
 
+def _mk_valuelist(template, num):
+    return pysql.SQL(',').join([pysql.SQL(template)] * num)
+
 class AbstractPlacexRunner:
     """ Returns SQL commands for indexing of the placex table.
     """
-    SELECT_SQL = 'SELECT place_id FROM placex'
+    SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ')
 
     def __init__(self, rank, analyzer):
         self.rank = rank
@@ -21,11 +25,12 @@ class AbstractPlacexRunner:
     @staticmethod
     @functools.lru_cache(maxsize=1)
     def _index_sql(num_places):
-        return """ UPDATE placex
-                   SET indexed_status = 0, address = v.addr, token_info = v.ti
-                   FROM (VALUES {}) as v(id, addr, ti)
-                   WHERE place_id = v.id
-               """.format(','.join(["(%s, %s::hstore, %s::jsonb)"]  * num_places))
+        return pysql.SQL(
+            """ UPDATE placex
+                SET indexed_status = 0, address = v.addr, token_info = v.ti
+                FROM (VALUES {}) as v(id, addr, ti)
+                WHERE place_id = v.id
+            """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
 
 
     @staticmethod
@@ -52,14 +57,15 @@ class RankRunner(AbstractPlacexRunner):
         return "rank {}".format(self.rank)
 
     def sql_count_objects(self):
-        return """SELECT count(*) FROM placex
-                  WHERE rank_address = {} and indexed_status > 0
-               """.format(self.rank)
+        return pysql.SQL("""SELECT count(*) FROM placex
+                            WHERE rank_address = {} and indexed_status > 0
+                         """).format(pysql.Literal(self.rank))
 
     def sql_get_objects(self):
-        return """{} WHERE indexed_status > 0 and rank_address = {}
-                     ORDER BY geometry_sector
-               """.format(self.SELECT_SQL, self.rank)
+        return self.SELECT_SQL + pysql.SQL(
+            """WHERE indexed_status > 0 and rank_address = {}
+               ORDER BY geometry_sector
+            """).format(pysql.Literal(self.rank))
 
 
 class BoundaryRunner(AbstractPlacexRunner):
@@ -71,17 +77,18 @@ class BoundaryRunner(AbstractPlacexRunner):
         return "boundaries rank {}".format(self.rank)
 
     def sql_count_objects(self):
-        return """SELECT count(*) FROM placex
-                  WHERE indexed_status > 0
-                    AND rank_search = {}
-                    AND class = 'boundary' and type = 'administrative'
-               """.format(self.rank)
+        return pysql.SQL("""SELECT count(*) FROM placex
+                            WHERE indexed_status > 0
+                              AND rank_search = {}
+                              AND class = 'boundary' and type = 'administrative'
+                         """).format(pysql.Literal(self.rank))
 
     def sql_get_objects(self):
-        return """{} WHERE indexed_status > 0 and rank_search = {}
-                           and class = 'boundary' and type = 'administrative'
-                     ORDER BY partition, admin_level
-               """.format(self.SELECT_SQL, self.rank)
+        return self.SELECT_SQL + pysql.SQL(
+            """WHERE indexed_status > 0 and rank_search = {}
+                     and class = 'boundary' and type = 'administrative'
+               ORDER BY partition, admin_level
+            """).format(pysql.Literal(self.rank))
 
 
 class InterpolationRunner:
@@ -120,11 +127,11 @@ class InterpolationRunner:
     @staticmethod
     @functools.lru_cache(maxsize=1)
     def _index_sql(num_places):
-        return """ UPDATE location_property_osmline
-                   SET indexed_status = 0, address = v.addr, token_info = v.ti
-                   FROM (VALUES {}) as v(id, addr, ti)
-                   WHERE place_id = v.id
-               """.format(','.join(["(%s, %s::hstore, %s::jsonb)"]  * num_places))
+        return pysql.SQL("""UPDATE location_property_osmline
+                            SET indexed_status = 0, address = v.addr, token_info = v.ti
+                            FROM (VALUES {}) as v(id, addr, ti)
+                            WHERE place_id = v.id
+                         """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
 
 
     def index_places(self, worker, places):
@@ -157,6 +164,6 @@ class PostcodeRunner:
 
     @staticmethod
     def index_places(worker, ids):
-        worker.perform(""" UPDATE location_postcode SET indexed_status = 0
-                           WHERE place_id IN ({})
-                       """.format(','.join((str(i[0]) for i in ids))))
+        worker.perform(pysql.SQL("""UPDATE location_postcode SET indexed_status = 0
+                                    WHERE place_id IN ({})""")
+                       .format(pysql.SQL(',').join((pysql.Literal(i[0]) for i in ids))))
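
[Editor's note: the rewritten statements in this file all follow one pattern: placeholders stay as plain %s markers for psycopg2 to bind, while everything that varies the SQL text itself (the number of value rows, the rank, the id list) goes through psycopg2.sql objects. A reduced sketch of the helper; rendering the composed statement needs a live connection, hence the comment.]

    from psycopg2 import sql as pysql

    def _mk_valuelist(template, num):
        # num copies of the placeholder row, joined with commas.
        return pysql.SQL(',').join([pysql.SQL(template)] * num)

    stmt = pysql.SQL("""UPDATE placex
                        SET indexed_status = 0, address = v.addr, token_info = v.ti
                        FROM (VALUES {}) as v(id, addr, ti)
                        WHERE place_id = v.id
                     """).format(_mk_valuelist('(%s, %s::hstore, %s::jsonb)', 2))
    # stmt.as_string(conn) would yield the UPDATE with two placeholder rows:
    # ... (VALUES (%s, %s::hstore, %s::jsonb),(%s, %s::hstore, %s::jsonb)) ...
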
index 12ee0404f55d013cfe44c279d9578c2126ddc914..6d3d11c163eed81995b8c2c7c71f7870de5395ec 100644 (file)
@@ -9,8 +9,6 @@ import re
 from textwrap import dedent
 from pathlib import Path
 
-import psycopg2.extras
-
 from nominatim.db.connection import connect
 from nominatim.db.properties import set_property, get_property
 from nominatim.db.utils import CopyBuffer
@@ -341,7 +339,7 @@ class LegacyICUNameAnalyzer:
                 term = self.name_processor.get_search_normalized(word)
                 if term:
                     copystr.add(word, ' ' + term, cls, typ,
-                                oper if oper in ('in', 'near')  else None, 0)
+                                oper if oper in ('in', 'near') else None, 0)
                     added += 1
 
             copystr.copy_out(cursor, 'word',
@@ -359,8 +357,7 @@ class LegacyICUNameAnalyzer:
         to_delete = existing_phrases - new_phrases
 
         if to_delete:
-            psycopg2.extras.execute_values(
-                cursor,
+            cursor.execute_values(
                 """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
                     WHERE word = name and class = in_class and type = in_type
                           and ((op = '-' and operator is null) or op = operator)""",
@@ -386,9 +383,9 @@ class LegacyICUNameAnalyzer:
             if word_tokens:
                 cur.execute("""INSERT INTO word (word_id, word_token, country_code,
                                                  search_name_count)
-                               (SELECT nextval('seq_word'), token, '{}', 0
+                               (SELECT nextval('seq_word'), token, %s, 0
                                 FROM unnest(%s) as token)
-                            """.format(country_code), (list(word_tokens),))
+                            """, (country_code, list(word_tokens)))
 
 
     def process_place(self, place):
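
[Editor's note: the country_code hunk is the one behavioural fix in this file. The country code used to be spliced into the statement with str.format() and is now bound as an ordinary parameter, so psycopg2 handles quoting. Schematically, assuming a live cursor `cur`:]

    country_code = 'de'
    word_tokens = {'tok1', 'tok2'}
    # before: "... SELECT nextval('seq_word'), token, '{}', 0 ...".format(country_code)
    # after: the value travels in the args tuple instead of the SQL text
    cur.execute("SELECT nextval('seq_word'), token, %s, 0 FROM unnest(%s) as token",
                (country_code, list(word_tokens)))
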
index 24af1c3a1a4013f035cac265bb116f644c53c8a0..c19dce2f5a2a3c0d903cd13bca7bd6e3738a8008 100644 (file)
@@ -370,8 +370,7 @@ class LegacyNameAnalyzer:
             to_delete = existing_phrases - norm_phrases
 
             if to_add:
-                psycopg2.extras.execute_values(
-                    cur,
+                cur.execute_values(
                     """ INSERT INTO word (word_id, word_token, word, class, type,
                                           search_name_count, operator)
                         (SELECT nextval('seq_word'), ' ' || make_standard_name(name), name,
@@ -381,8 +380,7 @@ class LegacyNameAnalyzer:
                     to_add)
 
             if to_delete and should_replace:
-                psycopg2.extras.execute_values(
-                    cur,
+                cur.execute_values(
                     """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
                         WHERE word = name and class = in_class and type = in_type
                               and ((op = '-' and operator is null) or op = operator)""",
@@ -582,7 +580,7 @@ class _TokenCache:
         with conn.cursor() as cur:
             cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
                            FROM generate_series(1, 100) as i""")
-            self._cached_housenumbers = {str(r[0]) : r[1] for r in cur}
+            self._cached_housenumbers = {str(r[0]): r[1] for r in cur}
 
         # For postcodes remember the ones that have already been added
         self.postcodes = set()
index d4f793b46334c27f304fbfe898268a4f23f564a9..d116554fea20f6e9b5e261adc2a48b0434fa5531 100644 (file)
@@ -24,6 +24,7 @@ def _check(hint=None):
     """
     def decorator(func):
         title = func.__doc__.split('\n', 1)[0].strip()
+
         def run_check(conn, config):
             print(title, end=' ... ')
             ret = func(conn, config)
@@ -98,13 +99,12 @@ def _get_indexes(conn):
     if conn.table_exists('place'):
         indexes.extend(('idx_placex_pendingsector',
                         'idx_location_area_country_place_id',
-                        'idx_place_osm_unique'
-                       ))
+                        'idx_place_osm_unique'))
 
     return indexes
 
 
-### CHECK FUNCTIONS
+# CHECK FUNCTIONS
 #
 # Functions are executed in the order they appear here.
 
index df82f9aaf4a6e042eae58e6dab378ed8cd422b3b..a4d7220fb8c73bcf7f38f1370c028122fb020b39 100644 (file)
@@ -9,6 +9,7 @@ from pathlib import Path
 
 import psutil
 import psycopg2.extras
+from psycopg2 import sql as pysql
 
 from nominatim.db.connection import connect, get_pg_env
 from nominatim.db import utils as db_utils
@@ -184,7 +185,12 @@ def truncate_data_tables(conn):
 
     conn.commit()
 
-_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'
+
+_COPY_COLUMNS = pysql.SQL(',').join(map(pysql.Identifier,
+                                        ('osm_type', 'osm_id', 'class', 'type',
+                                         'name', 'admin_level', 'address',
+                                         'extratags', 'geometry')))
+
 
 def load_data(dsn, threads):
     """ Copy data into the word and placex table.
@@ -195,12 +201,15 @@ def load_data(dsn, threads):
     for imod in range(place_threads):
         conn = DBConnection(dsn)
         conn.connect()
-        conn.perform("""INSERT INTO placex ({0})
-                         SELECT {0} FROM place
-                         WHERE osm_id % {1} = {2}
-                           AND NOT (class='place' and (type='houses' or type='postcode'))
-                           AND ST_IsValid(geometry)
-                     """.format(_COPY_COLUMNS, place_threads, imod))
+        conn.perform(
+            pysql.SQL("""INSERT INTO placex ({columns})
+                           SELECT {columns} FROM place
+                           WHERE osm_id % {total} = {mod}
+                             AND NOT (class='place' and (type='houses' or type='postcode'))
+                             AND ST_IsValid(geometry)
+                      """).format(columns=_COPY_COLUMNS,
+                                  total=pysql.Literal(place_threads),
+                                  mod=pysql.Literal(imod)))
         sel.register(conn, selectors.EVENT_READ, conn)
 
     # Address interpolations go into another table.
@@ -250,6 +259,7 @@ def create_search_indices(conn, config, drop=False):
 
     sql.run_sql_file(conn, 'indices.sql', drop=drop)
 
+
 def create_country_names(conn, tokenizer, languages=None):
     """ Add default country names to search index. `languages` is a comma-
         separated list of language codes as used in OSM. If `languages` is not
@@ -261,8 +271,7 @@ def create_country_names(conn, tokenizer, languages=None):
 
     def _include_key(key):
         return key == 'name' or \
-               (key.startswith('name:') \
-                and (not languages or key[5:] in languages))
+               (key.startswith('name:') and (not languages or key[5:] in languages))
 
     with conn.cursor() as cur:
         psycopg2.extras.register_hstore(cur)
@@ -271,7 +280,7 @@ def create_country_names(conn, tokenizer, languages=None):
 
         with tokenizer.name_analyzer() as analyzer:
             for code, name in cur:
-                names = {'countrycode' : code}
+                names = {'countrycode': code}
                 if code == 'gb':
                     names['short_name'] = 'UK'
                 if code == 'us':
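
[Editor's note: the same composition idioms appear here: Identifier for column names, quoted and reused in two places via the named {columns} placeholder, and Literal for plain values. A reduced sketch with invented numbers:]

    from psycopg2 import sql as pysql

    cols = pysql.SQL(',').join(map(pysql.Identifier, ('osm_type', 'osm_id')))
    stmt = pysql.SQL("""INSERT INTO placex ({columns})
                          SELECT {columns} FROM place
                          WHERE osm_id % {total} = {mod}
                     """).format(columns=cols,
                                 total=pysql.Literal(4),
                                 mod=pysql.Literal(1))
    # stmt.as_string(conn) -> INSERT INTO placex ("osm_type","osm_id")
    #                           SELECT "osm_type","osm_id" FROM place
    #                           WHERE osm_id % 4 = 1
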
index 560bb78166c54ba7859c5ea275522f8c70804f40..72d252b7a83abdeb434912729bd595cfb10b3f7a 100644 (file)
@@ -136,11 +136,11 @@ def run_osm2pgsql(options):
 def get_url(url):
     """ Get the contents from the given URL and return it as a UTF-8 string.
     """
-    headers = {"User-Agent" : "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)}
+    headers = {"User-Agent": "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)}
 
     try:
         with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
             return response.read().decode('utf-8')
-    except:
+    except Exception:
         LOG.fatal('Failed to load URL: %s', url)
         raise
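
[Editor's note: replacing the bare except keeps the log-and-reraise behaviour for ordinary errors but no longer traps SystemExit or KeyboardInterrupt, which do not inherit from Exception. A self-contained illustration with an invented failure:]

    import logging

    def fetch(url):
        try:
            raise ValueError('connection refused')   # stand-in for a real failure
        except Exception:                            # KeyboardInterrupt passes through
            logging.error('Failed to load URL: %s', url)
            raise

    try:
        fetch('https://example.com')
    except ValueError:
        pass   # the error was logged, then re-raised
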
index cc1bf97e09a92556cc1cc79cd47cfe03e605181c..a182fc8b32aa7c8ec05427d2d95587a4b567bb78 100644 (file)
@@ -3,6 +3,8 @@ Functions for removing unnecessary data from the database.
 """
 from pathlib import Path
 
+from psycopg2 import sql as pysql
+
 UPDATE_TABLES = [
     'address_levels',
     'gb_postcode',
@@ -21,15 +23,15 @@ def drop_update_tables(conn):
     """ Drop all tables only necessary for updating the database from
         OSM replication data.
     """
-
-    where = ' or '.join(["(tablename LIKE '{}')".format(t) for t in UPDATE_TABLES])
+    parts = (pysql.SQL("(tablename LIKE {})").format(pysql.Literal(t)) for t in UPDATE_TABLES)
 
     with conn.cursor() as cur:
-        cur.execute("SELECT tablename FROM pg_tables WHERE " + where)
+        cur.execute(pysql.SQL("SELECT tablename FROM pg_tables WHERE ")
+                    + pysql.SQL(' or ').join(parts))
         tables = [r[0] for r in cur]
 
         for table in tables:
-            cur.execute('DROP TABLE IF EXISTS "{}" CASCADE'.format(table))
+            cur.drop_table(table, cascade=True)
 
     conn.commit()
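
[Editor's note: the WHERE clause is now assembled from Composable pieces, so each table pattern is quoted by psycopg2 rather than by hand; Composable objects support + and join(), which is all this needs. A reduced sketch:]

    from psycopg2 import sql as pysql

    tables = ['address_levels', 'gb_postcode']
    parts = (pysql.SQL('(tablename LIKE {})').format(pysql.Literal(t))
             for t in tables)
    stmt = (pysql.SQL('SELECT tablename FROM pg_tables WHERE ')
            + pysql.SQL(' or ').join(parts))
    # stmt.as_string(conn) ->
    #   SELECT tablename FROM pg_tables WHERE (tablename LIKE 'address_levels')
    #   or (tablename LIKE 'gb_postcode')
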
 
index de1e51013ffa00e4528a12318327343e38ccd11b..d7faca31f1f431b61383825ad105bee4163a6a07 100644 (file)
@@ -142,7 +142,8 @@ def change_housenumber_transliteration(conn, **_):
                        BEGIN
                          SELECT array_to_string(array_agg(trans), ';')
                            INTO normtext
-                           FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word)
+                           FROM (SELECT lookup_word as trans,
+                                        getorcreate_housenumber_id(lookup_word)
                                  FROM (SELECT make_standard_name(h) as lookup_word
                                        FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
                          return normtext;
index cfd242e24d8d358cf48f77f2930e8a1a7171ef03..d00fc97a8dbb39180b2ca5dae92784a7291bd40e 100644 (file)
@@ -7,7 +7,7 @@ import gzip
 import logging
 from math import isfinite
 
-from psycopg2.extras import execute_values
+from psycopg2 import sql as pysql
 
 from nominatim.db.connection import connect
 
@@ -52,27 +52,26 @@ class _CountryPostcodesCollector:
 
         with conn.cursor() as cur:
             if to_add:
-                execute_values(cur,
-                               """INSERT INTO location_postcode
-                                      (place_id, indexed_status, country_code,
-                                       postcode, geometry) VALUES %s""",
-                               to_add,
-                               template="""(nextval('seq_place'), 1, '{}',
-                                           %s, 'SRID=4326;POINT(%s %s)')
-                                        """.format(self.country))
+                cur.execute_values(
+                    """INSERT INTO location_postcode
+                         (place_id, indexed_status, country_code,
+                          postcode, geometry) VALUES %s""",
+                    to_add,
+                    template=pysql.SQL("""(nextval('seq_place'), 1, {},
+                                          %s, 'SRID=4326;POINT(%s %s)')
+                                       """).format(pysql.Literal(self.country)))
             if to_delete:
                 cur.execute("""DELETE FROM location_postcode
                                WHERE country_code = %s and postcode = any(%s)
                             """, (self.country, to_delete))
             if to_update:
-                execute_values(cur,
-                               """UPDATE location_postcode
-                                  SET indexed_status = 2,
-                                      geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326)
-                                  FROM (VALUES %s) AS v (pc, x, y)
-                                  WHERE country_code = '{}' and postcode = pc
-                               """.format(self.country),
-                               to_update)
+                cur.execute_values(
+                    pysql.SQL("""UPDATE location_postcode
+                                 SET indexed_status = 2,
+                                     geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326)
+                                 FROM (VALUES %s) AS v (pc, x, y)
+                                 WHERE country_code = {} and postcode = pc
+                              """).format(pysql.Literal(self.country)), to_update)
 
 
     def _compute_changes(self, conn):
@@ -165,11 +164,14 @@ def update_postcodes(dsn, project_dir, tokenizer):
             with conn.cursor(name="placex_postcodes") as cur:
                 cur.execute("""
                 SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid)
-                FROM (SELECT 
-                        COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc,
+                FROM (SELECT
+                        COALESCE(plx.country_code,
+                                 get_country_code(ST_Centroid(pl.geometry))) as cc,
                         token_normalized_postcode(pl.address->'postcode') as pc,
-                        ST_Centroid(ST_Collect(COALESCE(plx.centroid, ST_Centroid(pl.geometry)))) as centroid 
-                        FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
+                        ST_Centroid(ST_Collect(COALESCE(plx.centroid,
+                                                        ST_Centroid(pl.geometry)))) as centroid
+                      FROM place AS pl LEFT OUTER JOIN placex AS plx
+                             ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
                     WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null
                     GROUP BY cc, pc) xx
                 WHERE pc IS NOT null AND cc IS NOT null
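
[Editor's note: both call sites now go through the new cursor wrapper, and the fixed country code is injected with Literal, either into the statement or into the per-row template. This evidently works because psycopg2 renders Composable objects against the cursor, which is what the commit relies on. A hedged sketch of the INSERT form, assuming a live cursor `cur` and invented data:]

    from psycopg2 import sql as pysql

    to_add = [('12345', 8.0, 47.0)]   # (postcode, x, y)
    cur.execute_values(
        """INSERT INTO location_postcode
             (place_id, indexed_status, country_code, postcode, geometry)
           VALUES %s""",
        to_add,
        template=pysql.SQL("""(nextval('seq_place'), 1, {},
                              %s, 'SRID=4326;POINT(%s %s)')
                           """).format(pysql.Literal('de')))
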
index 97e2e0374b644c4d7721ed30d38f6e573fab54b5..5aaee0c8d1d8417a5a88c4b7a317a2d2f37c4467 100644 (file)
@@ -5,7 +5,7 @@ import json
 import logging
 from textwrap import dedent
 
-from psycopg2.extras import execute_values
+from psycopg2 import sql as pysql
 
 from nominatim.db.utils import execute_file
 from nominatim.db.sql_preprocessor import SQLPreprocessor
@@ -49,7 +49,7 @@ def load_address_levels(conn, table, levels):
         _add_address_level_rows_from_entry(rows, entry)
 
     with conn.cursor() as cur:
-        cur.execute('DROP TABLE IF EXISTS {}'.format(table))
+        cur.drop_table(table)
 
         cur.execute("""CREATE TABLE {} (country_code varchar(2),
                                         class TEXT,
@@ -57,7 +57,8 @@ def load_address_levels(conn, table, levels):
                                         rank_search SMALLINT,
                                         rank_address SMALLINT)""".format(table))
 
-        execute_values(cur, "INSERT INTO {} VALUES %s".format(table), rows)
+        cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
+                           .format(pysql.Identifier(table)), rows)
 
         cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))
 
index 681990fa6175be2935f6b59e763fe4dab6239792..791f4dc323eb61f4659b34c43af624ed37c4c1e9 100644 (file)
@@ -44,8 +44,8 @@ class SPImporter():
         # This set will contain all existing phrases to be added.
         # It contains tuples with the following format: (label, class, type, operator)
         self.word_phrases = set()
-        #This set will contain all existing place_classtype tables which doesn't match any
-        #special phrases class/type on the wiki.
+        # This set will contain all existing place_classtype tables which don't match any
+        # special phrases class/type on the wiki.
         self.table_phrases_to_delete = set()
 
     def import_phrases(self, tokenizer, should_replace):
@@ -60,7 +60,7 @@ class SPImporter():
         LOG.warning('Special phrases importation starting')
         self._fetch_existing_place_classtype_tables()
 
-        #Store pairs of class/type for further processing
+        # Store pairs of class/type for further processing
         class_type_pairs = set()
 
         for loaded_phrases in self.sp_loader:
@@ -131,17 +131,17 @@ class SPImporter():
             Return the class/type pair corresponding to the phrase.
         """
 
-        #blacklisting: disallow certain class/type combinations
+        # blacklisting: disallow certain class/type combinations
         if phrase.p_class in self.black_list.keys() \
            and phrase.p_type in self.black_list[phrase.p_class]:
             return None
 
-        #whitelisting: if class is in whitelist, allow only tags in the list
+        # whitelisting: if class is in whitelist, allow only tags in the list
         if phrase.p_class in self.white_list.keys() \
            and phrase.p_type not in self.white_list[phrase.p_class]:
             return None
 
-        #sanity check, in case somebody added garbage in the wiki
+        # sanity check, in case somebody added garbage in the wiki
         if not self._check_sanity(phrase):
             self.statistics_handler.notify_one_phrase_invalid()
             return None
@@ -161,7 +161,7 @@ class SPImporter():
 
         sql_tablespace = self.config.TABLESPACE_AUX_DATA
         if sql_tablespace:
-            sql_tablespace = ' TABLESPACE '+sql_tablespace
+            sql_tablespace = ' TABLESPACE ' + sql_tablespace
 
         with self.db_connection.cursor() as db_cursor:
             db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
@@ -174,19 +174,19 @@ class SPImporter():
 
             if table_name in self.table_phrases_to_delete:
                 self.statistics_handler.notify_one_table_ignored()
-                #Remove this table from the ones to delete as it match a class/type
-                #still existing on the special phrases of the wiki.
+                # Remove this table from the ones to delete as it matches a
+                # class/type still existing on the special phrases of the wiki.
                 self.table_phrases_to_delete.remove(table_name)
-                #So dont need to create the table and indexes.
+                # So don't need to create the table and indexes.
                 continue
 
-            #Table creation
+            # Table creation
             self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type)
 
-            #Indexes creation
+            # Indexes creation
             self._create_place_classtype_indexes(sql_tablespace, phrase_class, phrase_type)
 
-            #Grant access on read to the web user.
+            # Grant access on read to the web user.
             self._grant_access_to_webuser(phrase_class, phrase_type)
 
             self.statistics_handler.notify_one_table_created()
@@ -202,8 +202,8 @@ class SPImporter():
         table_name = _classtype_table(phrase_class, phrase_type)
         with self.db_connection.cursor() as db_cursor:
             db_cursor.execute(SQL("""
-                    CREATE TABLE IF NOT EXISTS {{}} {} 
-                    AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex 
+                    CREATE TABLE IF NOT EXISTS {{}} {}
+                    AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
                     WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
                               .format(Identifier(table_name), Literal(phrase_class),
                                       Literal(phrase_type)))
@@ -215,7 +215,7 @@ class SPImporter():
         """
         index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
         base_table = _classtype_table(phrase_class, phrase_type)
-        #Index on centroid
+        # Index on centroid
         if not self.db_connection.index_exists(index_prefix + 'centroid'):
             with self.db_connection.cursor() as db_cursor:
                 db_cursor.execute(SQL("""
@@ -223,7 +223,7 @@ class SPImporter():
                                   .format(Identifier(index_prefix + 'centroid'),
                                           Identifier(base_table)), sql_tablespace)
 
-        #Index on place_id
+        # Index on place_id
         if not self.db_connection.index_exists(index_prefix + 'place_id'):
             with self.db_connection.cursor() as db_cursor:
                 db_cursor.execute(SQL(
@@ -248,18 +248,14 @@ class SPImporter():
             Delete the place_classtype tables.
         """
         LOG.warning('Cleaning database...')
-        #Array containing all queries to execute. Contain tuples of format (query, parameters)
-        queries_parameters = []
-
-        #Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
-        for table in self.table_phrases_to_delete:
-            self.statistics_handler.notify_one_table_deleted()
-            query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table))
-            queries_parameters.append((query, ()))
 
+        # Delete place_classtype tables corresponding to class/type which
+        # are not on the wiki anymore.
         with self.db_connection.cursor() as db_cursor:
-            for query, parameters in queries_parameters:
-                db_cursor.execute(query, parameters)
+            for table in self.table_phrases_to_delete:
+                self.statistics_handler.notify_one_table_deleted()
+                db_cursor.drop_table(table)
+
 
     def _convert_php_settings_if_needed(self, file_path):
         """
@@ -271,7 +267,7 @@ class SPImporter():
         file, extension = os.path.splitext(file_path)
         json_file_path = Path(file + '.json').resolve()
 
-        if extension not in('.php', '.json'):
+        if extension not in ('.php', '.json'):
             raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file does not have a valid extension.')
 
         if extension == '.php' and not isfile(json_file_path):
index 914e15391123cf2571c99924a17d446c7bb8415c..1ad9de7e2cfa299dc18b68e873bf6f00c9592432 100644 (file)
@@ -15,7 +15,7 @@ class SPWikiLoader(Iterator):
     def __init__(self, config, languages=None):
         super().__init__()
         self.config = config
-        #Compile the regex here to increase performances.
+        # Compile the regex here to improve performance.
         self.occurence_pattern = re.compile(
             r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
         )
@@ -35,7 +35,7 @@ class SPWikiLoader(Iterator):
             Parses XML content and extracts special phrases from it.
             Return a list of SpecialPhrase.
         """
-        #One match will be of format [label, class, type, operator, plural]
+        # One match will be of format [label, class, type, operator, plural]
         matches = self.occurence_pattern.findall(xml)
         returned_phrases = set()
         for match in matches:
@@ -65,5 +65,6 @@ class SPWikiLoader(Iterator):
             Requested URL example:
                 https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
         """
-        url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
+        url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
+              + lang.upper()
         return get_url(url)
index 448fbee47b283717e80ba01d28897ff8a08635e4..da7968cac9c0845917f4593da1c41c3b1b9ac18d 100644 (file)
@@ -13,7 +13,7 @@ class SpecialPhrase():
     def __init__(self, p_label, p_class, p_type, p_operator):
         self.p_label = p_label.strip()
         self.p_class = p_class.strip()
-        #Hack around a bug where building=yes was imported with quotes into the wiki
+        # Hack around a bug where building=yes was imported with quotes into the wiki
         self.p_type = re.sub(r'\"|&quot;', '', p_type.strip())
-        #Needed if some operator in the wiki are not written in english
+        # Needed if some operator in the wiki are not written in english
         self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator