From: Sarah Hoffmann Date: Tue, 13 Jul 2021 14:46:12 +0000 (+0200) Subject: Merge pull request #2393 from lonvia/fix-flake8-issues X-Git-Tag: v4.0.0~51 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/bc8b2d4ae0dbaef64448ddcb530de9626da9d82d?hp=b4fec57b6d53f8e8a45c46ff11f13cbcbea1006a Merge pull request #2393 from lonvia/fix-flake8-issues Fix flake8 issues --- diff --git a/nominatim/cli.py b/nominatim/cli.py index 533a920e..5626deb4 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -103,7 +103,7 @@ class CommandlineParser: return 1 -##### Subcommand classes +# Subcommand classes # # Each class needs to implement two functions: add_args() adds the CLI parameters # for the subfunction, run() executes the subcommand. diff --git a/nominatim/clicmd/api.py b/nominatim/clicmd/api.py index a5556952..b99d37b8 100644 --- a/nominatim/clicmd/api.py +++ b/nominatim/clicmd/api.py @@ -90,7 +90,7 @@ class APISearch: if args.query: params = dict(q=args.query) else: - params = {k : getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)} + params = {k: getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)} for param, _ in EXTRADATA_PARAMS: if getattr(args, param): diff --git a/nominatim/clicmd/args.py b/nominatim/clicmd/args.py index ee194187..996f48f2 100644 --- a/nominatim/clicmd/args.py +++ b/nominatim/clicmd/args.py @@ -24,4 +24,4 @@ class NominatimArgs: main_data=self.config.TABLESPACE_PLACE_DATA, main_index=self.config.TABLESPACE_PLACE_INDEX ) - ) + ) diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index fbc23350..969998ad 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -61,7 +61,7 @@ class UpdateRefresh: args.threads or 1) indexer.index_postcodes() else: - LOG.error("The place table doesn\'t exist. " \ + LOG.error("The place table doesn't exist. " "Postcode updates on a frozen database is not possible.") if args.word_counts: diff --git a/nominatim/clicmd/replication.py b/nominatim/clicmd/replication.py index 242b0f6a..4c8cd44e 100644 --- a/nominatim/clicmd/replication.py +++ b/nominatim/clicmd/replication.py @@ -93,7 +93,7 @@ class UpdateReplication: indexed_only=not args.once) # Sanity check to not overwhelm the Geofabrik servers. - if 'download.geofabrik.de'in params['base_url']\ + if 'download.geofabrik.de' in params['base_url']\ and params['update_interval'] < 86400: LOG.fatal("Update interval too low for download.geofabrik.de.\n" "Please check install documentation " diff --git a/nominatim/db/async_connection.py b/nominatim/db/async_connection.py index f06f3159..a86c5bdc 100644 --- a/nominatim/db/async_connection.py +++ b/nominatim/db/async_connection.py @@ -85,7 +85,7 @@ class DBConnection: # Use a dict to hand in the parameters because async is a reserved # word in Python3. - self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True}) + self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True}) self.wait() self.cursor = self.conn.cursor(cursor_factory=cursor_factory) diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py index ac8d7c85..1319ac16 100644 --- a/nominatim/db/connection.py +++ b/nominatim/db/connection.py @@ -8,6 +8,7 @@ import os import psycopg2 import psycopg2.extensions import psycopg2.extras +from psycopg2 import sql as pysql from nominatim.errors import UsageError @@ -25,6 +26,16 @@ class _Cursor(psycopg2.extras.DictCursor): super().execute(query, args) + + def execute_values(self, sql, argslist, template=None): + """ Wrapper for the psycopg2 convenience function to execute + SQL for a list of values. + """ + LOG.debug("SQL execute_values(%s, %s)", sql, argslist) + + psycopg2.extras.execute_values(self, sql, argslist, template=template) + + def scalar(self, sql, args=None): """ Execute query that returns a single value. The value is returned. If the query yields more than one row, a ValueError is raised. @@ -37,6 +48,22 @@ class _Cursor(psycopg2.extras.DictCursor): return self.fetchone()[0] + def drop_table(self, name, if_exists=True, cascade=False): + """ Drop the table with the given name. + Set `if_exists` to False if a non-existant table should raise + an exception instead of just being ignored. If 'cascade' is set + to True then all dependent tables are deleted as well. + """ + sql = 'DROP TABLE ' + if if_exists: + sql += 'IF EXISTS ' + sql += '{}' + if cascade: + sql += ' CASCADE' + + self.execute(pysql.SQL(sql).format(pysql.Identifier(name))) + + class _Connection(psycopg2.extensions.connection): """ A connection that provides the specialised cursor by default and adds convenience functions for administrating the database. @@ -75,14 +102,13 @@ class _Connection(psycopg2.extensions.connection): return True - def drop_table(self, name, if_exists=True): + def drop_table(self, name, if_exists=True, cascade=False): """ Drop the table with the given name. Set `if_exists` to False if a non-existant table should raise an exception instead of just being ignored. """ with self.cursor() as cur: - cur.execute("""DROP TABLE {} "{}" - """.format('IF EXISTS' if if_exists else '', name)) + cur.drop_table(name, if_exists, cascade) self.commit() diff --git a/nominatim/db/sql_preprocessor.py b/nominatim/db/sql_preprocessor.py index dafc5de4..d756a215 100644 --- a/nominatim/db/sql_preprocessor.py +++ b/nominatim/db/sql_preprocessor.py @@ -61,7 +61,7 @@ def _setup_postgresql_features(conn): """ pg_version = conn.server_version_tuple() return { - 'has_index_non_key_column' : pg_version >= (11, 0, 0) + 'has_index_non_key_column': pg_version >= (11, 0, 0) } class SQLPreprocessor: diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py index 4d4305e7..9a4a41a5 100644 --- a/nominatim/db/utils.py +++ b/nominatim/db/utils.py @@ -61,9 +61,9 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None) # List of characters that need to be quoted for the copy command. -_SQL_TRANSLATION = {ord(u'\\') : u'\\\\', - ord(u'\t') : u'\\t', - ord(u'\n') : u'\\n'} +_SQL_TRANSLATION = {ord(u'\\'): u'\\\\', + ord(u'\t'): u'\\t', + ord(u'\n'): u'\\n'} class CopyBuffer: """ Data collector for the copy_from command. diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index 76883500..d0cfb391 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -203,7 +203,7 @@ class Indexer: # And insert the curent batch for idx in range(0, len(places), batch): - part = places[idx:idx+batch] + part = places[idx:idx + batch] LOG.debug("Processing places: %s", str(part)) runner.index_places(pool.next_free_worker(), part) progress.add(len(part)) diff --git a/nominatim/indexer/progress.py b/nominatim/indexer/progress.py index 177e67b8..634b1fae 100644 --- a/nominatim/indexer/progress.py +++ b/nominatim/indexer/progress.py @@ -63,7 +63,7 @@ class ProgressLogger: places_per_sec = self.done_places else: diff_seconds = (rank_end_time - self.rank_start_time).total_seconds() - places_per_sec = self.done_places/diff_seconds + places_per_sec = self.done_places / diff_seconds LOG.warning("Done %d/%d in %d @ %.3f per second - FINISHED %s\n", self.done_places, self.total_places, int(diff_seconds), diff --git a/nominatim/indexer/runners.py b/nominatim/indexer/runners.py index aa607faa..068d7d0f 100644 --- a/nominatim/indexer/runners.py +++ b/nominatim/indexer/runners.py @@ -5,13 +5,17 @@ tasks. import functools import psycopg2.extras +from psycopg2 import sql as pysql # pylint: disable=C0111 +def _mk_valuelist(template, num): + return pysql.SQL(',').join([pysql.SQL(template)] * num) + class AbstractPlacexRunner: """ Returns SQL commands for indexing of the placex table. """ - SELECT_SQL = 'SELECT place_id FROM placex' + SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ') def __init__(self, rank, analyzer): self.rank = rank @@ -21,11 +25,12 @@ class AbstractPlacexRunner: @staticmethod @functools.lru_cache(maxsize=1) def _index_sql(num_places): - return """ UPDATE placex - SET indexed_status = 0, address = v.addr, token_info = v.ti - FROM (VALUES {}) as v(id, addr, ti) - WHERE place_id = v.id - """.format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places)) + return pysql.SQL( + """ UPDATE placex + SET indexed_status = 0, address = v.addr, token_info = v.ti + FROM (VALUES {}) as v(id, addr, ti) + WHERE place_id = v.id + """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places)) @staticmethod @@ -52,14 +57,15 @@ class RankRunner(AbstractPlacexRunner): return "rank {}".format(self.rank) def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE rank_address = {} and indexed_status > 0 - """.format(self.rank) + return pysql.SQL("""SELECT count(*) FROM placex + WHERE rank_address = {} and indexed_status > 0 + """).format(pysql.Literal(self.rank)) def sql_get_objects(self): - return """{} WHERE indexed_status > 0 and rank_address = {} - ORDER BY geometry_sector - """.format(self.SELECT_SQL, self.rank) + return self.SELECT_SQL + pysql.SQL( + """WHERE indexed_status > 0 and rank_address = {} + ORDER BY geometry_sector + """).format(pysql.Literal(self.rank)) class BoundaryRunner(AbstractPlacexRunner): @@ -71,17 +77,18 @@ class BoundaryRunner(AbstractPlacexRunner): return "boundaries rank {}".format(self.rank) def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE indexed_status > 0 - AND rank_search = {} - AND class = 'boundary' and type = 'administrative' - """.format(self.rank) + return pysql.SQL("""SELECT count(*) FROM placex + WHERE indexed_status > 0 + AND rank_search = {} + AND class = 'boundary' and type = 'administrative' + """).format(pysql.Literal(self.rank)) def sql_get_objects(self): - return """{} WHERE indexed_status > 0 and rank_search = {} - and class = 'boundary' and type = 'administrative' - ORDER BY partition, admin_level - """.format(self.SELECT_SQL, self.rank) + return self.SELECT_SQL + pysql.SQL( + """WHERE indexed_status > 0 and rank_search = {} + and class = 'boundary' and type = 'administrative' + ORDER BY partition, admin_level + """).format(pysql.Literal(self.rank)) class InterpolationRunner: @@ -120,11 +127,11 @@ class InterpolationRunner: @staticmethod @functools.lru_cache(maxsize=1) def _index_sql(num_places): - return """ UPDATE location_property_osmline - SET indexed_status = 0, address = v.addr, token_info = v.ti - FROM (VALUES {}) as v(id, addr, ti) - WHERE place_id = v.id - """.format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places)) + return pysql.SQL("""UPDATE location_property_osmline + SET indexed_status = 0, address = v.addr, token_info = v.ti + FROM (VALUES {}) as v(id, addr, ti) + WHERE place_id = v.id + """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places)) def index_places(self, worker, places): @@ -157,6 +164,6 @@ class PostcodeRunner: @staticmethod def index_places(worker, ids): - worker.perform(""" UPDATE location_postcode SET indexed_status = 0 - WHERE place_id IN ({}) - """.format(','.join((str(i[0]) for i in ids)))) + worker.perform(pysql.SQL("""UPDATE location_postcode SET indexed_status = 0 + WHERE place_id IN ({})""") + .format(pysql.SQL(',').join((pysql.Literal(i[0]) for i in ids)))) diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py index 12ee0404..6d3d11c1 100644 --- a/nominatim/tokenizer/legacy_icu_tokenizer.py +++ b/nominatim/tokenizer/legacy_icu_tokenizer.py @@ -9,8 +9,6 @@ import re from textwrap import dedent from pathlib import Path -import psycopg2.extras - from nominatim.db.connection import connect from nominatim.db.properties import set_property, get_property from nominatim.db.utils import CopyBuffer @@ -341,7 +339,7 @@ class LegacyICUNameAnalyzer: term = self.name_processor.get_search_normalized(word) if term: copystr.add(word, ' ' + term, cls, typ, - oper if oper in ('in', 'near') else None, 0) + oper if oper in ('in', 'near') else None, 0) added += 1 copystr.copy_out(cursor, 'word', @@ -359,8 +357,7 @@ class LegacyICUNameAnalyzer: to_delete = existing_phrases - new_phrases if to_delete: - psycopg2.extras.execute_values( - cursor, + cursor.execute_values( """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op) WHERE word = name and class = in_class and type = in_type and ((op = '-' and operator is null) or op = operator)""", @@ -386,9 +383,9 @@ class LegacyICUNameAnalyzer: if word_tokens: cur.execute("""INSERT INTO word (word_id, word_token, country_code, search_name_count) - (SELECT nextval('seq_word'), token, '{}', 0 + (SELECT nextval('seq_word'), token, %s, 0 FROM unnest(%s) as token) - """.format(country_code), (list(word_tokens),)) + """, (country_code, list(word_tokens))) def process_place(self, place): diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index 24af1c3a..c19dce2f 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -370,8 +370,7 @@ class LegacyNameAnalyzer: to_delete = existing_phrases - norm_phrases if to_add: - psycopg2.extras.execute_values( - cur, + cur.execute_values( """ INSERT INTO word (word_id, word_token, word, class, type, search_name_count, operator) (SELECT nextval('seq_word'), ' ' || make_standard_name(name), name, @@ -381,8 +380,7 @@ class LegacyNameAnalyzer: to_add) if to_delete and should_replace: - psycopg2.extras.execute_values( - cur, + cur.execute_values( """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op) WHERE word = name and class = in_class and type = in_type and ((op = '-' and operator is null) or op = operator)""", @@ -582,7 +580,7 @@ class _TokenCache: with conn.cursor() as cur: cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text FROM generate_series(1, 100) as i""") - self._cached_housenumbers = {str(r[0]) : r[1] for r in cur} + self._cached_housenumbers = {str(r[0]): r[1] for r in cur} # For postcodes remember the ones that have already been added self.postcodes = set() diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py index d4f793b4..d116554f 100644 --- a/nominatim/tools/check_database.py +++ b/nominatim/tools/check_database.py @@ -24,6 +24,7 @@ def _check(hint=None): """ def decorator(func): title = func.__doc__.split('\n', 1)[0].strip() + def run_check(conn, config): print(title, end=' ... ') ret = func(conn, config) @@ -98,13 +99,12 @@ def _get_indexes(conn): if conn.table_exists('place'): indexes.extend(('idx_placex_pendingsector', 'idx_location_area_country_place_id', - 'idx_place_osm_unique' - )) + 'idx_place_osm_unique')) return indexes -### CHECK FUNCTIONS +# CHECK FUNCTIONS # # Functions are exectured in the order they appear here. diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index df82f9aa..a4d7220f 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -9,6 +9,7 @@ from pathlib import Path import psutil import psycopg2.extras +from psycopg2 import sql as pysql from nominatim.db.connection import connect, get_pg_env from nominatim.db import utils as db_utils @@ -184,7 +185,12 @@ def truncate_data_tables(conn): conn.commit() -_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry' + +_COPY_COLUMNS = pysql.SQL(',').join(map(pysql.Identifier, + ('osm_type', 'osm_id', 'class', 'type', + 'name', 'admin_level', 'address', + 'extratags', 'geometry'))) + def load_data(dsn, threads): """ Copy data into the word and placex table. @@ -195,12 +201,15 @@ def load_data(dsn, threads): for imod in range(place_threads): conn = DBConnection(dsn) conn.connect() - conn.perform("""INSERT INTO placex ({0}) - SELECT {0} FROM place - WHERE osm_id % {1} = {2} - AND NOT (class='place' and (type='houses' or type='postcode')) - AND ST_IsValid(geometry) - """.format(_COPY_COLUMNS, place_threads, imod)) + conn.perform( + pysql.SQL("""INSERT INTO placex ({columns}) + SELECT {columns} FROM place + WHERE osm_id % {total} = {mod} + AND NOT (class='place' and (type='houses' or type='postcode')) + AND ST_IsValid(geometry) + """).format(columns=_COPY_COLUMNS, + total=pysql.Literal(place_threads), + mod=pysql.Literal(imod))) sel.register(conn, selectors.EVENT_READ, conn) # Address interpolations go into another table. @@ -250,6 +259,7 @@ def create_search_indices(conn, config, drop=False): sql.run_sql_file(conn, 'indices.sql', drop=drop) + def create_country_names(conn, tokenizer, languages=None): """ Add default country names to search index. `languages` is a comma- separated list of language codes as used in OSM. If `languages` is not @@ -261,8 +271,7 @@ def create_country_names(conn, tokenizer, languages=None): def _include_key(key): return key == 'name' or \ - (key.startswith('name:') \ - and (not languages or key[5:] in languages)) + (key.startswith('name:') and (not languages or key[5:] in languages)) with conn.cursor() as cur: psycopg2.extras.register_hstore(cur) @@ -271,7 +280,7 @@ def create_country_names(conn, tokenizer, languages=None): with tokenizer.name_analyzer() as analyzer: for code, name in cur: - names = {'countrycode' : code} + names = {'countrycode': code} if code == 'gb': names['short_name'] = 'UK' if code == 'us': diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index 560bb781..72d252b7 100644 --- a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -136,11 +136,11 @@ def run_osm2pgsql(options): def get_url(url): """ Get the contents from the given URL and return it as a UTF-8 string. """ - headers = {"User-Agent" : "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)} + headers = {"User-Agent": "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)} try: with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response: return response.read().decode('utf-8') - except: + except Exception: LOG.fatal('Failed to load URL: %s', url) raise diff --git a/nominatim/tools/freeze.py b/nominatim/tools/freeze.py index cc1bf97e..a182fc8b 100644 --- a/nominatim/tools/freeze.py +++ b/nominatim/tools/freeze.py @@ -3,6 +3,8 @@ Functions for removing unnecessary data from the database. """ from pathlib import Path +from psycopg2 import sql as pysql + UPDATE_TABLES = [ 'address_levels', 'gb_postcode', @@ -21,15 +23,15 @@ def drop_update_tables(conn): """ Drop all tables only necessary for updating the database from OSM replication data. """ - - where = ' or '.join(["(tablename LIKE '{}')".format(t) for t in UPDATE_TABLES]) + parts = (pysql.SQL("(tablename LIKE {})").format(pysql.Literal(t)) for t in UPDATE_TABLES) with conn.cursor() as cur: - cur.execute("SELECT tablename FROM pg_tables WHERE " + where) + cur.execute(pysql.SQL("SELECT tablename FROM pg_tables WHERE ") + + pysql.SQL(' or ').join(parts)) tables = [r[0] for r in cur] for table in tables: - cur.execute('DROP TABLE IF EXISTS "{}" CASCADE'.format(table)) + cur.drop_table(table, cascade=True) conn.commit() diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index de1e5101..d7faca31 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -142,7 +142,8 @@ def change_housenumber_transliteration(conn, **_): BEGIN SELECT array_to_string(array_agg(trans), ';') INTO normtext - FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) + FROM (SELECT lookup_word as trans, + getorcreate_housenumber_id(lookup_word) FROM (SELECT make_standard_name(h) as lookup_word FROM regexp_split_to_table(housenumber, '[,;]') h) x) y; return normtext; diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py index cfd242e2..d00fc97a 100644 --- a/nominatim/tools/postcodes.py +++ b/nominatim/tools/postcodes.py @@ -7,7 +7,7 @@ import gzip import logging from math import isfinite -from psycopg2.extras import execute_values +from psycopg2 import sql as pysql from nominatim.db.connection import connect @@ -52,27 +52,26 @@ class _CountryPostcodesCollector: with conn.cursor() as cur: if to_add: - execute_values(cur, - """INSERT INTO location_postcode - (place_id, indexed_status, country_code, - postcode, geometry) VALUES %s""", - to_add, - template="""(nextval('seq_place'), 1, '{}', - %s, 'SRID=4326;POINT(%s %s)') - """.format(self.country)) + cur.execute_values( + """INSERT INTO location_postcode + (place_id, indexed_status, country_code, + postcode, geometry) VALUES %s""", + to_add, + template=pysql.SQL("""(nextval('seq_place'), 1, {}, + %s, 'SRID=4326;POINT(%s %s)') + """).format(pysql.Literal(self.country))) if to_delete: cur.execute("""DELETE FROM location_postcode WHERE country_code = %s and postcode = any(%s) """, (self.country, to_delete)) if to_update: - execute_values(cur, - """UPDATE location_postcode - SET indexed_status = 2, - geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326) - FROM (VALUES %s) AS v (pc, x, y) - WHERE country_code = '{}' and postcode = pc - """.format(self.country), - to_update) + cur.execute_values( + pysql.SQL("""UPDATE location_postcode + SET indexed_status = 2, + geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326) + FROM (VALUES %s) AS v (pc, x, y) + WHERE country_code = {} and postcode = pc + """).format(pysql.Literal(self.country)), to_update) def _compute_changes(self, conn): @@ -165,11 +164,14 @@ def update_postcodes(dsn, project_dir, tokenizer): with conn.cursor(name="placex_postcodes") as cur: cur.execute(""" SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid) - FROM (SELECT - COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc, + FROM (SELECT + COALESCE(plx.country_code, + get_country_code(ST_Centroid(pl.geometry))) as cc, token_normalized_postcode(pl.address->'postcode') as pc, - ST_Centroid(ST_Collect(COALESCE(plx.centroid, ST_Centroid(pl.geometry)))) as centroid - FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type + ST_Centroid(ST_Collect(COALESCE(plx.centroid, + ST_Centroid(pl.geometry)))) as centroid + FROM place AS pl LEFT OUTER JOIN placex AS plx + ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null GROUP BY cc, pc) xx WHERE pc IS NOT null AND cc IS NOT null diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index 97e2e037..5aaee0c8 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -5,7 +5,7 @@ import json import logging from textwrap import dedent -from psycopg2.extras import execute_values +from psycopg2 import sql as pysql from nominatim.db.utils import execute_file from nominatim.db.sql_preprocessor import SQLPreprocessor @@ -49,7 +49,7 @@ def load_address_levels(conn, table, levels): _add_address_level_rows_from_entry(rows, entry) with conn.cursor() as cur: - cur.execute('DROP TABLE IF EXISTS {}'.format(table)) + cur.drop_table(table) cur.execute("""CREATE TABLE {} (country_code varchar(2), class TEXT, @@ -57,7 +57,8 @@ def load_address_levels(conn, table, levels): rank_search SMALLINT, rank_address SMALLINT)""".format(table)) - execute_values(cur, "INSERT INTO {} VALUES %s".format(table), rows) + cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s") + .format(pysql.Identifier(table)), rows) cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table)) diff --git a/nominatim/tools/special_phrases/sp_importer.py b/nominatim/tools/special_phrases/sp_importer.py index 681990fa..791f4dc3 100644 --- a/nominatim/tools/special_phrases/sp_importer.py +++ b/nominatim/tools/special_phrases/sp_importer.py @@ -44,8 +44,8 @@ class SPImporter(): # This set will contain all existing phrases to be added. # It contains tuples with the following format: (lable, class, type, operator) self.word_phrases = set() - #This set will contain all existing place_classtype tables which doesn't match any - #special phrases class/type on the wiki. + # This set will contain all existing place_classtype tables which doesn't match any + # special phrases class/type on the wiki. self.table_phrases_to_delete = set() def import_phrases(self, tokenizer, should_replace): @@ -60,7 +60,7 @@ class SPImporter(): LOG.warning('Special phrases importation starting') self._fetch_existing_place_classtype_tables() - #Store pairs of class/type for further processing + # Store pairs of class/type for further processing class_type_pairs = set() for loaded_phrases in self.sp_loader: @@ -131,17 +131,17 @@ class SPImporter(): Return the class/type pair corresponding to the phrase. """ - #blacklisting: disallow certain class/type combinations + # blacklisting: disallow certain class/type combinations if phrase.p_class in self.black_list.keys() \ and phrase.p_type in self.black_list[phrase.p_class]: return None - #whitelisting: if class is in whitelist, allow only tags in the list + # whitelisting: if class is in whitelist, allow only tags in the list if phrase.p_class in self.white_list.keys() \ and phrase.p_type not in self.white_list[phrase.p_class]: return None - #sanity check, in case somebody added garbage in the wiki + # sanity check, in case somebody added garbage in the wiki if not self._check_sanity(phrase): self.statistics_handler.notify_one_phrase_invalid() return None @@ -161,7 +161,7 @@ class SPImporter(): sql_tablespace = self.config.TABLESPACE_AUX_DATA if sql_tablespace: - sql_tablespace = ' TABLESPACE '+sql_tablespace + sql_tablespace = ' TABLESPACE ' + sql_tablespace with self.db_connection.cursor() as db_cursor: db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)") @@ -174,19 +174,19 @@ class SPImporter(): if table_name in self.table_phrases_to_delete: self.statistics_handler.notify_one_table_ignored() - #Remove this table from the ones to delete as it match a class/type - #still existing on the special phrases of the wiki. + # Remove this table from the ones to delete as it match a + # class/type still existing on the special phrases of the wiki. self.table_phrases_to_delete.remove(table_name) - #So dont need to create the table and indexes. + # So don't need to create the table and indexes. continue - #Table creation + # Table creation self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type) - #Indexes creation + # Indexes creation self._create_place_classtype_indexes(sql_tablespace, phrase_class, phrase_type) - #Grant access on read to the web user. + # Grant access on read to the web user. self._grant_access_to_webuser(phrase_class, phrase_type) self.statistics_handler.notify_one_table_created() @@ -202,8 +202,8 @@ class SPImporter(): table_name = _classtype_table(phrase_class, phrase_type) with self.db_connection.cursor() as db_cursor: db_cursor.execute(SQL(""" - CREATE TABLE IF NOT EXISTS {{}} {} - AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex + CREATE TABLE IF NOT EXISTS {{}} {} + AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = {{}} AND type = {{}}""".format(sql_tablespace)) .format(Identifier(table_name), Literal(phrase_class), Literal(phrase_type))) @@ -215,7 +215,7 @@ class SPImporter(): """ index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type) base_table = _classtype_table(phrase_class, phrase_type) - #Index on centroid + # Index on centroid if not self.db_connection.index_exists(index_prefix + 'centroid'): with self.db_connection.cursor() as db_cursor: db_cursor.execute(SQL(""" @@ -223,7 +223,7 @@ class SPImporter(): .format(Identifier(index_prefix + 'centroid'), Identifier(base_table)), sql_tablespace) - #Index on place_id + # Index on place_id if not self.db_connection.index_exists(index_prefix + 'place_id'): with self.db_connection.cursor() as db_cursor: db_cursor.execute(SQL( @@ -248,18 +248,14 @@ class SPImporter(): Delete the place_classtype tables. """ LOG.warning('Cleaning database...') - #Array containing all queries to execute. Contain tuples of format (query, parameters) - queries_parameters = [] - - #Delete place_classtype tables corresponding to class/type which are not on the wiki anymore - for table in self.table_phrases_to_delete: - self.statistics_handler.notify_one_table_deleted() - query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table)) - queries_parameters.append((query, ())) + # Delete place_classtype tables corresponding to class/type which + # are not on the wiki anymore. with self.db_connection.cursor() as db_cursor: - for query, parameters in queries_parameters: - db_cursor.execute(query, parameters) + for table in self.table_phrases_to_delete: + self.statistics_handler.notify_one_table_deleted() + db_cursor.drop_table(table) + def _convert_php_settings_if_needed(self, file_path): """ @@ -271,7 +267,7 @@ class SPImporter(): file, extension = os.path.splitext(file_path) json_file_path = Path(file + '.json').resolve() - if extension not in('.php', '.json'): + if extension not in ('.php', '.json'): raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.') if extension == '.php' and not isfile(json_file_path): diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py index 914e1539..1ad9de7e 100644 --- a/nominatim/tools/special_phrases/sp_wiki_loader.py +++ b/nominatim/tools/special_phrases/sp_wiki_loader.py @@ -15,7 +15,7 @@ class SPWikiLoader(Iterator): def __init__(self, config, languages=None): super().__init__() self.config = config - #Compile the regex here to increase performances. + # Compile the regex here to increase performances. self.occurence_pattern = re.compile( r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])' ) @@ -35,7 +35,7 @@ class SPWikiLoader(Iterator): Parses XML content and extracts special phrases from it. Return a list of SpecialPhrase. """ - #One match will be of format [label, class, type, operator, plural] + # One match will be of format [label, class, type, operator, plural] matches = self.occurence_pattern.findall(xml) returned_phrases = set() for match in matches: @@ -65,5 +65,6 @@ class SPWikiLoader(Iterator): Requested URL Example : https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN """ - url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long + url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \ + + lang.upper() return get_url(url) diff --git a/nominatim/tools/special_phrases/special_phrase.py b/nominatim/tools/special_phrases/special_phrase.py index 448fbee4..da7968ca 100644 --- a/nominatim/tools/special_phrases/special_phrase.py +++ b/nominatim/tools/special_phrases/special_phrase.py @@ -13,7 +13,7 @@ class SpecialPhrase(): def __init__(self, p_label, p_class, p_type, p_operator): self.p_label = p_label.strip() self.p_class = p_class.strip() - #Hack around a bug where building=yes was imported with quotes into the wiki + # Hack around a bug where building=yes was imported with quotes into the wiki self.p_type = re.sub(r'\"|"', '', p_type.strip()) - #Needed if some operator in the wiki are not written in english + # Needed if some operator in the wiki are not written in english self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator