From: Sarah Hoffmann Date: Mon, 30 Jan 2023 10:17:22 +0000 (+0100) Subject: Merge pull request #2963 from lonvia/add-sqlalchemy-schema X-Git-Tag: v4.3.0~108 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/5c55c1d8a15f525bf1c4a17aedbd6f960ad9d3b6?hp=c7e8a82d687758fea374edd43f11d1533d942b7b Merge pull request #2963 from lonvia/add-sqlalchemy-schema Add table definitions for SQLAlchemy --- diff --git a/.github/actions/build-nominatim/action.yml b/.github/actions/build-nominatim/action.yml index 1aa12daa..d5f04efe 100644 --- a/.github/actions/build-nominatim/action.yml +++ b/.github/actions/build-nominatim/action.yml @@ -27,10 +27,10 @@ runs: run: | sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} if [ "$FLAVOUR" == "oldstuff" ]; then - pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4 datrie asyncpg + pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4 GeoAlchemy2==0.10.0 datrie asyncpg else sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml python3-asyncpg - pip3 install sqlalchemy + pip3 install sqlalchemy GeoAlchemy2 fi shell: bash env: diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index e7ba35f6..1f6f1bb7 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -99,18 +99,22 @@ jobs: if: (matrix.flavour == 'oldstuff') || (matrix.flavour == 'ubuntu-22') - name: Install test prerequsites (from apt for Ununtu 2x) - run: sudo apt-get install -y -qq python3-pytest uvicorn + run: sudo apt-get install -y -qq python3-pytest python3-pytest-asyncio uvicorn if: matrix.flavour != 'oldstuff' + - name: Install 
newer pytest-asyncio + run: pip3 install -U pytest-asyncio + if: matrix.flavour == 'ubuntu-20' + - name: Install test prerequsites (from pip for Ubuntu 18) - run: pip3 install pytest uvicorn + run: pip3 install pytest pytest-asyncio uvicorn if: matrix.flavour == 'oldstuff' - name: Install Python webservers run: pip3 install falcon sanic sanic-testing sanic-cors starlette - - name: Install latest pylint/mypy - run: pip3 install -U pylint mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson typing-extensions asgi_lifespan sqlalchemy2-stubs + - name: Install latest pylint + run: pip3 install -U pylint asgi_lifespan - name: PHP linting run: phpcs --report-width=120 . @@ -123,7 +127,6 @@ jobs: - name: PHP unit tests run: phpunit ./ working-directory: Nominatim/test/php - if: ${{ (matrix.ubuntu == 20) || (matrix.ubuntu == 22) }} - name: Python unit tests run: python3 -m pytest test/python @@ -134,8 +137,8 @@ jobs: python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3 working-directory: Nominatim/test/bdd - - name: Install newer Python packages (for typechecking info) - run: pip3 install -U osmium uvicorn + - name: Install mypy and typechecking info + run: pip3 install -U mypy osmium uvicorn types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson typing-extensions if: matrix.flavour != 'oldstuff' - name: Python static typechecking diff --git a/.mypy.ini b/.mypy.ini index 611c3c5d..aa6782de 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -18,3 +18,6 @@ ignore_missing_imports = True [mypy-falcon.*] ignore_missing_imports = True + +[mypy-geoalchemy2.*] +ignore_missing_imports = True diff --git a/docs/admin/Installation.md b/docs/admin/Installation.md index 663d5c37..e55e4b37 100644 --- a/docs/admin/Installation.md +++ b/docs/admin/Installation.md @@ -49,6 +49,7 @@ For running Nominatim: * [psutil](https://github.com/giampaolo/psutil) * [Jinja2](https://palletsprojects.com/p/jinja/) * 
[SQLAlchemy](https://www.sqlalchemy.org/) (1.4+ with greenlet support) + * [GeoAlchemy2](https://geoalchemy-2.readthedocs.io/) (0.10+) * [asyncpg](https://magicstack.github.io/asyncpg) (0.8+) * [PyICU](https://pypi.org/project/PyICU/) * [PyYaml](https://pyyaml.org/) (5.1+) diff --git a/docs/develop/Development-Environment.md b/docs/develop/Development-Environment.md index fc5008c4..4c678aa6 100644 --- a/docs/develop/Development-Environment.md +++ b/docs/develop/Development-Environment.md @@ -36,6 +36,7 @@ It has the following additional requirements: * [mypy](http://mypy-lang.org/) (plus typing information for external libs) * [Python Typing Extensions](https://github.com/python/typing_extensions) (for Python < 3.9) * [pytest](https://pytest.org) +* [pytest-asyncio](https://pytest-asyncio.readthedocs.io) For testing the Python search frontend, you need to install extra dependencies depending on your choice of webserver framework: @@ -62,9 +63,9 @@ To install all necessary packages run: sudo apt install php-cgi phpunit php-codesniffer \ python3-pip python3-setuptools python3-dev -pip3 install --user behave mkdocs mkdocstrings pytest pylint \ +pip3 install --user behave mkdocs mkdocstrings pytest pytest-asyncio pylint \ mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil \ - types-ujosn \ + types-ujson types-requests typing-extensions\ sanic-testing httpx asgi-lifespan ``` diff --git a/nominatim/api/connection.py b/nominatim/api/connection.py new file mode 100644 index 00000000..79a5e347 --- /dev/null +++ b/nominatim/api/connection.py @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Extended SQLAlchemy connection class that also includes access to the schema. 
+""" +from typing import Any, Mapping, Sequence, Union, Dict, cast + +import sqlalchemy as sa +from sqlalchemy.ext.asyncio import AsyncConnection + +from nominatim.db.sqlalchemy_schema import SearchTables + +class SearchConnection: + """ An extended SQLAlchemy connection class, that also contains + then table definitions. The underlying asynchronous SQLAlchemy + connection can be accessed with the 'connection' property. + The 't' property is the collection of Nominatim tables. + """ + + def __init__(self, conn: AsyncConnection, + tables: SearchTables, + properties: Dict[str, Any]) -> None: + self.connection = conn + self.t = tables # pylint: disable=invalid-name + self._property_cache = properties + + + async def scalar(self, sql: sa.sql.base.Executable, + params: Union[Mapping[str, Any], None] = None + ) -> Any: + """ Execute a 'scalar()' query on the connection. + """ + return await self.connection.scalar(sql, params) + + + async def execute(self, sql: sa.sql.base.Executable, + params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None] = None + ) -> 'sa.engine.Result[Any]': + """ Execute a 'execute()' query on the connection. + """ + return await self.connection.execute(sql, params) + + + async def get_property(self, name: str, cached: bool = True) -> str: + """ Get a property from Nominatim's property table. + + Property values are normally cached so that they are only + retrieved from the database when they are queried for the + first time with this function. Set 'cached' to False to force + reading the property from the database. + + Raises a ValueError if the property does not exist. 
+ """ + if name.startswith('DB:'): + raise ValueError(f"Illegal property value '{name}'.") + + if cached and name in self._property_cache: + return cast(str, self._property_cache[name]) + + sql = sa.select(self.t.properties.c.value)\ + .where(self.t.properties.c.property == name) + value = await self.connection.scalar(sql) + + if value is None: + raise ValueError(f"Property '{name}' not found in database.") + + self._property_cache[name] = cast(str, value) + + return cast(str, value) + + + async def get_db_property(self, name: str) -> Any: + """ Get a setting from the database. At the moment, only + 'server_version', the version of the database software, can + be retrieved with this function. + + Raises a ValueError if the property does not exist. + """ + if name != 'server_version': + raise ValueError(f"DB setting '{name}' not found in database.") + + return self._property_cache['DB:server_version'] diff --git a/nominatim/api/core.py b/nominatim/api/core.py index 159229dd..54f02a93 100644 --- a/nominatim/api/core.py +++ b/nominatim/api/core.py @@ -7,7 +7,7 @@ """ Implementation of classes for API access via libraries. """ -from typing import Mapping, Optional, Any, AsyncIterator +from typing import Mapping, Optional, Any, AsyncIterator, Dict import asyncio import contextlib from pathlib import Path @@ -16,8 +16,10 @@ import sqlalchemy as sa import sqlalchemy.ext.asyncio as sa_asyncio import asyncpg +from nominatim.db.sqlalchemy_schema import SearchTables from nominatim.config import Configuration from nominatim.api.status import get_status, StatusResult +from nominatim.api.connection import SearchConnection class NominatimAPIAsync: """ API loader asynchornous version. 
@@ -29,6 +31,8 @@ class NominatimAPIAsync: self._engine_lock = asyncio.Lock() self._engine: Optional[sa_asyncio.AsyncEngine] = None + self._tables: Optional[SearchTables] = None + self._property_cache: Dict[str, Any] = {'DB:server_version': 0} async def setup_database(self) -> None: @@ -61,18 +65,21 @@ class NominatimAPIAsync: try: async with engine.begin() as conn: result = await conn.scalar(sa.text('SHOW server_version_num')) - self.server_version = int(result) + server_version = int(result) except asyncpg.PostgresError: - self.server_version = 0 + server_version = 0 - if self.server_version >= 110000: - @sa.event.listens_for(engine.sync_engine, "connect") # type: ignore[misc] + if server_version >= 110000: + @sa.event.listens_for(engine.sync_engine, "connect") def _on_connect(dbapi_con: Any, _: Any) -> None: cursor = dbapi_con.cursor() cursor.execute("SET jit_above_cost TO '-1'") # Make sure that all connections get the new settings await self.close() + self._property_cache['DB:server_version'] = server_version + + self._tables = SearchTables(sa.MetaData(), engine.name) # pylint: disable=no-member self._engine = engine @@ -86,7 +93,7 @@ class NominatimAPIAsync: @contextlib.asynccontextmanager - async def begin(self) -> AsyncIterator[sa_asyncio.AsyncConnection]: + async def begin(self) -> AsyncIterator[SearchConnection]: """ Create a new connection with automatic transaction handling. This function may be used to get low-level access to the database. @@ -97,9 +104,10 @@ class NominatimAPIAsync: await self.setup_database() assert self._engine is not None + assert self._tables is not None async with self._engine.begin() as conn: - yield conn + yield SearchConnection(conn, self._tables, self._property_cache) async def status(self) -> StatusResult: diff --git a/nominatim/api/status.py b/nominatim/api/status.py index 560953d3..61e36cc3 100644 --- a/nominatim/api/status.py +++ b/nominatim/api/status.py @@ -7,58 +7,40 @@ """ Classes and function releated to status call. 
""" -from typing import Optional, cast +from typing import Optional import datetime as dt +import dataclasses import sqlalchemy as sa -from sqlalchemy.ext.asyncio.engine import AsyncConnection -import asyncpg +from nominatim.api.connection import SearchConnection from nominatim import version +@dataclasses.dataclass class StatusResult: """ Result of a call to the status API. """ + status: int + message: str + software_version = version.NOMINATIM_VERSION + data_updated: Optional[dt.datetime] = None + database_version: Optional[version.NominatimVersion] = None - def __init__(self, status: int, msg: str): - self.status = status - self.message = msg - self.software_version = version.NOMINATIM_VERSION - self.data_updated: Optional[dt.datetime] = None - self.database_version: Optional[version.NominatimVersion] = None - -async def _get_database_date(conn: AsyncConnection) -> Optional[dt.datetime]: - """ Query the database date. - """ - sql = sa.text('SELECT lastimportdate FROM import_status LIMIT 1') - result = await conn.execute(sql) - - for row in result: - return cast(dt.datetime, row[0]) - - return None - - -async def _get_database_version(conn: AsyncConnection) -> Optional[version.NominatimVersion]: - sql = sa.text("""SELECT value FROM nominatim_properties - WHERE property = 'database_version'""") - result = await conn.execute(sql) - - for row in result: - return version.parse_version(cast(str, row[0])) - - return None - - -async def get_status(conn: AsyncConnection) -> StatusResult: +async def get_status(conn: SearchConnection) -> StatusResult: """ Execute a status API call. 
""" status = StatusResult(0, 'OK') + + # Last update date + sql = sa.select(conn.t.import_status.c.lastimportdate).limit(1) + status.data_updated = await conn.scalar(sql) + + # Database version try: - status.data_updated = await _get_database_date(conn) - status.database_version = await _get_database_version(conn) - except asyncpg.PostgresError: - return StatusResult(700, 'Database connection failed') + verstr = await conn.get_property('database_version') + status.database_version = version.parse_version(verstr) + except ValueError: + pass return status diff --git a/nominatim/db/sqlalchemy_schema.py b/nominatim/db/sqlalchemy_schema.py new file mode 100644 index 00000000..17839168 --- /dev/null +++ b/nominatim/db/sqlalchemy_schema.py @@ -0,0 +1,142 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +SQLAlchemy definitions for all tables used by the frontend. +""" +from typing import Any + +import sqlalchemy as sa +from geoalchemy2 import Geometry +from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB +from sqlalchemy.dialects.sqlite import JSON as sqlite_json + +#pylint: disable=too-many-instance-attributes +class SearchTables: + """ Data class that holds the tables of the Nominatim database. 
+ """ + + def __init__(self, meta: sa.MetaData, engine_name: str) -> None: + if engine_name == 'postgresql': + Composite: Any = HSTORE + Json: Any = JSONB + IntArray: Any = ARRAY(sa.Integer()) #pylint: disable=invalid-name + elif engine_name == 'sqlite': + Composite = sqlite_json + Json = sqlite_json + IntArray = sqlite_json + else: + raise ValueError("Only 'postgresql' and 'sqlite' engines are supported.") + + self.meta = meta + + self.import_status = sa.Table('import_status', meta, + sa.Column('lastimportdate', sa.DateTime(True), nullable=False), + sa.Column('sequence_id', sa.Integer), + sa.Column('indexed', sa.Boolean)) + + self.properties = sa.Table('nominatim_properties', meta, + sa.Column('property', sa.Text, nullable=False), + sa.Column('value', sa.Text)) + + self.placex = sa.Table('placex', meta, + sa.Column('place_id', sa.BigInteger, nullable=False, unique=True), + sa.Column('parent_place_id', sa.BigInteger), + sa.Column('linked_place_id', sa.BigInteger), + sa.Column('importance', sa.Float), + sa.Column('indexed_date', sa.DateTime), + sa.Column('rank_address', sa.SmallInteger), + sa.Column('rank_search', sa.SmallInteger), + sa.Column('partition', sa.SmallInteger), + sa.Column('indexed_status', sa.SmallInteger), + sa.Column('osm_type', sa.String(1), nullable=False), + sa.Column('osm_id', sa.BigInteger, nullable=False), + sa.Column('class', sa.Text, nullable=False, key='class_'), + sa.Column('type', sa.Text, nullable=False), + sa.Column('admin_level', sa.SmallInteger), + sa.Column('name', Composite), + sa.Column('address', Composite), + sa.Column('extratags', Composite), + sa.Column('geometry', Geometry(srid=4326), nullable=False), + sa.Column('wikipedia', sa.Text), + sa.Column('country_code', sa.String(2)), + sa.Column('housenumber', sa.Text), + sa.Column('postcode', sa.Text), + sa.Column('centroid', Geometry(srid=4326, spatial_index=False))) + + self.addressline = sa.Table('place_addressline', meta, + sa.Column('place_id', sa.BigInteger, index=True), + 
sa.Column('address_place_id', sa.BigInteger, index=True), + sa.Column('distance', sa.Float), + sa.Column('cached_rank_address', sa.SmallInteger), + sa.Column('fromarea', sa.Boolean), + sa.Column('isaddress', sa.Boolean)) + + self.postcode = sa.Table('location_postcode', meta, + sa.Column('place_id', sa.BigInteger, unique=True), + sa.Column('parent_place_id', sa.BigInteger), + sa.Column('rank_search', sa.SmallInteger), + sa.Column('rank_address', sa.SmallInteger), + sa.Column('indexed_status', sa.SmallInteger), + sa.Column('indexed_date', sa.DateTime), + sa.Column('country_code', sa.String(2)), + sa.Column('postcode', sa.Text, index=True), + sa.Column('geometry', Geometry(srid=4326))) + + self.osmline = sa.Table('location_property_osmline', meta, + sa.Column('place_id', sa.BigInteger, nullable=False, unique=True), + sa.Column('osm_id', sa.BigInteger), + sa.Column('parent_place_id', sa.BigInteger), + sa.Column('indexed_date', sa.DateTime), + sa.Column('startnumber', sa.Integer), + sa.Column('endnumber', sa.Integer), + sa.Column('step', sa.SmallInteger), + sa.Column('partition', sa.SmallInteger), + sa.Column('indexed_status', sa.SmallInteger), + sa.Column('linegeo', Geometry(srid=4326)), + sa.Column('address', Composite), + sa.Column('postcode', sa.Text), + sa.Column('country_code', sa.String(2))) + + self.word = sa.Table('word', meta, + sa.Column('word_id', sa.Integer), + sa.Column('word_token', sa.Text, nullable=False), + sa.Column('type', sa.Text, nullable=False), + sa.Column('word', sa.Text), + sa.Column('info', Json)) + + self.country_name = sa.Table('country_name', meta, + sa.Column('country_code', sa.String(2)), + sa.Column('name', Composite), + sa.Column('derived_name', Composite), + sa.Column('country_default_language_code', sa.Text), + sa.Column('partition', sa.Integer)) + + self.country_grid = sa.Table('country_osm_grid', meta, + sa.Column('country_code', sa.String(2)), + sa.Column('area', sa.Float), + sa.Column('geometry', Geometry(srid=4326))) + + # The 
following tables are not necessarily present. + self.search_name = sa.Table('search_name', meta, + sa.Column('place_id', sa.BigInteger, index=True), + sa.Column('importance', sa.Float), + sa.Column('search_rank', sa.SmallInteger), + sa.Column('address_rank', sa.SmallInteger), + sa.Column('name_vector', IntArray, index=True), + sa.Column('nameaddress_vector', IntArray, index=True), + sa.Column('country_code', sa.String(2)), + sa.Column('centroid', Geometry(srid=4326))) + + self.tiger = sa.Table('location_property_tiger', meta, + sa.Column('place_id', sa.BigInteger), + sa.Column('parent_place_id', sa.BigInteger), + sa.Column('startnumber', sa.Integer), + sa.Column('endnumber', sa.Integer), + sa.Column('step', sa.SmallInteger), + sa.Column('partition', sa.SmallInteger), + sa.Column('linegeo', Geometry(srid=4326, spatial_index=False)), + sa.Column('postcode', sa.Text)) diff --git a/test/python/api/test_api_connection.py b/test/python/api/test_api_connection.py new file mode 100644 index 00000000..5609cb03 --- /dev/null +++ b/test/python/api/test_api_connection.py @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Tests for enhanced connection class for API functions. +""" +from pathlib import Path +import pytest +import pytest_asyncio + +import sqlalchemy as sa + +from nominatim.api import NominatimAPIAsync + +@pytest_asyncio.fixture +async def apiobj(temp_db): + """ Create an asynchronous SQLAlchemy engine for the test DB. 
+ """ + api = NominatimAPIAsync(Path('/invalid'), {}) + yield api + await api.close() + + +@pytest.mark.asyncio +async def test_run_scalar(apiobj, table_factory): + table_factory('foo', definition='that TEXT', content=(('a', ),)) + + async with apiobj.begin() as conn: + assert await conn.scalar(sa.text('SELECT * FROM foo')) == 'a' + + +@pytest.mark.asyncio +async def test_run_execute(apiobj, table_factory): + table_factory('foo', definition='that TEXT', content=(('a', ),)) + + async with apiobj.begin() as conn: + result = await conn.execute(sa.text('SELECT * FROM foo')) + assert result.fetchone()[0] == 'a' + + +@pytest.mark.asyncio +async def test_get_property_existing_cached(apiobj, table_factory): + table_factory('nominatim_properties', + definition='property TEXT, value TEXT', + content=(('dbv', '96723'), )) + + async with apiobj.begin() as conn: + assert await conn.get_property('dbv') == '96723' + + await conn.execute(sa.text('TRUNCATE nominatim_properties')) + + assert await conn.get_property('dbv') == '96723' + + +@pytest.mark.asyncio +async def test_get_property_existing_uncached(apiobj, table_factory): + table_factory('nominatim_properties', + definition='property TEXT, value TEXT', + content=(('dbv', '96723'), )) + + async with apiobj.begin() as conn: + assert await conn.get_property('dbv') == '96723' + + await conn.execute(sa.text("UPDATE nominatim_properties SET value = '1'")) + + assert await conn.get_property('dbv', cached=False) == '1' + + +@pytest.mark.asyncio +@pytest.mark.parametrize('param', ['foo', 'DB:server_version']) +async def test_get_property_missing(apiobj, table_factory, param): + table_factory('nominatim_properties', + definition='property TEXT, value TEXT') + + async with apiobj.begin() as conn: + with pytest.raises(ValueError): + await conn.get_property(param) + + +@pytest.mark.asyncio +async def test_get_db_property_existing(apiobj): + async with apiobj.begin() as conn: + assert await conn.get_db_property('server_version') > 0 + + 
+@pytest.mark.asyncio +async def test_get_db_property_bad_name(apiobj): + async with apiobj.begin() as conn: + with pytest.raises(ValueError): + await conn.get_db_property('dfkgjd.rijg') diff --git a/vagrant/Install-on-Ubuntu-20.sh b/vagrant/Install-on-Ubuntu-20.sh index 34e81637..e6ad9ca9 100755 --- a/vagrant/Install-on-Ubuntu-20.sh +++ b/vagrant/Install-on-Ubuntu-20.sh @@ -33,7 +33,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: # Some of the Python packages that come with Ubuntu 20.04 are too old, so # install the latest version from pip: - pip3 install --user sqlalchemy asyncpg + pip3 install --user sqlalchemy GeoAlchemy2 asyncpg # diff --git a/vagrant/Install-on-Ubuntu-22.sh b/vagrant/Install-on-Ubuntu-22.sh index 82e706c9..68bd8682 100755 --- a/vagrant/Install-on-Ubuntu-22.sh +++ b/vagrant/Install-on-Ubuntu-22.sh @@ -29,7 +29,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: php-cli php-pgsql php-intl libicu-dev python3-dotenv \ python3-psycopg2 python3-psutil python3-jinja2 \ python3-icu python3-datrie python3-sqlalchemy \ - python3-asyncpg git + python3-geoalchemy2 python3-asyncpg git # # System Configuration