From: Sarah Hoffmann Date: Tue, 15 Aug 2023 21:08:44 +0000 (+0200) Subject: cache ICU transliterators and reuse them X-Git-Tag: v4.3.0~22^2 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/bfc706a5968f1fd7daf6d9b3b7d77b456d488084 cache ICU transliterators and reuse them --- diff --git a/nominatim/api/connection.py b/nominatim/api/connection.py index 72cabf78..bf217314 100644 --- a/nominatim/api/connection.py +++ b/nominatim/api/connection.py @@ -7,7 +7,8 @@ """ Extended SQLAlchemy connection class that also includes access to the schema. """ -from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set +from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set, \ + Awaitable, Callable, TypeVar import sqlalchemy as sa from sqlalchemy.ext.asyncio import AsyncConnection @@ -17,6 +18,8 @@ from nominatim.db.sqlalchemy_schema import SearchTables from nominatim.db.sqlalchemy_types import Geometry from nominatim.api.logging import log +T = TypeVar('T') + class SearchConnection: """ An extended SQLAlchemy connection class, that also contains then table definitions. The underlying asynchronous SQLAlchemy @@ -61,11 +64,10 @@ class SearchConnection: Raises a ValueError if the property does not exist. """ - if name.startswith('DB:'): - raise ValueError(f"Illegal property value '{name}'.") + lookup_name = f'DBPROP:{name}' - if cached and name in self._property_cache: - return cast(str, self._property_cache[name]) + if cached and lookup_name in self._property_cache: + return cast(str, self._property_cache[lookup_name]) sql = sa.select(self.t.properties.c.value)\ .where(self.t.properties.c.property == name) @@ -74,7 +76,7 @@ class SearchConnection: if value is None: raise ValueError(f"Property '{name}' not found in database.") - self._property_cache[name] = cast(str, value) + self._property_cache[lookup_name] = cast(str, value) return cast(str, value) @@ -92,6 +94,29 @@ class SearchConnection: return self._property_cache['DB:server_version'] + async def get_cached_value(self, group: str, name: str, + factory: Callable[[], Awaitable[T]]) -> T: + """ Access the cache for this Nominatim instance. + Each cache value needs to belong to a group and have a name. + This function is for internal API use only. + + `factory` is an async callback function that produces + the value if it is not already cached. + + Returns the cached value or the result of factory (also caching + the result). + """ + full_name = f'{group}:{name}' + + if full_name in self._property_cache: + return cast(T, self._property_cache[full_name]) + + value = await factory() + self._property_cache[full_name] = value + + return value + + async def get_class_table(self, cls: str, typ: str) -> Optional[SaFromClause]: """ Lookup up if there is a classtype table for the given category and return a SQLAlchemy table for it, if it exists. diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py index 7bf516e3..b68e8d10 100644 --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@ -133,10 +133,19 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): async def setup(self) -> None: """ Set up static data structures needed for the analysis. """ - rules = await self.conn.get_property('tokenizer_import_normalisation') - self.normalizer = Transliterator.createFromRules("normalization", rules) - rules = await self.conn.get_property('tokenizer_import_transliteration') - self.transliterator = Transliterator.createFromRules("transliteration", rules) + async def _make_normalizer() -> Any: + rules = await self.conn.get_property('tokenizer_import_normalisation') + return Transliterator.createFromRules("normalization", rules) + + self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer', + _make_normalizer) + + async def _make_transliterator() -> Any: + rules = await self.conn.get_property('tokenizer_import_transliteration') + return Transliterator.createFromRules("transliteration", rules) + + self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator', + _make_transliterator) if 'word' not in self.conn.t.meta.tables: sa.Table('word', self.conn.t.meta,