From 6b627df4fbce307325fa7013bd037da1ef84b112 Mon Sep 17 00:00:00 2001 From: anqixxx Date: Thu, 24 Jul 2025 16:54:13 -0400 Subject: [PATCH] Locales and localization refactor with Locales as a localizer object. Removed auto-localization from search/search_address APIs (now explicit), simplified AddressLines to subclass List[AddressLine], made display_name a computed property in Results instead of field and removed result-localization circular dependencies --- src/nominatim_api/localization.py | 22 ++++++ src/nominatim_api/results.py | 78 +++++++++++-------- src/nominatim_api/types.py | 4 - src/nominatim_api/v1/server_glue.py | 18 +++-- src/nominatim_db/clicmd/api.py | 28 ++++--- src/nominatim_db/clicmd/export.py | 6 +- test/python/api/test_api_details.py | 6 ++ test/python/api/test_result_formatting_v1.py | 2 +- .../api/test_result_formatting_v1_reverse.py | 5 +- test/python/cli/test_cmd_api.py | 9 +-- 10 files changed, 112 insertions(+), 66 deletions(-) diff --git a/src/nominatim_api/localization.py b/src/nominatim_api/localization.py index 3414286e..3c786a20 100644 --- a/src/nominatim_api/localization.py +++ b/src/nominatim_api/localization.py @@ -9,6 +9,7 @@ Helper functions for localizing names of results. """ from typing import Mapping, List, Optional from .config import Configuration +from .results import AddressLines, BaseResultT import re @@ -96,3 +97,24 @@ class Locales: languages.append(parts[0]) return Locales(languages) + + def localize(self, lines: AddressLines) -> None: + """ Sets the local name of address parts according to the chosen + locale. + + Only address parts that are marked as isaddress are localized. + + AddressLines should be modified in place. + """ + for line in lines: + if line.isaddress and line.names: + line.local_name = self.display_name(line.names) + + def localize_results(self, results: List[BaseResultT]) -> None: + """ Set the local name of results according to the chosen + locale. + """ + for result in results: + result.locale_name = self.display_name(result.names) + if result.address_rows: + self.localize(result.address_rows) diff --git a/src/nominatim_api/results.py b/src/nominatim_api/results.py index 1a4dc8ae..1b74b5aa 100644 --- a/src/nominatim_api/results.py +++ b/src/nominatim_api/results.py @@ -11,7 +11,10 @@ Data classes are part of the public API while the functions are for internal use only. That's why they are implemented as free-standing functions instead of member functions. """ -from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, cast, Callable +from typing import ( + Optional, Tuple, Dict, Sequence, TypeVar, Type, List, + cast, Callable +) import enum import dataclasses import datetime as dt @@ -23,7 +26,6 @@ from .sql.sqlalchemy_types import Geometry from .types import Point, Bbox, LookupDetails from .connection import SearchConnection from .logging import log -from .localization import Locales # This file defines complex result data classes. @@ -130,27 +132,21 @@ class AddressLine: [Localization](Result-Handling.md#localization) below. """ - -class AddressLines(List[AddressLine]): - """ Sequence of address lines order in descending order by their rank. - """ - - def localize(self, locales: Locales) -> List[str]: - """ Set the local name of address parts according to the chosen - locale. Return the list of local names without duplicates. - - Only address parts that are marked as isaddress are localized - and returned. + @property + def display_name(self) -> Optional[str]: + """ Dynamically compute the display name for the Address Line component """ - label_parts: List[str] = [] + if self.local_name: + return self.local_name + elif 'name' in self.names: + return self.names['name'] + elif self.names: + return next(iter(self.names.values()), None) + return None - for line in self: - if line.isaddress and line.names: - line.local_name = locales.display_name(line.names) - if not label_parts or label_parts[-1] != line.local_name: - label_parts.append(line.local_name) - return label_parts +class AddressLines(List[AddressLine]): + """ A wrapper around a list of AddressLine objects.""" @dataclasses.dataclass @@ -189,7 +185,6 @@ class BaseResult: admin_level: int = 15 locale_name: Optional[str] = None - display_name: Optional[str] = None names: Optional[Dict[str, str]] = None address: Optional[Dict[str, str]] = None @@ -225,6 +220,35 @@ class BaseResult: """ return self.centroid[0] + @property + def display_name(self) -> Optional[str]: + """ Dynamically compute the display name for the result place + and, if available, its address information.. + """ + if self.address_rows: # if this is true we need additional processing + label_parts: List[str] = [] + + for line in self.address_rows: # assume locale_name is set by external formatter + if line.isaddress and line.names: + address_name = line.display_name + + if address_name and (not label_parts or label_parts[-1] != address_name): + label_parts.append(address_name) + + if label_parts: + return ', '.join(label_parts) + + # Now adding additional information for reranking + if self.locale_name: + return self.locale_name + elif self.names and 'name' in self.names: + return self.names['name'] + elif self.names: + return next(iter(self.names.values())) + elif self.housenumber: + return self.housenumber + return None + def calculated_importance(self) -> float: """ Get a valid importance value. This is either the stored importance of the value or an artificial value computed from the place's @@ -232,16 +256,6 @@ class BaseResult: """ return self.importance or (0.40001 - (self.rank_search/75.0)) - def localize(self, locales: Locales) -> None: - """ Fill the locale_name and the display_name field for the - place and, if available, its address information. - """ - self.locale_name = locales.display_name(self.names) - if self.address_rows: - self.display_name = ', '.join(self.address_rows.localize(locales)) - else: - self.display_name = self.locale_name - BaseResultT = TypeVar('BaseResultT', bound=BaseResult) @@ -456,8 +470,6 @@ async def add_result_details(conn: SearchConnection, results: List[BaseResultT], log().comment('Query keywords') for result in results: await complete_keywords(conn, result) - for result in results: - result.localize(details.locales) def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine: diff --git a/src/nominatim_api/types.py b/src/nominatim_api/types.py index e58df478..b3048fc9 100644 --- a/src/nominatim_api/types.py +++ b/src/nominatim_api/types.py @@ -17,7 +17,6 @@ from struct import unpack from binascii import unhexlify from .errors import UsageError -from .localization import Locales @dataclasses.dataclass @@ -410,9 +409,6 @@ class LookupDetails: 0.0 means the original geometry is kept. The higher the value, the more the geometry gets simplified. """ - locales: Locales = Locales() - """ Preferred languages for localization of results. - """ @classmethod def from_kwargs(cls: Type[TParam], kwargs: Dict[str, Any]) -> TParam: diff --git a/src/nominatim_api/v1/server_glue.py b/src/nominatim_api/v1/server_glue.py index 4aba0f9d..99f7dc48 100644 --- a/src/nominatim_api/v1/server_glue.py +++ b/src/nominatim_api/v1/server_glue.py @@ -156,8 +156,6 @@ async def details_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: debug = setup_debugging(params) - locales = Locales.from_accept_languages(get_accepted_languages(params)) - result = await api.details(place, address_details=params.get_bool('addressdetails', False), linked_places=params.get_bool('linkedplaces', True), @@ -166,7 +164,6 @@ async def details_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: geometry_output=(GeometryFormat.GEOJSON if params.get_bool('polygon_geojson', False) else GeometryFormat.NONE), - locales=locales ) if debug: @@ -175,6 +172,9 @@ async def details_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: if result is None: params.raise_error('No place with that OSM ID found.', status=404) + locales = Locales.from_accept_languages(get_accepted_languages(params)) + locales.localize_results([result]) + output = params.formatting().format_result( result, fmt, {'locales': locales, @@ -194,7 +194,6 @@ async def reverse_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: details = parse_geometry_details(params, fmt) details['max_rank'] = helpers.zoom_to_rank(params.get_int('zoom', 18)) details['layers'] = get_layers(params) - details['locales'] = Locales.from_accept_languages(get_accepted_languages(params)) result = await api.reverse(coord, **details) @@ -210,6 +209,10 @@ async def reverse_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: else: query = '' + if result: + Locales.from_accept_languages(get_accepted_languages(params)).localize_results( + [result]) + fmt_options = {'query': query, 'extratags': params.get_bool('extratags', False), 'namedetails': params.get_bool('namedetails', False), @@ -227,7 +230,6 @@ async def lookup_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: fmt = parse_format(params, SearchResults, 'xml') debug = setup_debugging(params) details = parse_geometry_details(params, fmt) - details['locales'] = Locales.from_accept_languages(get_accepted_languages(params)) places = [] for oid in (params.get('osm_ids') or '').split(','): @@ -246,6 +248,8 @@ async def lookup_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: if debug: return build_response(params, loglib.get_and_disable(), num_results=len(results)) + Locales.from_accept_languages(get_accepted_languages(params)).localize_results(results) + fmt_options = {'extratags': params.get_bool('extratags', False), 'namedetails': params.get_bool('namedetails', False), 'addressdetails': params.get_bool('addressdetails', True)} @@ -310,8 +314,6 @@ async def search_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: else: details['layers'] = get_layers(params) - details['locales'] = Locales.from_accept_languages(get_accepted_languages(params)) - # unstructured query parameters query = params.get('q', None) # structured query parameters @@ -336,6 +338,8 @@ async def search_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: except UsageError as err: params.raise_error(str(err)) + Locales.from_accept_languages(get_accepted_languages(params)).localize_results(results) + if details['dedupe'] and len(results) > 1: results = helpers.deduplicate_results(results, max_results) diff --git a/src/nominatim_db/clicmd/api.py b/src/nominatim_db/clicmd/api.py index a922dc89..f5a3926f 100644 --- a/src/nominatim_db/clicmd/api.py +++ b/src/nominatim_db/clicmd/api.py @@ -196,7 +196,6 @@ class APISearch: 'excluded': args.exclude_place_ids, 'viewbox': args.viewbox, 'bounded_viewbox': args.bounded, - 'locales': _get_locales(args, api.config.DEFAULT_LANGUAGE) } if args.query: @@ -213,6 +212,9 @@ class APISearch: except napi.UsageError as ex: raise UsageError(ex) from ex + locales = _get_locales(args, api.config.DEFAULT_LANGUAGE) + locales.localize_results(results) + if args.dedupe and len(results) > 1: results = deduplicate_results(results, args.limit) @@ -277,11 +279,14 @@ class APIReverse: layers=layers, address_details=True, # needed for display name geometry_output=_get_geometry_output(args), - geometry_simplification=args.polygon_threshold, - locales=_get_locales(args, api.config.DEFAULT_LANGUAGE)) + geometry_simplification=args.polygon_threshold) except napi.UsageError as ex: raise UsageError(ex) from ex + if result is not None: + locales = _get_locales(args, api.config.DEFAULT_LANGUAGE) + locales.localize_results([result]) + if args.format == 'debug': print(loglib.get_and_disable()) return 0 @@ -339,11 +344,13 @@ class APILookup: results = api.lookup(places, address_details=True, # needed for display name geometry_output=_get_geometry_output(args), - geometry_simplification=args.polygon_threshold or 0.0, - locales=_get_locales(args, api.config.DEFAULT_LANGUAGE)) + geometry_simplification=args.polygon_threshold or 0.0) except napi.UsageError as ex: raise UsageError(ex) from ex + locales = _get_locales(args, api.config.DEFAULT_LANGUAGE) + locales.localize_results(results) + if args.format == 'debug': print(loglib.get_and_disable()) return 0 @@ -425,7 +432,6 @@ class APIDetails: try: with napi.NominatimAPI(args.project_dir) as api: - locales = _get_locales(args, api.config.DEFAULT_LANGUAGE) result = api.details(place, address_details=args.addressdetails, linked_places=args.linkedplaces, @@ -433,19 +439,21 @@ class APIDetails: keywords=args.keywords, geometry_output=(napi.GeometryFormat.GEOJSON if args.polygon_geojson - else napi.GeometryFormat.NONE), - locales=locales) + else napi.GeometryFormat.NONE)) except napi.UsageError as ex: raise UsageError(ex) from ex + if result is not None: + locales = _get_locales(args, api.config.DEFAULT_LANGUAGE) + locales.localize_results([result]) + if args.format == 'debug': print(loglib.get_and_disable()) return 0 if result: _print_output(formatter, result, args.format or 'json', - {'locales': locales, - 'group_hierarchy': args.group_hierarchy}) + {'group_hierarchy': args.group_hierarchy}) return 0 LOG.error("Object not found in database.") diff --git a/src/nominatim_db/clicmd/export.py b/src/nominatim_db/clicmd/export.py index c6a100b2..7bf17d2b 100644 --- a/src/nominatim_db/clicmd/export.py +++ b/src/nominatim_db/clicmd/export.py @@ -151,9 +151,11 @@ async def dump_results(conn: napi.SearchConnection, results: List[ReverseResult], writer: 'csv.DictWriter[str]', lang: Optional[str]) -> None: - locale = napi.Locales([lang] if lang else None) await add_result_details(conn, results, - LookupDetails(address_details=True, locales=locale)) + LookupDetails(address_details=True)) + + locale = napi.Locales([lang] if lang else None) + locale.localize_results(results) for result in results: data = {'placeid': result.place_id, diff --git a/test/python/api/test_api_details.py b/test/python/api/test_api_details.py index 4f6dd92b..c1679f63 100644 --- a/test/python/api/test_api_details.py +++ b/test/python/api/test_api_details.py @@ -34,6 +34,7 @@ def test_lookup_in_placex(apiobj, frontend, idobj): api = frontend(apiobj, options={'details'}) result = api.details(idobj) + napi.Locales().localize_results([result]) assert result is not None @@ -83,6 +84,7 @@ def test_lookup_in_placex_minimal_info(apiobj, frontend): api = frontend(apiobj, options={'details'}) result = api.details(napi.PlaceID(332)) + napi.Locales().localize_results([result]) assert result is not None @@ -149,6 +151,7 @@ def test_lookup_placex_with_address_details(apiobj, frontend): api = frontend(apiobj, options={'details'}) result = api.details(napi.PlaceID(332), address_details=True) + napi.Locales().localize_results([result]) assert result.address_rows == [ napi.AddressLine(place_id=332, osm_object=('W', 4), @@ -350,6 +353,7 @@ def test_lookup_osmline_with_address_details(apiobj, frontend): api = frontend(apiobj, options={'details'}) result = api.details(napi.PlaceID(9000), address_details=True) + napi.Locales().localize_results([result]) assert result.address_rows == [ napi.AddressLine(place_id=332, osm_object=('W', 4), @@ -450,6 +454,7 @@ def test_lookup_tiger_with_address_details(apiobj, frontend): api = frontend(apiobj, options={'details'}) result = api.details(napi.PlaceID(9000), address_details=True) + napi.Locales().localize_results([result]) assert result.address_rows == [ napi.AddressLine(place_id=332, osm_object=('W', 4), @@ -545,6 +550,7 @@ def test_lookup_postcode_with_address_details(apiobj, frontend): api = frontend(apiobj, options={'details'}) result = api.details(napi.PlaceID(9000), address_details=True) + napi.Locales().localize_results([result]) assert result.address_rows == [ napi.AddressLine(place_id=9000, osm_object=None, diff --git a/test/python/api/test_result_formatting_v1.py b/test/python/api/test_result_formatting_v1.py index 406c7654..9d4293a4 100644 --- a/test/python/api/test_result_formatting_v1.py +++ b/test/python/api/test_result_formatting_v1.py @@ -119,7 +119,7 @@ def test_search_details_full(): country_code='ll', indexed_date=import_date ) - search.localize(napi.Locales()) + napi.Locales().localize_results([search]) result = v1_format.format_result(search, 'json', {}) diff --git a/test/python/api/test_result_formatting_v1_reverse.py b/test/python/api/test_result_formatting_v1_reverse.py index 902f0e79..48f5fea5 100644 --- a/test/python/api/test_result_formatting_v1_reverse.py +++ b/test/python/api/test_result_formatting_v1_reverse.py @@ -102,11 +102,10 @@ def test_format_reverse_with_address(fmt): rank_address=10, distance=0.0) ])) - reverse.localize(napi.Locales()) + napi.Locales().localize_results([reverse]) raw = v1_format.format_result(napi.ReverseResults([reverse]), fmt, {'addressdetails': True}) - if fmt == 'xml': root = ET.fromstring(raw) assert root.find('addressparts').find('county').text == 'Hello' @@ -165,7 +164,7 @@ def test_format_reverse_geocodejson_special_parts(): distance=0.0) ])) - reverse.localize(napi.Locales()) + napi.Locales().localize_results([reverse]) raw = v1_format.format_result(napi.ReverseResults([reverse]), 'geocodejson', {'addressdetails': True}) diff --git a/test/python/cli/test_cmd_api.py b/test/python/cli/test_cmd_api.py index 541b680c..fa7715e1 100644 --- a/test/python/cli/test_cmd_api.py +++ b/test/python/cli/test_cmd_api.py @@ -74,8 +74,7 @@ class TestCliReverseCall: napi.Point(1.0, -3.0), names={'name': 'Name', 'name:fr': 'Nom'}, extratags={'extra': 'Extra'}, - locale_name='Name', - display_name='Name') + locale_name='Name') monkeypatch.setattr(napi.NominatimAPI, 'reverse', lambda *args, **kwargs: result) @@ -122,8 +121,7 @@ class TestCliLookupCall: napi.Point(1.0, -3.0), names={'name': 'Name', 'name:fr': 'Nom'}, extratags={'extra': 'Extra'}, - locale_name='Name', - display_name='Name') + locale_name='Name') monkeypatch.setattr(napi.NominatimAPI, 'lookup', lambda *args, **kwargs: napi.SearchResults([result])) @@ -150,8 +148,7 @@ def test_search(cli_call, tmp_path, capsys, monkeypatch, endpoint, params): napi.Point(1.0, -3.0), names={'name': 'Name', 'name:fr': 'Nom'}, extratags={'extra': 'Extra'}, - locale_name='Name', - display_name='Name') + locale_name='Name') monkeypatch.setattr(napi.NominatimAPI, endpoint, lambda *args, **kwargs: napi.SearchResults([result])) -- 2.39.5