From: Sarah Hoffmann Date: Fri, 10 Mar 2023 09:01:43 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~74 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/46a10cf8146e16d9da7bb21e5976d9c62b69fdd1?hp=89baa8445425ab9f1ac9e57eab336091d6458eaf Merge remote-tracking branch 'upstream/master' --- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6d75ce57..8baadb28 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -69,7 +69,7 @@ Before submitting a pull request make sure that the tests pass: Nominatim follows semantic versioning. Major releases are done for large changes that require (or at least strongly recommend) a reimport of the databases. -Minor releases can usually be applied to exisiting databases Patch releases +Minor releases can usually be applied to exisiting databases. Patch releases contain bug fixes only and are released from a separate branch where the relevant changes are cherry-picked from the master branch. diff --git a/SECURITY.md b/SECURITY.md index e8e6fcad..d023c1e5 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,7 +13,6 @@ versions. | 4.1.x | 2024-08-05 | | 4.0.x | 2023-11-02 | | 3.7.x | 2023-04-05 | -| 3.6.x | 2022-12-12 | ## Reporting a Vulnerability @@ -38,3 +37,4 @@ incident. Announcements will also be published at the ## List of Previous Incidents * 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html) +* 2023-02-21 - [cross-site scripting vulnerability](https://nominatim.org/2023/02/21/release-421.html) diff --git a/docs/customize/Import-Styles.md b/docs/customize/Import-Styles.md index 6085d4e4..e96f96e0 100644 --- a/docs/customize/Import-Styles.md +++ b/docs/customize/Import-Styles.md @@ -127,7 +127,7 @@ to the user when requested, but are not used otherwise. values. Tags with matching key/value pairs are deleted. * __extra_keys__ is a _key match list_ for tags which should be saved into extratags -* __delete_tags__ contains a table of tag keys pointing to a list of tag +* __extra_tags__ contains a table of tag keys pointing to a list of tag values. Tags with matching key/value pairs are moved to extratags. Key list may contain three kinds of strings: @@ -193,14 +193,14 @@ Address tags will be used to build up the address of an object. `set_address_tags()` takes a table with arbitrary fields pointing to _key match lists_. To fields have a special meaning: -__main__ defines +* __main__: defines the tags that make a full address object out of the OSM object. This is usually the housenumber or variants thereof. If a main address tag appears, then the object will always be included, if necessary with a fallback of `place=house`. If the key has a prefix of `addr:` or `is_in:` this will be stripped. -__extra__ defines all supplementary tags for addresses, tags like `addr:street`, `addr:city` etc. If the key has a prefix of `addr:` or `is_in:` this will be stripped. +* __extra__: defines all supplementary tags for addresses, tags like `addr:street`, `addr:city` etc. If the key has a prefix of `addr:` or `is_in:` this will be stripped. All other fields will be handled as summary fields. If a key matches the key match list, then its value will be added to the address tags with the diff --git a/docs/customize/Tokenizers.md b/docs/customize/Tokenizers.md index 58606c29..11c27e38 100644 --- a/docs/customize/Tokenizers.md +++ b/docs/customize/Tokenizers.md @@ -102,7 +102,7 @@ Here is an example configuration file: ``` yaml normalization: - ":: lower ()" - - "ß > 'ss'" # German szet is unimbigiously equal to double ss + - "ß > 'ss'" # German szet is unambiguously equal to double ss transliteration: - !include /etc/nominatim/icu-rules/extended-unicode-to-asccii.yaml - ":: Ascii ()" @@ -128,7 +128,7 @@ The configuration file contains four sections: The normalization and transliteration sections each define a set of ICU rules that are applied to the names. -The **normalisation** rules are applied after sanitation. They should remove +The **normalization** rules are applied after sanitation. They should remove any information that is not relevant for search at all. Usual rules to be applied here are: lower-casing, removing of special characters, cleanup of spaces. @@ -221,7 +221,13 @@ The following is a list of sanitizers that are shipped with Nominatim. rendering: heading_level: 6 +#### delete-tags +::: nominatim.tokenizer.sanitizers.delete_tags + selection: + members: False + rendering: + heading_level: 6 #### Token Analysis diff --git a/lib-php/DebugHtml.php b/lib-php/DebugHtml.php index 2207d529..7b0cba2d 100644 --- a/lib-php/DebugHtml.php +++ b/lib-php/DebugHtml.php @@ -135,7 +135,7 @@ class Debug public static function printSQL($sSQL) { - echo '

'.date('c').' '.htmlspecialchars($sSQL).'

'."\n"; + echo '

'.date('c').' '.htmlspecialchars($sSQL, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401).'

'."\n"; } private static function outputVar($mVar, $sPreNL) @@ -183,7 +183,7 @@ class Debug $sOut = (string)$mVar; } - echo htmlspecialchars($sOut); + echo htmlspecialchars($sOut, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401); return strlen($sOut); } } diff --git a/lib-php/PlaceLookup.php b/lib-php/PlaceLookup.php index ba4f50bc..76a093c4 100644 --- a/lib-php/PlaceLookup.php +++ b/lib-php/PlaceLookup.php @@ -524,12 +524,7 @@ class PlaceLookup // Get the bounding box and outline polygon $sSQL = 'select place_id,0 as numfeatures,st_area(geometry) as area,'; - if ($fLonReverse != null && $fLatReverse != null) { - $sSQL .= ' ST_Y(closest_point) as centrelat,'; - $sSQL .= ' ST_X(closest_point) as centrelon,'; - } else { - $sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,'; - } + $sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,'; $sSQL .= ' ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,'; $sSQL .= ' ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon'; if ($this->bIncludePolygonAsGeoJSON) { @@ -544,19 +539,21 @@ class PlaceLookup if ($this->bIncludePolygonAsText) { $sSQL .= ',ST_AsText(geometry) as astext'; } + + $sSQL .= ' FROM (SELECT place_id'; if ($fLonReverse != null && $fLatReverse != null) { - $sFrom = ' from (SELECT * , CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN '; - $sFrom .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))'; - $sFrom .=' ELSE centroid END AS closest_point'; - $sFrom .= ' from placex where place_id = '.$iPlaceID.') as plx'; + $sSQL .= ',CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN '; + $sSQL .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))'; + $sSQL .=' ELSE centroid END AS centroid'; } else { - $sFrom = ' from placex where place_id = '.$iPlaceID; + $sSQL .= ',centroid'; } if ($this->fPolygonSimplificationThreshold > 0) { - $sSQL .= ' from (select place_id,centroid,ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry'.$sFrom.') as plx'; + $sSQL .= ',ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry'; } else { - $sSQL .= $sFrom; + $sSQL .= ',geometry'; } + $sSQL .= ' FROM placex where place_id = '.$iPlaceID.') as plx'; $aPointPolygon = $this->oDB->getRow($sSQL, null, 'Could not get outline'); diff --git a/nominatim/api/logging.py b/nominatim/api/logging.py index e9c88470..3759ba1b 100644 --- a/nominatim/api/logging.py +++ b/nominatim/api/logging.py @@ -96,7 +96,7 @@ class HTMLLogger(BaseLogger): .compile(conn.sync_engine, compile_kwargs={"literal_binds": True})) if CODE_HIGHLIGHT: sqlstr = highlight(sqlstr, PostgresLexer(), - HtmlFormatter(nowrap=True, lineseparator='
')) + HtmlFormatter(nowrap=True, lineseparator='
')) self._write(f'
{sqlstr}
') else: self._write(f'{sqlstr}') diff --git a/nominatim/data/place_info.py b/nominatim/data/place_info.py index 1bfd512c..91e77a58 100644 --- a/nominatim/data/place_info.py +++ b/nominatim/data/place_info.py @@ -55,7 +55,7 @@ class PlaceInfo: @property def rank_address(self) -> int: - """ The [rank address][1] before ant rank correction is applied. + """ The [rank address][1] before any rank correction is applied. [1]: ../customize/Ranking.md#address-rank """ diff --git a/nominatim/data/place_name.py b/nominatim/data/place_name.py index f4c5e0fa..47464e2b 100644 --- a/nominatim/data/place_name.py +++ b/nominatim/data/place_name.py @@ -21,7 +21,7 @@ class PlaceName: In addition to that, a name may have arbitrary additional attributes. How attributes are used, depends on the sanitizers and token analysers. - The exception is is the 'analyzer' attribute. This attribute determines + The exception is the 'analyzer' attribute. This attribute determines which token analysis module will be used to finalize the treatment of names. """ diff --git a/nominatim/tokenizer/sanitizers/clean_postcodes.py b/nominatim/tokenizer/sanitizers/clean_postcodes.py index 593f770d..5eaea391 100644 --- a/nominatim/tokenizer/sanitizers/clean_postcodes.py +++ b/nominatim/tokenizer/sanitizers/clean_postcodes.py @@ -74,7 +74,7 @@ class _PostcodeSanitizer: def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]: - """ Create a housenumber processing function. + """ Create a function that filters postcodes by their officially allowed pattern. """ return _PostcodeSanitizer(config) diff --git a/nominatim/tokenizer/sanitizers/clean_tiger_tags.py b/nominatim/tokenizer/sanitizers/clean_tiger_tags.py index 9698a326..8b4d337d 100644 --- a/nominatim/tokenizer/sanitizers/clean_tiger_tags.py +++ b/nominatim/tokenizer/sanitizers/clean_tiger_tags.py @@ -41,6 +41,6 @@ def _clean_tiger_county(obj: ProcessInfo) -> None: def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]: - """ Create a housenumber processing function. + """ Create a function that preprocesses tags from the TIGER import. """ return _clean_tiger_county diff --git a/nominatim/tokenizer/sanitizers/delete_tags.py b/nominatim/tokenizer/sanitizers/delete_tags.py new file mode 100644 index 00000000..fd35de48 --- /dev/null +++ b/nominatim/tokenizer/sanitizers/delete_tags.py @@ -0,0 +1,144 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Sanitizer which prevents certain tags from getting into the search index. +It remove tags which matches all properties given below. + + +Arguments: + type: Define which type of tags should be considered for removal. + There are two types of tags 'name' and 'address' tags. + Takes a string 'name' or 'address'. (default: 'name') + + filter-kind: Define which 'kind' of tags should be removed. + Takes a string or list of strings where each + string is a regular expression. A tag is considered + to be a candidate for removal if its 'kind' property + fully matches any of the given regular expressions. + Note that by default all 'kind' of tags are considered. + + suffix: Define the 'suffix' property of the tags which should be + removed. Takes a string or list of strings where each + string is a regular expression. A tag is considered to be a + candidate for removal if its 'suffix' property fully + matches any of the given regular expressions. Note that by + default tags with any suffix value are considered including + those which don't have a suffix at all. + + name: Define the 'name' property corresponding to the 'kind' property + of the tag. Takes a string or list of strings where each string + is a regular expression. A tag is considered to be a candidate + for removal if its name fully matches any of the given regular + expressions. Note that by default tags with any 'name' are + considered. + + country_code: Define the country code of places whose tags should be + considered for removed. Takes a string or list of strings + where each string is a two-letter lower-case country code. + Note that by default tags of places with any country code + are considered including those which don't have a country + code at all. + + rank_address: Define the address rank of places whose tags should be + considered for removal. Takes a string or list of strings + where each string is a number or range of number or the + form -. + Note that default is '0-30', which means that tags of all + places are considered. + See https://nominatim.org/release-docs/latest/customize/Ranking/#address-rank + to learn more about address rank. + + +""" +from typing import Callable, List, Optional, Pattern, Tuple, Sequence +import re + +from nominatim.tokenizer.sanitizers.base import ProcessInfo +from nominatim.data.place_name import PlaceName +from nominatim.tokenizer.sanitizers.config import SanitizerConfig + +class _TagSanitizer: + + def __init__(self, config: SanitizerConfig) -> None: + self.type = config.get('type', 'name') + self.filter_kind = config.get_filter_kind() + self.country_codes = config.get_string_list('country_code', []) + self.allowed_ranks = self._set_allowed_ranks( \ + config.get_string_list('rank_address', ['0-30'])) + + self.has_country_code = config.get('country_code', None) is not None + + suffixregexps = config.get_string_list('suffix', [r'[\s\S]*']) + self.suffix_regexp = [re.compile(r) for r in suffixregexps] + + nameregexps = config.get_string_list('name', [r'[\s\S]*']) + self.name_regexp = [re.compile(r) for r in nameregexps] + + + + def __call__(self, obj: ProcessInfo) -> None: + tags = obj.names if self.type == 'name' else obj.address + + if (not tags or + self.has_country_code and + obj.place.country_code not in self.country_codes or + not self.allowed_ranks[obj.place.rank_address]): + return + + filtered_tags: List[PlaceName] = [] + + for tag in tags: + + if (not self.filter_kind(tag.kind) or + not self._matches(tag.suffix, self.suffix_regexp) or + not self._matches(tag.name, self.name_regexp)): + filtered_tags.append(tag) + + + if self.type == 'name': + obj.names = filtered_tags + else: + obj.address = filtered_tags + + + def _set_allowed_ranks(self, ranks: Sequence[str]) -> Tuple[bool, ...]: + """ Returns a tuple of 31 boolean values corresponding to the + address ranks 0-30. Value at index 'i' is True if rank 'i' + is present in the ranks or lies in the range of any of the + ranks provided in the sanitizer configuration, otherwise + the value is False. + """ + allowed_ranks = [False] * 31 + + for rank in ranks: + intvl = [int(x) for x in rank.split('-')] + + start, end = (intvl[0], intvl[0]) if len(intvl) == 1 else (intvl[0], intvl[1]) + + for i in range(start, end + 1): + allowed_ranks[i] = True + + + return tuple(allowed_ranks) + + + def _matches(self, value: Optional[str], patterns: List[Pattern[str]]) -> bool: + """ Returns True if the given value fully matches any of the regular + expression pattern in the list. Otherwise, returns False. + + Note that if the value is None, it is taken as an empty string. + """ + target = '' if value is None else value + return any(r.fullmatch(target) is not None for r in patterns) + + + +def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]: + """ Create a function to process removal of certain tags. + """ + + return _TagSanitizer(config) diff --git a/nominatim/tokenizer/sanitizers/tag_analyzer_by_language.py b/nominatim/tokenizer/sanitizers/tag_analyzer_by_language.py index 6d6430f0..032b69a8 100644 --- a/nominatim/tokenizer/sanitizers/tag_analyzer_by_language.py +++ b/nominatim/tokenizer/sanitizers/tag_analyzer_by_language.py @@ -12,7 +12,7 @@ If a name already has an analyzer tagged, then this is kept. Arguments: filter-kind: Restrict the names the sanitizer should be applied to - to the given tags. The parameter expects a list of + the given tags. The parameter expects a list of regular expressions which are matched against 'kind'. Note that a match against the full string is expected. whitelist: Restrict the set of languages that should be tagged. diff --git a/test/bdd/api/details/params.feature b/test/bdd/api/details/params.feature index 3bb5bf7c..3d5635de 100644 --- a/test/bdd/api/details/params.feature +++ b/test/bdd/api/details/params.feature @@ -7,9 +7,9 @@ Feature: Object details Then the result is valid json And result has attributes geometry And result has not attributes keywords,address,linked_places,parentof - And results contain - | geometry+type | - | Point | + And results contain in field geometry + | type | + | Point | Scenario: JSON Details with pretty printing When sending json details query for W297699560 @@ -75,9 +75,9 @@ Feature: Object details | 1 | Then the result is valid json And result has attributes geometry - And results contain - | geometry+type | - | | + And results contain in field geometry + | type | + | | Examples: | osmid | geometry | diff --git a/test/bdd/api/details/simple.feature b/test/bdd/api/details/simple.feature index 58e5e59e..eed95a73 100644 --- a/test/bdd/api/details/simple.feature +++ b/test/bdd/api/details/simple.feature @@ -103,3 +103,16 @@ Feature: Object details | category | type | admin_level | | place | postcode | 15 | And result has not attributes osm_type,osm_id + + + @v1-api-python-only + Scenario Outline: Details debug output returns no errors + When sending debug details query for + Then the result is valid html + + Examples: + | feature | + | N5484325405 | + | W1 | + | 112820 | + | 112871 | diff --git a/test/bdd/api/reverse/geocodejson.feature b/test/bdd/api/reverse/geocodejson.feature deleted file mode 100644 index 3d6a9b10..00000000 --- a/test/bdd/api/reverse/geocodejson.feature +++ /dev/null @@ -1,29 +0,0 @@ -@APIDB -Feature: Parameters for Reverse API - Testing correctness of geocodejson output. - - Scenario: City housenumber-level address with street - When sending geocodejson reverse coordinates 47.1068011,9.52810091 - Then results contain - | housenumber | street | postcode | city | country | - | 8 | Im Winkel | 9495 | Triesen | Liechtenstein | - - Scenario: Town street-level address with street - When sending geocodejson reverse coordinates 47.066,9.504 - | zoom | - | 16 | - Then results contain - | name | city | postcode | country | - | Gnetsch | Balzers | 9496 | Liechtenstein | - - Scenario: Poi street-level address with footway - When sending geocodejson reverse coordinates 47.0653,9.5007 - Then results contain - | street | city | postcode | country | - | Burgweg | Balzers | 9496 | Liechtenstein | - - Scenario: City address with suburb - When sending geocodejson reverse coordinates 47.146861,9.511771 - Then results contain - | housenumber | street | district | city | postcode | country | - | 5 | Lochgass | Ebenholz | Vaduz | 9490 | Liechtenstein | diff --git a/test/bdd/api/reverse/geometry.feature b/test/bdd/api/reverse/geometry.feature new file mode 100644 index 00000000..2c14dd5f --- /dev/null +++ b/test/bdd/api/reverse/geometry.feature @@ -0,0 +1,44 @@ +@APIDB +Feature: Geometries for reverse geocoding + Tests for returning geometries with reverse + + + Scenario: Polygons are returned fully by default + When sending v1/reverse at 47.13803,9.52264 + | polygon_text | + | 1 | + Then results contain + | geotext | + | POLYGON((9.5225302 47.138066,9.5225348 47.1379282,9.5226142 47.1379294,9.5226143 47.1379257,9.522615 47.137917,9.5226225 47.1379098,9.5226334 47.1379052,9.5226461 47.1379037,9.5226588 47.1379056,9.5226693 47.1379107,9.5226762 47.1379181,9.5226762 47.1379268,9.5226761 47.1379308,9.5227366 47.1379317,9.5227352 47.1379753,9.5227608 47.1379757,9.5227595 47.1380148,9.5227355 47.1380145,9.5227337 47.1380692,9.5225302 47.138066)) | + + + Scenario: Polygons can be slightly simplified + When sending v1/reverse at 47.13803,9.52264 + | polygon_text | polygon_threshold | + | 1 | 0.00001 | + Then results contain + | geotext | + | POLYGON((9.5225302 47.138066,9.5225348 47.1379282,9.5226142 47.1379294,9.5226225 47.1379098,9.5226588 47.1379056,9.5226761 47.1379308,9.5227366 47.1379317,9.5227352 47.1379753,9.5227608 47.1379757,9.5227595 47.1380148,9.5227355 47.1380145,9.5227337 47.1380692,9.5225302 47.138066)) | + + + Scenario: Polygons can be much simplified + When sending v1/reverse at 47.13803,9.52264 + | polygon_text | polygon_threshold | + | 1 | 0.9 | + Then results contain + | geotext | + | POLYGON((9.5225302 47.138066,9.5225348 47.1379282,9.5227608 47.1379757,9.5227337 47.1380692,9.5225302 47.138066)) | + + + Scenario: For polygons return the centroid as center point + When sending v1/reverse at 47.13836,9.52304 + Then results contain + | centroid | + | 9.52271080 47.13818045 | + + + Scenario: For streets return the closest point as center point + When sending v1/reverse at 47.13368,9.52942 + Then results contain + | centroid | + | 9.529431527 47.13368172 | diff --git a/test/bdd/api/reverse/language.feature b/test/bdd/api/reverse/language.feature index 43d1f11b..e42689f7 100644 --- a/test/bdd/api/reverse/language.feature +++ b/test/bdd/api/reverse/language.feature @@ -2,13 +2,13 @@ Feature: Localization of reverse search results Scenario: default language - When sending json reverse coordinates 47.14,9.55 + When sending v1/reverse at 47.14,9.55 Then result addresses contain | ID | country | | 0 | Liechtenstein | Scenario: accept-language parameter - When sending json reverse coordinates 47.14,9.55 + When sending v1/reverse at 47.14,9.55 | accept-language | | ja,en | Then result addresses contain @@ -19,7 +19,7 @@ Feature: Localization of reverse search results Given the HTTP header | accept-language | | fo-ca,fo;q=0.8,en-ca;q=0.5,en;q=0.3 | - When sending json reverse coordinates 47.14,9.55 + When sending v1/reverse at 47.14,9.55 Then result addresses contain | ID | country | | 0 | Liktinstein | @@ -28,7 +28,7 @@ Feature: Localization of reverse search results Given the HTTP header | accept-language | | fo-ca,fo;q=0.8,en-ca;q=0.5,en;q=0.3 | - When sending json reverse coordinates 47.14,9.55 + When sending v1/reverse at 47.14,9.55 | accept-language | | en | Then result addresses contain diff --git a/test/bdd/api/reverse/params.feature b/test/bdd/api/reverse/params.feature deleted file mode 100644 index d6ef3794..00000000 --- a/test/bdd/api/reverse/params.feature +++ /dev/null @@ -1,147 +0,0 @@ -@APIDB -Feature: Parameters for Reverse API - Testing different parameter options for reverse API. - - Scenario Outline: Reverse-geocoding without address - When sending reverse coordinates 47.13,9.56 - | addressdetails | - | 0 | - Then exactly 1 result is returned - And result has not attributes address - - Examples: - | format | - | json | - | jsonv2 | - | geojson | - | xml | - - Scenario Outline: Coordinates must be floating-point numbers - When sending reverse coordinates - Then a HTTP 400 is returned - - Examples: - | coords | - | -45.3,; | - | gkjd,50 | - - Scenario Outline: Zoom levels between 4 and 18 are allowed - When sending reverse coordinates 47.14122383,9.52169581334 - | zoom | - | | - Then exactly 1 result is returned - And result addresses contain - | country_code | - | li | - - Examples: - | zoom | - | 4 | - | 5 | - | 6 | - | 7 | - | 8 | - | 9 | - | 10 | - | 11 | - | 12 | - | 13 | - | 14 | - | 15 | - | 16 | - | 17 | - | 18 | - - Scenario: Non-numerical zoom levels return an error - When sending reverse coordinates 47.14122383,9.52169581334 - | zoom | - | adfe | - Then a HTTP 400 is returned - - Scenario Outline: Reverse Geocoding with extratags - When sending reverse coordinates 47.1395013150811,9.522098077031046 - | extratags | - | 1 | - Then result 0 has attributes extratags - - Examples: - | format | - | xml | - | json | - | jsonv2 | - | geojson | - - Scenario Outline: Reverse Geocoding with namedetails - When sending reverse coordinates 47.1395013150811,9.522098077031046 - | namedetails | - | 1 | - Then result 0 has attributes namedetails - - Examples: - | format | - | xml | - | json | - | jsonv2 | - | geojson | - - Scenario Outline: Reverse Geocoding contains TEXT geometry - When sending reverse coordinates 47.165989816710066,9.515774846076965 - | polygon_text | - | 1 | - Then result 0 has attributes - - Examples: - | format | response_attribute | - | xml | geotext | - | json | geotext | - | jsonv2 | geotext | - - Scenario Outline: Reverse Geocoding contains SVG geometry - When sending reverse coordinates 47.165989816710066,9.515774846076965 - | polygon_svg | - | 1 | - Then result 0 has attributes - - Examples: - | format | response_attribute | - | xml | geosvg | - | json | svg | - | jsonv2 | svg | - - Scenario Outline: Reverse Geocoding contains KML geometry - When sending reverse coordinates 47.165989816710066,9.515774846076965 - | polygon_kml | - | 1 | - Then result 0 has attributes - - Examples: - | format | response_attribute | - | xml | geokml | - | json | geokml | - | jsonv2 | geokml | - - Scenario Outline: Reverse Geocoding contains GEOJSON geometry - When sending reverse coordinates 47.165989816710066,9.515774846076965 - | polygon_geojson | - | 1 | - Then result 0 has attributes - - Examples: - | format | response_attribute | - | xml | geojson | - | json | geojson | - | jsonv2 | geojson | - | geojson | geojson | - - Scenario Outline: Reverse Geocoding in geojson format contains no non-geojson geometry - When sending geojson reverse coordinates 47.165989816710066,9.515774846076965 - | polygon_text | polygon_svg | polygon_geokml | - | 1 | 1 | 1 | - Then result 0 has not attributes - - Examples: - | response_attribute | - | geotext | - | polygonpoints | - | svg | - | geokml | diff --git a/test/bdd/api/reverse/queries.feature b/test/bdd/api/reverse/queries.feature index a2b0f033..d51378d6 100644 --- a/test/bdd/api/reverse/queries.feature +++ b/test/bdd/api/reverse/queries.feature @@ -2,19 +2,35 @@ Feature: Reverse geocoding Testing the reverse function + Scenario Outline: Simple reverse-geocoding with no results + When sending v1/reverse at , + Then exactly 0 results are returned + + Examples: + | lat | lon | + | 0.0 | 0.0 | + | -34.830 | -56.105 | + | 45.174 | -103.072 | + | 21.156 | -12.2744 | + | 91.3 | 0.4 | + | -700 | 0.4 | + | 0.2 | 324.44 | + | 0.2 | -180.4 | + + @Tiger Scenario: TIGER house number - When sending jsonv2 reverse coordinates 32.4752389363,-86.4810198619 + When sending v1/reverse at 32.4752389363,-86.4810198619 Then results contain - | osm_type | category | type | - | way | place | house | + | category | type | + | place | house | And result addresses contain | house_number | road | postcode | country_code | | 707 | Upper Kingston Road | 36067 | us | @Tiger Scenario: No TIGER house number for zoom < 18 - When sending jsonv2 reverse coordinates 32.4752389363,-86.4810198619 + When sending v1/reverse at 32.4752389363,-86.4810198619 | zoom | | 17 | Then results contain @@ -25,7 +41,7 @@ Feature: Reverse geocoding | Upper Kingston Road | 30607 | us | Scenario: Interpolated house number - When sending jsonv2 reverse coordinates 47.118533,9.57056562 + When sending v1/reverse at 47.118533,9.57056562 Then results contain | osm_type | category | type | | way | place | house | @@ -34,20 +50,20 @@ Feature: Reverse geocoding | 1019 | Grosssteg | Scenario: Address with non-numerical house number - When sending jsonv2 reverse coordinates 47.107465,9.52838521614 + When sending v1/reverse at 47.107465,9.52838521614 Then result addresses contain | house_number | road | | 39A/B | Dorfstrasse | Scenario: Address with numerical house number - When sending jsonv2 reverse coordinates 47.168440329479594,9.511551699184338 + When sending v1/reverse at 47.168440329479594,9.511551699184338 Then result addresses contain | house_number | road | | 6 | Schmedgässle | Scenario Outline: Zoom levels below 5 result in country - When sending jsonv2 reverse coordinates 47.16,9.51 + When sending v1/reverse at 47.16,9.51 | zoom | | | Then results contain @@ -63,7 +79,7 @@ Feature: Reverse geocoding | 4 | Scenario: When on a street, the closest interpolation is shown - When sending jsonv2 reverse coordinates 47.118457166193245,9.570678289621355 + When sending v1/reverse at 47.118457166193245,9.570678289621355 | zoom | | 18 | Then results contain @@ -72,7 +88,7 @@ Feature: Reverse geocoding # github 2214 Scenario: Interpolations do not override house numbers when they are closer - When sending jsonv2 reverse coordinates 47.11778,9.57255 + When sending v1/reverse at 47.11778,9.57255 | zoom | | 18 | Then results contain @@ -80,7 +96,7 @@ Feature: Reverse geocoding | 5, Grosssteg, Steg, Triesenberg, Oberland, 9497, Liechtenstein | Scenario: Interpolations do not override house numbers when they are closer (2) - When sending jsonv2 reverse coordinates 47.11834,9.57167 + When sending v1/reverse at 47.11834,9.57167 | zoom | | 18 | Then results contain @@ -88,7 +104,7 @@ Feature: Reverse geocoding | 3, Grosssteg, Sücka, Triesenberg, Oberland, 9497, Liechtenstein | Scenario: When on a street with zoom 18, the closest housenumber is returned - When sending jsonv2 reverse coordinates 47.11755503977281,9.572722250405036 + When sending v1/reverse at 47.11755503977281,9.572722250405036 | zoom | | 18 | Then result addresses contain diff --git a/test/bdd/api/reverse/simple.feature b/test/bdd/api/reverse/simple.feature deleted file mode 100644 index 4da311e7..00000000 --- a/test/bdd/api/reverse/simple.feature +++ /dev/null @@ -1,137 +0,0 @@ -@APIDB -Feature: Simple Reverse Tests - Simple tests for internal server errors and response format. - - Scenario Outline: Simple reverse-geocoding - When sending reverse coordinates , - Then the result is valid xml - When sending xml reverse coordinates , - Then the result is valid xml - When sending json reverse coordinates , - Then the result is valid json - When sending jsonv2 reverse coordinates , - Then the result is valid json - When sending geojson reverse coordinates , - Then the result is valid geojson - - Examples: - | lat | lon | - | 0.0 | 0.0 | - | -34.830 | -56.105 | - | 45.174 | -103.072 | - | 21.156 | -12.2744 | - - Scenario Outline: Testing different parameters - When sending reverse coordinates 53.603,10.041 - | param | value | - | | | - Then the result is valid xml - When sending xml reverse coordinates 53.603,10.041 - | param | value | - | | | - Then the result is valid xml - When sending json reverse coordinates 53.603,10.041 - | param | value | - | | | - Then the result is valid json - When sending jsonv2 reverse coordinates 53.603,10.041 - | param | value | - | | | - Then the result is valid json - When sending geojson reverse coordinates 53.603,10.041 - | param | value | - | | | - Then the result is valid geojson - When sending geocodejson reverse coordinates 53.603,10.041 - | param | value | - | | | - Then the result is valid geocodejson - - Examples: - | parameter | value | - | polygon_text | 1 | - | polygon_text | 0 | - | polygon_kml | 1 | - | polygon_kml | 0 | - | polygon_geojson | 1 | - | polygon_geojson | 0 | - | polygon_svg | 1 | - | polygon_svg | 0 | - - Scenario Outline: Wrapping of legal jsonp requests - When sending reverse coordinates 67.3245,0.456 - | json_callback | - | foo | - Then the result is valid - - Examples: - | format | outformat | - | json | json | - | jsonv2 | json | - | geojson | geojson | - - Scenario Outline: Boundingbox is returned - When sending reverse coordinates 47.11,9.57 - | zoom | - | 8 | - Then result has bounding box in 47,48,9,10 - - Examples: - | format | - | json | - | jsonv2 | - | geojson | - | xml | - - Scenario Outline: Reverse-geocoding with zoom - When sending reverse coordinates 47.11,9.57 - | zoom | - | 10 | - Then exactly 1 result is returned - - Examples: - | format | - | json | - | jsonv2 | - | geojson | - | xml | - - Scenario: Missing lon parameter - When sending reverse coordinates 52.52, - Then a HTTP 400 is returned - - Scenario: Missing lat parameter - When sending reverse coordinates ,52.52 - Then a HTTP 400 is returned - - Scenario: Missing osm_id parameter - When sending reverse coordinates , - | osm_type | - | N | - Then a HTTP 400 is returned - - Scenario: Missing osm_type parameter - When sending reverse coordinates , - | osm_id | - | 3498564 | - Then a HTTP 400 is returned - - Scenario Outline: Bad format for lat or lon - When sending reverse coordinates , - | lat | lon | - | | | - Then a HTTP 400 is returned - - Examples: - | lat | lon | - | 48.9660 | 8,4482 | - | 48,9660 | 8.4482 | - | 48,9660 | 8,4482 | - | 48.966.0 | 8.4482 | - | 48.966 | 8.448.2 | - | Nan | 8.448 | - | 48.966 | Nan | - - Scenario: Reverse Debug output returns no errors - When sending debug reverse coordinates 47.11,9.57 - Then a HTTP 200 is returned diff --git a/test/bdd/api/reverse/v1_geocodejson.feature b/test/bdd/api/reverse/v1_geocodejson.feature new file mode 100644 index 00000000..b7ea0354 --- /dev/null +++ b/test/bdd/api/reverse/v1_geocodejson.feature @@ -0,0 +1,106 @@ +@APIDB +Feature: Geocodejson for Reverse API + Testing correctness of geocodejson output (API version v1). + + Scenario Outline: Simple OSM result + When sending v1/reverse at 47.066,9.504 with format geocodejson + | addressdetails | + | | + Then result has attributes place_id, accuracy + And result has country,postcode,county,city,district,street,housenumber, admin + Then results contain + | osm_type | osm_id | osm_key | osm_value | type | + | node | 6522627624 | shop | bakery | house | + And results contain + | name | label | + | Dorfbäckerei Herrmann | Dorfbäckerei Herrmann, 29, Gnetsch, Mäls, Balzers, Oberland, 9496, Liechtenstein | + And results contain in field geojson + | type | coordinates | + | Point | [9.5036065, 47.0660892] | + And results contain in field __geocoding + | version | licence | attribution | + | 0.1.0 | ODbL | Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright | + + Examples: + | has_address | attributes | + | 1 | attributes | + | 0 | not attributes | + + + Scenario: City housenumber-level address with street + When sending v1/reverse at 47.1068011,9.52810091 with format geocodejson + Then results contain + | housenumber | street | postcode | city | country | + | 8 | Im Winkel | 9495 | Triesen | Liechtenstein | + And results contain in field admin + | level6 | level8 | + | Oberland | Triesen | + + + Scenario: Town street-level address with street + When sending v1/reverse at 47.066,9.504 with format geocodejson + | zoom | + | 16 | + Then results contain + | name | city | postcode | country | + | Gnetsch | Balzers | 9496 | Liechtenstein | + + + Scenario: Poi street-level address with footway + When sending v1/reverse at 47.06515,9.50083 with format geocodejson + Then results contain + | street | city | postcode | country | + | Burgweg | Balzers | 9496 | Liechtenstein | + + + Scenario: City address with suburb + When sending v1/reverse at 47.146861,9.511771 with format geocodejson + Then results contain + | housenumber | street | district | city | postcode | country | + | 5 | Lochgass | Ebenholz | Vaduz | 9490 | Liechtenstein | + + + @Tiger + Scenario: Tiger address + When sending v1/reverse at 32.4752389363,-86.4810198619 with format geocodejson + Then results contain + | osm_type | osm_id | osm_key | osm_value | type | + | way | 396009653 | place | house | house | + And results contain + | housenumber | street | city | county | postcode | country | + | 707 | Upper Kingston Road | Prattville | Autauga County | 36067 | United States | + + + Scenario: Interpolation address + When sending v1/reverse at 47.118533,9.57056562 with format geocodejson + Then results contain + | osm_type | osm_id | osm_key | osm_value | type | + | way | 1 | place | house | house | + And results contain + | label | + | 1019, Grosssteg, Sücka, Triesenberg, Oberland, 9497, Liechtenstein | + And result has not attributes name + + + Scenario: Line geometry output is supported + When sending v1/reverse at 47.06597,9.50467 with format geocodejson + | param | value | + | polygon_geojson | 1 | + Then results contain in field geojson + | type | + | LineString | + + + Scenario Outline: Only geojson polygons are supported + When sending v1/reverse at 47.06597,9.50467 with format geocodejson + | param | value | + | | 1 | + Then results contain in field geojson + | type | + | Point | + + Examples: + | param | + | polygon_text | + | polygon_svg | + | polygon_kml | diff --git a/test/bdd/api/reverse/v1_geojson.feature b/test/bdd/api/reverse/v1_geojson.feature new file mode 100644 index 00000000..8acf067f --- /dev/null +++ b/test/bdd/api/reverse/v1_geojson.feature @@ -0,0 +1,72 @@ +@APIDB +Feature: Geojson for Reverse API + Testing correctness of geojson output (API version v1). + + Scenario Outline: Simple OSM result + When sending v1/reverse at 47.066,9.504 with format geojson + | addressdetails | + | | + Then result has attributes place_id, importance, __licence + And result has address + And results contain + | osm_type | osm_id | place_rank | category | type | addresstype | + | node | 6522627624 | 30 | shop | bakery | shop | + And results contain + | name | display_name | + | Dorfbäckerei Herrmann | Dorfbäckerei Herrmann, 29, Gnetsch, Mäls, Balzers, Oberland, 9496, Liechtenstein | + And results contain + | boundingbox | + | [47.0660392, 47.0661392, 9.5035565, 9.5036565] | + And results contain in field geojson + | type | coordinates | + | Point | [9.5036065, 47.0660892] | + + Examples: + | has_address | attributes | + | 1 | attributes | + | 0 | not attributes | + + + @Tiger + Scenario: Tiger address + When sending v1/reverse at 32.4752389363,-86.4810198619 with format geojson + Then results contain + | osm_type | osm_id | category | type | addresstype | place_rank | + | way | 396009653 | place | house | place | 30 | + + + Scenario: Interpolation address + When sending v1/reverse at 47.118533,9.57056562 with format geojson + Then results contain + | osm_type | osm_id | place_rank | category | type | addresstype | + | way | 1 | 30 | place | house | place | + And results contain + | boundingbox | + | [47.118495392, 47.118595392, 9.57049676, 9.57059676] | + And results contain + | display_name | + | 1019, Grosssteg, Sücka, Triesenberg, Oberland, 9497, Liechtenstein | + + + Scenario: Line geometry output is supported + When sending v1/reverse at 47.06597,9.50467 with format geojson + | param | value | + | polygon_geojson | 1 | + Then results contain in field geojson + | type | + | LineString | + + + Scenario Outline: Only geojson polygons are supported + When sending v1/reverse at 47.06597,9.50467 with format geojson + | param | value | + | | 1 | + Then results contain in field geojson + | type | + | Point | + + Examples: + | param | + | polygon_text | + | polygon_svg | + | polygon_kml | diff --git a/test/bdd/api/reverse/v1_json.feature b/test/bdd/api/reverse/v1_json.feature new file mode 100644 index 00000000..155e02b0 --- /dev/null +++ b/test/bdd/api/reverse/v1_json.feature @@ -0,0 +1,129 @@ +@APIDB +Feature: Json output for Reverse API + Testing correctness of json and jsonv2 output (API version v1). + + Scenario Outline: OSM result with and without addresses + When sending v1/reverse at 47.066,9.504 with format json + | addressdetails | + | | + Then result has address + When sending v1/reverse at 47.066,9.504 with format jsonv2 + | addressdetails | + | | + Then result has address + + Examples: + | has_address | attributes | + | 1 | attributes | + | 0 | not attributes | + + Scenario Outline: Siple OSM result + When sending v1/reverse at 47.066,9.504 with format + Then result has attributes place_id + And results contain + | licence | + | Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright | + And results contain + | osm_type | osm_id | + | node | 6522627624 | + And results contain + | centroid | boundingbox | + | 9.5036065 47.0660892 | ['47.0660392', '47.0661392', '9.5035565', '9.5036565'] | + And results contain + | display_name | + | Dorfbäckerei Herrmann, 29, Gnetsch, Mäls, Balzers, Oberland, 9496, Liechtenstein | + And result has not attributes namedetails,extratags + + Examples: + | format | + | json | + | jsonv2 | + + Scenario: Extra attributes of jsonv2 result + When sending v1/reverse at 47.066,9.504 with format jsonv2 + Then result has attributes importance + Then results contain + | category | type | name | place_rank | addresstype | + | shop | bakery | Dorfbäckerei Herrmann | 30 | shop | + + + @Tiger + Scenario: Tiger address + When sending v1/reverse at 32.4752389363,-86.4810198619 with format jsonv2 + Then results contain + | osm_type | osm_id | category | type | addresstype | + | way | 396009653 | place | house | place | + + + Scenario Outline: Interpolation address + When sending v1/reverse at 47.118533,9.57056562 with format + Then results contain + | osm_type | osm_id | + | way | 1 | + And results contain + | centroid | boundingbox | + | 9.57054676 47.118545392 | ['47.118495392', '47.118595392', '9.57049676', '9.57059676'] | + And results contain + | display_name | + | 1019, Grosssteg, Sücka, Triesenberg, Oberland, 9497, Liechtenstein | + + Examples: + | format | + | json | + | jsonv2 | + + + Scenario Outline: Output of geojson + When sending v1/reverse at 47.06597,9.50467 with format + | param | value | + | polygon_geojson | 1 | + Then results contain in field geojson + | type | coordinates | + | LineString | [[9.5039353, 47.0657546], [9.5040437, 47.0657781], [9.5040808, 47.065787], [9.5054298, 47.0661407]] | + + Examples: + | format | + | json | + | jsonv2 | + + + Scenario Outline: Output of WKT + When sending v1/reverse at 47.06597,9.50467 with format + | param | value | + | polygon_text | 1 | + Then results contain + | geotext | + | LINESTRING(9.5039353 47.0657546,9.5040437 47.0657781,9.5040808 47.065787,9.5054298 47.0661407) | + + Examples: + | format | + | json | + | jsonv2 | + + + Scenario Outline: Output of SVG + When sending v1/reverse at 47.06597,9.50467 with format + | param | value | + | polygon_svg | 1 | + Then results contain + | svg | + | M 9.5039353 -47.0657546 L 9.5040437 -47.0657781 9.5040808 -47.065787 9.5054298 -47.0661407 | + + Examples: + | format | + | json | + | jsonv2 | + + + Scenario Outline: Output of KML + When sending v1/reverse at 47.06597,9.50467 with format + | param | value | + | polygon_kml | 1 | + Then results contain + | geokml | + | ^9.5039\d*,47.0657\d* 9.5040\d*,47.0657\d* 9.5040\d*,47.065\d* 9.5054\d*,47.0661\d* | + + Examples: + | format | + | json | + | jsonv2 | diff --git a/test/bdd/api/reverse/v1_params.feature b/test/bdd/api/reverse/v1_params.feature new file mode 100644 index 00000000..70a6505b --- /dev/null +++ b/test/bdd/api/reverse/v1_params.feature @@ -0,0 +1,220 @@ +@APIDB +Feature: v1/reverse Parameter Tests + Tests for parameter inputs for the v1 reverse endpoint. + This file contains mostly bad parameter input. Valid parameters + are tested in the format tests. + + Scenario: Bad format + When sending v1/reverse at 47.14122383,9.52169581334 with format sdf + Then a HTTP 400 is returned + + Scenario: Missing lon parameter + When sending v1/reverse at 52.52, + Then a HTTP 400 is returned + + + Scenario: Missing lat parameter + When sending v1/reverse at ,52.52 + Then a HTTP 400 is returned + + @v1-api-php-only + Scenario: Missing osm_id parameter + When sending v1/reverse at , + | osm_type | + | N | + Then a HTTP 400 is returned + + @v1-api-php-only + Scenario: Missing osm_type parameter + When sending v1/reverse at , + | osm_id | + | 3498564 | + Then a HTTP 400 is returned + + + Scenario Outline: Bad format for lat or lon + When sending v1/reverse at , + | lat | lon | + | | | + Then a HTTP 400 is returned + + Examples: + | lat | lon | + | 48.9660 | 8,4482 | + | 48,9660 | 8.4482 | + | 48,9660 | 8,4482 | + | 48.966.0 | 8.4482 | + | 48.966 | 8.448.2 | + | Nan | 8.448 | + | 48.966 | Nan | + | Inf | 5.6 | + | 5.6 | -Inf | + | | 3.4 | + | 3.4 | | + | -45.3 | ; | + | gkjd | 50 | + + + Scenario: Non-numerical zoom levels return an error + When sending v1/reverse at 47.14122383,9.52169581334 + | zoom | + | adfe | + Then a HTTP 400 is returned + + + Scenario Outline: Truthy values for boolean parameters + When sending v1/reverse at 47.14122383,9.52169581334 + | addressdetails | + | | + Then exactly 1 result is returned + And result has attributes address + + When sending v1/reverse at 47.14122383,9.52169581334 + | extratags | + | | + Then exactly 1 result is returned + And result has attributes extratags + + When sending v1/reverse at 47.14122383,9.52169581334 + | namedetails | + | | + Then exactly 1 result is returned + And result has attributes namedetails + + When sending v1/reverse at 47.14122383,9.52169581334 + | polygon_geojson | + | | + Then exactly 1 result is returned + And result has attributes geojson + + When sending v1/reverse at 47.14122383,9.52169581334 + | polygon_kml | + | | + Then exactly 1 result is returned + And result has attributes geokml + + When sending v1/reverse at 47.14122383,9.52169581334 + | polygon_svg | + | | + Then exactly 1 result is returned + And result has attributes svg + + When sending v1/reverse at 47.14122383,9.52169581334 + | polygon_text | + | | + Then exactly 1 result is returned + And result has attributes geotext + + Examples: + | value | + | yes | + | no | + | -1 | + | 100 | + | false | + | 00 | + + + Scenario: Only one geometry can be requested + When sending v1/reverse at 47.165989816710066,9.515774846076965 + | polygon_text | polygon_svg | + | 1 | 1 | + Then a HTTP 400 is returned + + + Scenario Outline: Wrapping of legal jsonp requests + When sending v1/reverse at 67.3245,0.456 with format + | json_callback | + | foo | + Then the result is valid + + Examples: + | format | outformat | + | json | json | + | jsonv2 | json | + | geojson | geojson | + | geocodejson | geocodejson | + + + Scenario Outline: Illegal jsonp are not allowed + When sending v1/reverse at 47.165989816710066,9.515774846076965 + | param | value | + |json_callback | | + Then a HTTP 400 is returned + + Examples: + | data | + | 1asd | + | bar(foo) | + | XXX['bad'] | + | foo; evil | + + + @v1-api-python-only + Scenario Outline: Reverse debug mode produces valid HTML + When sending v1/reverse at , with format debug + | lat | lon | + | | | + Then the result is valid html + + Examples: + | lat | lon | + | 0.0 | 0.0 | + | 47.06645 | 9.56601 | + | 47.14081 | 9.52267 | + + + Scenario Outline: Full address display for city housenumber-level address with street + When sending v1/reverse at 47.1068011,9.52810091 with format + Then address of result 0 is + | type | value | + | house_number | 8 | + | road | Im Winkel | + | neighbourhood | Oberdorf | + | village | Triesen | + | ISO3166-2-lvl8 | LI-09 | + | county | Oberland | + | postcode | 9495 | + | country | Liechtenstein | + | country_code | li | + + Examples: + | format | + | json | + | jsonv2 | + | geojson | + | xml | + + + Scenario Outline: Results with name details + When sending v1/reverse at 47.14052,9.52202 with format + | zoom | namedetails | + | 14 | 1 | + Then results contain in field namedetails + | name | + | Ebenholz | + + Examples: + | format | + | json | + | jsonv2 | + | xml | + | geojson | + + + Scenario Outline: Results with extratags + When sending v1/reverse at 47.14052,9.52202 with format + | zoom | extratags | + | 14 | 1 | + Then results contain in field extratags + | wikidata | + | Q4529531 | + + Examples: + | format | + | json | + | jsonv2 | + | xml | + | geojson | + + diff --git a/test/bdd/api/reverse/v1_xml.feature b/test/bdd/api/reverse/v1_xml.feature new file mode 100644 index 00000000..fd6e1501 --- /dev/null +++ b/test/bdd/api/reverse/v1_xml.feature @@ -0,0 +1,87 @@ +@APIDB +Feature: XML output for Reverse API + Testing correctness of xml output (API version v1). + + Scenario Outline: OSM result with and without addresses + When sending v1/reverse at 47.066,9.504 with format xml + | addressdetails | + | | + Then result has attributes place_id + Then result has address + And results contain + | osm_type | osm_id | place_rank | address_rank | + | node | 6522627624 | 30 | 30 | + And results contain + | centroid | boundingbox | + | 9.5036065 47.0660892 | 47.0660392,47.0661392,9.5035565,9.5036565 | + And results contain + | ref | display_name | + | Dorfbäckerei Herrmann | Dorfbäckerei Herrmann, 29, Gnetsch, Mäls, Balzers, Oberland, 9496, Liechtenstein | + + Examples: + | has_address | attributes | + | 1 | attributes | + | 0 | not attributes | + + + @Tiger + Scenario: Tiger address + When sending v1/reverse at 32.4752389363,-86.4810198619 with format xml + Then results contain + | osm_type | osm_id | place_rank | address_rank | + | way | 396009653 | 30 | 30 | + And results contain + | centroid | boundingbox | + | -86.4808553258 32.4753580256 | ^32.475308025\d*,32.475408025\d*,-86.480905325\d*,-86.480805325\d* | + And results contain + | display_name | + | 707, Upper Kingston Road, Upper Kingston, Prattville, Autauga County, 36067, United States | + + + Scenario: Interpolation address + When sending v1/reverse at 47.118533,9.57056562 with format xml + Then results contain + | osm_type | osm_id | place_rank | address_rank | + | way | 1 | 30 | 30 | + And results contain + | centroid | boundingbox | + | 9.57054676 47.118545392 | 47.118495392,47.118595392,9.57049676,9.57059676 | + And results contain + | display_name | + | 1019, Grosssteg, Sücka, Triesenberg, Oberland, 9497, Liechtenstein | + + + Scenario: Output of geojson + When sending v1/reverse at 47.06597,9.50467 with format xml + | param | value | + | polygon_geojson | 1 | + Then results contain + | geojson | + | {"type":"LineString","coordinates":[[9.5039353,47.0657546],[9.5040437,47.0657781],[9.5040808,47.065787],[9.5054298,47.0661407]]} | + + + Scenario: Output of WKT + When sending v1/reverse at 47.06597,9.50467 with format xml + | param | value | + | polygon_text | 1 | + Then results contain + | geotext | + | LINESTRING(9.5039353 47.0657546,9.5040437 47.0657781,9.5040808 47.065787,9.5054298 47.0661407) | + + + Scenario: Output of SVG + When sending v1/reverse at 47.06597,9.50467 with format xml + | param | value | + | polygon_svg | 1 | + Then results contain + | geosvg | + | M 9.5039353 -47.0657546 L 9.5040437 -47.0657781 9.5040808 -47.065787 9.5054298 -47.0661407 | + + + Scenario: Output of KML + When sending v1/reverse at 47.06597,9.50467 with format xml + | param | value | + | polygon_kml | 1 | + Then results contain + | geokml | + | ^9.5039\d*,47.0657\d* 9.5040\d*,47.0657\d* 9.5040\d*,47.065\d* 9.5054\d*,47.0661\d* | diff --git a/test/bdd/db/import/interpolation.feature b/test/bdd/db/import/interpolation.feature index 56ca4cc1..0b939f59 100644 --- a/test/bdd/db/import/interpolation.feature +++ b/test/bdd/db/import/interpolation.feature @@ -273,12 +273,12 @@ Feature: Import of address interpolations | W3 | 14 | 14 | When sending search query "16 Cloud Street" Then results contain - | ID | osm_type | osm_id | - | 0 | N | 4 | + | ID | osm | + | 0 | N4 | When sending search query "14 Cloud Street" Then results contain - | ID | osm_type | osm_id | - | 0 | W | 11 | + | ID | osm | + | 0 | W11 | Scenario: addr:street on housenumber way Given the grid @@ -318,12 +318,12 @@ Feature: Import of address interpolations | W3 | 14 | 14 | When sending search query "16 Cloud Street" Then results contain - | ID | osm_type | osm_id | - | 0 | N | 4 | + | ID | osm | + | 0 | N4 | When sending search query "14 Cloud Street" Then results contain - | ID | osm_type | osm_id | - | 0 | W | 11 | + | ID | osm | + | 0 | W11 | Scenario: Geometry of points and way don't match (github #253) Given the places @@ -399,10 +399,10 @@ Feature: Import of address interpolations Then W1 expands to interpolation | start | end | geometry | | 2 | 8 | 10,11 | - When sending jsonv2 reverse coordinates 1,1 + When sending v1/reverse at 1,1 Then results contain - | ID | osm_type | osm_id | type | display_name | - | 0 | node | 1 | house | 0 | + | ID | osm | type | display_name | + | 0 | N1 | house | 0 | Scenario: Parenting of interpolation with additional tags Given the grid diff --git a/test/bdd/db/import/linking.feature b/test/bdd/db/import/linking.feature index 0fb3f76d..dfa4b77c 100644 --- a/test/bdd/db/import/linking.feature +++ b/test/bdd/db/import/linking.feature @@ -55,8 +55,8 @@ Feature: Linking of places | R23 | - | When sending search query "rhein" Then results contain - | osm_type | - | R | + | osm | + | R13 | Scenario: Relations are not linked when in waterway relations Given the grid @@ -81,9 +81,9 @@ Feature: Linking of places | R2 | - | When sending search query "rhein" Then results contain - | ID | osm_type | - | 0 | R | - | 1 | W | + | ID | osm | + | 0 | R1 | + | 1 | W2 | Scenario: Empty waterway relations are handled correctly @@ -138,8 +138,8 @@ Feature: Linking of places | W2 | R1 | When sending search query "rhein2" Then results contain - | osm_type | - | W | + | osm | + | W1 | # github #573 Scenario: Boundaries should only be linked to places @@ -205,14 +205,14 @@ Feature: Linking of places | city | | Berlin | Then results contain - | ID | osm_type | osm_id | - | 0 | R | 13 | + | ID | osm | + | 0 | R13 | When sending search query "" | state | | Berlin | Then results contain - | ID | osm_type | osm_id | - | 0 | R | 13 | + | ID | osm | + | 0 | R13 | Scenario: Boundaries without place tags only link against same admin level @@ -237,14 +237,14 @@ Feature: Linking of places | state | | Berlin | Then results contain - | ID | osm_type | osm_id | - | 0 | R | 13 | + | ID | osm | + | 0 | R13 | When sending search query "" | city | | Berlin | Then results contain - | ID | osm_type | osm_id | - | 0 | N | 2 | + | ID | osm | + | 0 | N2 | # github #1352 Scenario: Do not use linked centroid when it is outside the area diff --git a/test/bdd/db/import/parenting.feature b/test/bdd/db/import/parenting.feature index 5de3fde6..c349b69f 100644 --- a/test/bdd/db/import/parenting.feature +++ b/test/bdd/db/import/parenting.feature @@ -23,12 +23,12 @@ Feature: Parenting of objects | N2 | W1 | When sending search query "4 galoo" Then results contain - | ID | osm_type | osm_id | display_name | - | 0 | N | 1 | 4, galoo, 12345, Deutschland | + | ID | osm | display_name | + | 0 | N1 | 4, galoo, 12345, Deutschland | When sending search query "5 galoo" Then results contain - | ID | osm_type | osm_id | display_name | - | 0 | N | 2 | 5, galoo, 99999, Deutschland | + | ID | osm | display_name | + | 0 | N2 | 5, galoo, 99999, Deutschland | Scenario: Address without tags, closest street Given the grid diff --git a/test/bdd/db/query/interpolation.feature b/test/bdd/db/query/interpolation.feature index 602ac434..600de718 100644 --- a/test/bdd/db/query/interpolation.feature +++ b/test/bdd/db/query/interpolation.feature @@ -23,7 +23,7 @@ Feature: Query of address interpolations | id | nodes | | 1 | 1,3 | When importing - When sending jsonv2 reverse point 2 + When sending v1/reverse N2 Then results contain | ID | display_name | | 0 | 3, Nickway | @@ -43,12 +43,12 @@ Feature: Query of address interpolations And the places | osm | class | type | housenr | geometry | | N1 | place | house | 2 | 1 | - | N3 | place | house | 16 | 3 | + | N3 | place | house | 18 | 3 | And the ways | id | nodes | | 1 | 1,3 | When importing - When sending jsonv2 reverse point 2 + When sending v1/reverse N2 Then results contain | ID | display_name | centroid | | 0 | 10, Nickway | 2 | diff --git a/test/bdd/db/update/linked_places.feature b/test/bdd/db/update/linked_places.feature index c277e8bd..539d9285 100644 --- a/test/bdd/db/update/linked_places.feature +++ b/test/bdd/db/update/linked_places.feature @@ -44,8 +44,8 @@ Feature: Updates of linked places | dups | | 1 | Then results contain - | osm_type | - | R | + | osm | + | R1 | When updating places | osm | class | type | name | admin | geometry | | R1 | boundary | administrative | foobar | 8 | (10,11,12,13,10) | @@ -56,8 +56,8 @@ Feature: Updates of linked places | dups | | 1 | Then results contain - | osm_type | - | N | + | osm | + | N1 | Scenario: Add linked place when linking relation is removed Given the 0.1 grid @@ -75,8 +75,8 @@ Feature: Updates of linked places | dups | | 1 | Then results contain - | osm_type | - | R | + | osm | + | R1 | When marking for delete R1 Then placex contains | object | linked_place_id | @@ -85,8 +85,8 @@ Feature: Updates of linked places | dups | | 1 | Then results contain - | osm_type | - | N | + | osm | + | N1 | Scenario: Remove linked place when linking relation is added Given the 0.1 grid @@ -101,8 +101,8 @@ Feature: Updates of linked places | dups | | 1 | Then results contain - | osm_type | - | N | + | osm | + | N1 | When updating places | osm | class | type | name | admin | geometry | | R1 | boundary | administrative | foo | 8 | (10,11,12,13,10) | @@ -113,8 +113,8 @@ Feature: Updates of linked places | dups | | 1 | Then results contain - | osm_type | - | R | + | osm | + | R1 | Scenario: Remove linked place when linking relation is renamed Given the 0.1 grid @@ -132,8 +132,8 @@ Feature: Updates of linked places | dups | | 1 | Then results contain - | osm_type | - | N | + | osm | + | N1 | When updating places | osm | class | type | name | admin | geometry | | R1 | boundary | administrative | foo | 8 | (10,11,12,13,10) | @@ -144,8 +144,8 @@ Feature: Updates of linked places | dups | | 1 | Then results contain - | osm_type | - | R | + | osm | + | R1 | Scenario: Update linking relation when linkee name is updated Given the 0.1 grid diff --git a/test/bdd/steps/check_functions.py b/test/bdd/steps/check_functions.py index f214a886..58d6c1f2 100644 --- a/test/bdd/steps/check_functions.py +++ b/test/bdd/steps/check_functions.py @@ -2,11 +2,14 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2022 by the Nominatim developer community. +# Copyright (C) 2023 by the Nominatim developer community. # For a full list of authors see the git log. """ Collection of assertion functions used for the steps. """ +import json +import math +import re class Almost: """ Compares a float value with a certain jitter. @@ -18,6 +21,51 @@ class Almost: def __eq__(self, other): return abs(other - self.value) < self.offset + +OSM_TYPE = {'N' : 'node', 'W' : 'way', 'R' : 'relation', + 'n' : 'node', 'w' : 'way', 'r' : 'relation', + 'node' : 'n', 'way' : 'w', 'relation' : 'r'} + + +class OsmType: + """ Compares an OSM type, accepting both N/R/W and node/way/relation. + """ + + def __init__(self, value): + self.value = value + + + def __eq__(self, other): + return other == self.value or other == OSM_TYPE[self.value] + + + def __str__(self): + return f"{self.value} or {OSM_TYPE[self.value]}" + + +class Field: + """ Generic comparator for fields, which looks at the type of the + value compared. + """ + def __init__(self, value): + self.value = value + + def __eq__(self, other): + if isinstance(self.value, float): + return math.isclose(self.value, float(other)) + + if self.value.startswith('^'): + return re.fullmatch(self.value, other) + + if isinstance(other, dict): + return other == eval('{' + self.value + '}') + + return str(self.value) == str(other) + + def __str__(self): + return str(self.value) + + class Bbox: """ Comparator for bounding boxes. """ @@ -41,3 +89,24 @@ class Bbox: def __str__(self): return str(self.coord) + + + +def check_for_attributes(obj, attrs, presence='present'): + """ Check that the object has the given attributes. 'attrs' is a + string with a comma-separated list of attributes. If 'presence' + is set to 'absent' then the function checks that the attributes do + not exist for the object + """ + def _dump_json(): + return json.dumps(obj, sort_keys=True, indent=2, ensure_ascii=False) + + for attr in attrs.split(','): + attr = attr.strip() + if presence == 'absent': + assert attr not in obj, \ + f"Unexpected attribute {attr}. Full response:\n{_dump_json()}" + else: + assert attr in obj, \ + f"No attribute '{attr}'. Full response:\n{_dump_json()}" + diff --git a/test/bdd/steps/http_responses.py b/test/bdd/steps/http_responses.py index b493f013..22fcb010 100644 --- a/test/bdd/steps/http_responses.py +++ b/test/bdd/steps/http_responses.py @@ -7,43 +7,11 @@ """ Classes wrapping HTTP responses from the Nominatim API. """ -from collections import OrderedDict import re import json import xml.etree.ElementTree as ET -from check_functions import Almost - -OSM_TYPE = {'N' : 'node', 'W' : 'way', 'R' : 'relation', - 'n' : 'node', 'w' : 'way', 'r' : 'relation', - 'node' : 'n', 'way' : 'w', 'relation' : 'r'} - -def _geojson_result_to_json_result(geojson_result): - result = geojson_result['properties'] - result['geojson'] = geojson_result['geometry'] - if 'bbox' in geojson_result: - # bbox is minlon, minlat, maxlon, maxlat - # boundingbox is minlat, maxlat, minlon, maxlon - result['boundingbox'] = [geojson_result['bbox'][1], - geojson_result['bbox'][3], - geojson_result['bbox'][0], - geojson_result['bbox'][2]] - return result - -class BadRowValueAssert: - """ Lazily formatted message for failures to find a field content. - """ - - def __init__(self, response, idx, field, value): - self.idx = idx - self.field = field - self.value = value - self.row = response.result[idx] - - def __str__(self): - return "\nBad value for row {} field '{}'. Expected: {}, got: {}.\nFull row: {}"""\ - .format(self.idx, self.field, self.value, - self.row[self.field], json.dumps(self.row, indent=4)) +from check_functions import Almost, OsmType, Field, check_for_attributes class GenericResponse: @@ -70,63 +38,54 @@ class GenericResponse: else: code = m.group(2) self.header['json_func'] = m.group(1) - self.result = json.JSONDecoder(object_pairs_hook=OrderedDict).decode(code) - if isinstance(self.result, OrderedDict): + self.result = json.JSONDecoder().decode(code) + if isinstance(self.result, dict): if 'error' in self.result: self.result = [] else: self.result = [self.result] + def _parse_geojson(self): self._parse_json() if self.result: - self.result = list(map(_geojson_result_to_json_result, self.result[0]['features'])) + geojson = self.result[0] + # check for valid geojson + check_for_attributes(geojson, 'type,features') + assert geojson['type'] == 'FeatureCollection' + assert isinstance(geojson['features'], list) + + self.result = [] + for result in geojson['features']: + check_for_attributes(result, 'type,properties,geometry') + assert result['type'] == 'Feature' + new = result['properties'] + check_for_attributes(new, 'geojson', 'absent') + new['geojson'] = result['geometry'] + if 'bbox' in result: + check_for_attributes(new, 'boundingbox', 'absent') + # bbox is minlon, minlat, maxlon, maxlat + # boundingbox is minlat, maxlat, minlon, maxlon + new['boundingbox'] = [result['bbox'][1], + result['bbox'][3], + result['bbox'][0], + result['bbox'][2]] + for k, v in geojson.items(): + if k not in ('type', 'features'): + check_for_attributes(new, '__' + k, 'absent') + new['__' + k] = v + self.result.append(new) + def _parse_geocodejson(self): self._parse_geojson() - if self.result is not None: - self.result = [r['geocoding'] for r in self.result] - - def assert_field(self, idx, field, value): - """ Check that result row `idx` has a field `field` with value `value`. - Float numbers are matched approximately. When the expected value - starts with a carat, regular expression matching is used. - """ - assert field in self.result[idx], \ - "Result row {} has no field '{}'.\nFull row: {}"\ - .format(idx, field, json.dumps(self.result[idx], indent=4)) - - if isinstance(value, float): - assert Almost(value) == float(self.result[idx][field]), \ - BadRowValueAssert(self, idx, field, value) - elif value.startswith("^"): - assert re.fullmatch(value, self.result[idx][field]), \ - BadRowValueAssert(self, idx, field, value) - elif isinstance(self.result[idx][field], OrderedDict): - assert self.result[idx][field] == eval('{' + value + '}'), \ - BadRowValueAssert(self, idx, field, value) - else: - assert str(self.result[idx][field]) == str(value), \ - BadRowValueAssert(self, idx, field, value) - - - def assert_subfield(self, idx, path, value): - assert path - - field = self.result[idx] - for p in path: - assert isinstance(field, OrderedDict) - assert p in field - field = field[p] - - if isinstance(value, float): - assert Almost(value) == float(field) - elif value.startswith("^"): - assert re.fullmatch(value, field) - elif isinstance(field, OrderedDict): - assert field, eval('{' + value + '}') - else: - assert str(field) == str(value) + if self.result: + for r in self.result: + assert set(r.keys()) == {'geocoding', 'geojson', '__geocoding'}, \ + f"Unexpected keys in result: {r.keys()}" + check_for_attributes(r['geocoding'], 'geojson', 'absent') + inner = r.pop('geocoding') + r.update(inner) def assert_address_field(self, idx, field, value): @@ -139,20 +98,13 @@ class GenericResponse: todo = [int(idx)] for idx in todo: - assert 'address' in self.result[idx], \ - "Result row {} has no field 'address'.\nFull row: {}"\ - .format(idx, json.dumps(self.result[idx], indent=4)) + self.check_row(idx, 'address' in self.result[idx], "No field 'address'") address = self.result[idx]['address'] - assert field in address, \ - "Result row {} has no field '{}' in address.\nFull address: {}"\ - .format(idx, field, json.dumps(address, indent=4)) + self.check_row_field(idx, field, value, base=address) - assert address[field] == value, \ - "\nBad value for row {} field '{}' in address. Expected: {}, got: {}.\nFull address: {}"""\ - .format(idx, field, value, address[field], json.dumps(address, indent=4)) - def match_row(self, row, context=None): + def match_row(self, row, context=None, field=None): """ Match the result fields against the given behave table row. """ if 'ID' in row.headings: @@ -161,19 +113,20 @@ class GenericResponse: todo = range(len(self.result)) for i in todo: + subdict = self.result[i] + if field is not None: + for key in field.split('.'): + self.check_row(i, key in subdict, f"Missing subfield {key}") + subdict = subdict[key] + self.check_row(i, isinstance(subdict, dict), + f"Subfield {key} not a dict") + for name, value in zip(row.headings, row.cells): if name == 'ID': pass elif name == 'osm': - assert 'osm_type' in self.result[i], \ - "Result row {} has no field 'osm_type'.\nFull row: {}"\ - .format(i, json.dumps(self.result[i], indent=4)) - assert self.result[i]['osm_type'] in (OSM_TYPE[value[0]], value[0]), \ - BadRowValueAssert(self, i, 'osm_type', value) - self.assert_field(i, 'osm_id', value[1:]) - elif name == 'osm_type': - assert self.result[i]['osm_type'] in (OSM_TYPE[value[0]], value[0]), \ - BadRowValueAssert(self, i, 'osm_type', value) + self.check_row_field(i, 'osm_type', OsmType(value[0]), base=subdict) + self.check_row_field(i, 'osm_id', Field(value[1:]), base=subdict) elif name == 'centroid': if ' ' in value: lon, lat = value.split(' ') @@ -181,15 +134,43 @@ class GenericResponse: lon, lat = context.osm.grid_node(int(value)) else: raise RuntimeError("Context needed when using grid coordinates") - self.assert_field(i, 'lat', float(lat)) - self.assert_field(i, 'lon', float(lon)) - elif '+' in name: - self.assert_subfield(i, name.split('+'), value) + self.check_row_field(i, 'lat', Field(float(lat)), base=subdict) + self.check_row_field(i, 'lon', Field(float(lon)), base=subdict) else: - self.assert_field(i, name, value) + self.check_row_field(i, name, Field(value), base=subdict) + + + def check_row(self, idx, check, msg): + """ Assert for the condition 'check' and print 'msg' on fail together + with the contents of the failing result. + """ + class _RowError: + def __init__(self, row): + self.row = row + + def __str__(self): + return f"{msg}. Full row {idx}:\n" \ + + json.dumps(self.row, indent=4, ensure_ascii=False) + + assert check, _RowError(self.result[idx]) + + + def check_row_field(self, idx, field, expected, base=None): + """ Check field 'field' of result 'idx' for the expected value + and print a meaningful error if the condition fails. + When 'base' is set to a dictionary, then the field is checked + in that base. The error message will still report the contents + of the full result. + """ + if base is None: + base = self.result[idx] + + self.check_row(idx, field in base, f"No field '{field}'") + value = base[field] + + self.check_row(idx, expected == value, + f"\nBad value for field '{field}'. Expected: {expected}, got: {value}") - def property_list(self, prop): - return [x[prop] for x in self.result] class SearchResponse(GenericResponse): @@ -240,24 +221,33 @@ class ReverseResponse(GenericResponse): if child.tag == 'result': assert not self.result, "More than one result in reverse result" self.result.append(dict(child.attrib)) + check_for_attributes(self.result[0], 'display_name', 'absent') + self.result[0]['display_name'] = child.text elif child.tag == 'addressparts': + assert 'address' not in self.result[0], "More than one address in result" address = {} for sub in child: + assert len(sub) == 0, f"Address element '{sub.tag}' has subelements" address[sub.tag] = sub.text self.result[0]['address'] = address elif child.tag == 'extratags': + assert 'extratags' not in self.result[0], "More than one extratags in result" self.result[0]['extratags'] = {} for tag in child: + assert len(tag) == 0, f"Extratags element '{tag.attrib['key']}' has subelements" self.result[0]['extratags'][tag.attrib['key']] = tag.attrib['value'] elif child.tag == 'namedetails': + assert 'namedetails' not in self.result[0], "More than one namedetails in result" self.result[0]['namedetails'] = {} for tag in child: + assert len(tag) == 0, f"Namedetails element '{tag.attrib['desc']}' has subelements" self.result[0]['namedetails'][tag.attrib['desc']] = tag.text elif child.tag == 'geokml': - self.result[0][child.tag] = True + assert 'geokml' not in self.result[0], "More than one geokml in result" + self.result[0]['geokml'] = ET.tostring(child, encoding='unicode') else: assert child.tag == 'error', \ - "Unknown XML tag {} on page: {}".format(child.tag, self.page) + f"Unknown XML tag {child.tag} on page: {self.page}" class StatusResponse(GenericResponse): diff --git a/test/bdd/steps/steps_api_queries.py b/test/bdd/steps/steps_api_queries.py index 1df1d523..1c6fac69 100644 --- a/test/bdd/steps/steps_api_queries.py +++ b/test/bdd/steps/steps_api_queries.py @@ -15,11 +15,12 @@ import os import re import logging import asyncio +import xml.etree.ElementTree as ET from urllib.parse import urlencode from utils import run_script from http_responses import GenericResponse, SearchResponse, ReverseResponse, StatusResponse -from check_functions import Bbox +from check_functions import Bbox, check_for_attributes from table_compare import NominatimID LOG = logging.getLogger(__name__) @@ -48,6 +49,15 @@ BASE_SERVER_ENV = { } +def make_todo_list(context, result_id): + if result_id is None: + context.execute_steps("then at least 1 result is returned") + return range(len(context.response.result)) + + context.execute_steps(f"then more than {result_id}results are returned") + return (int(result_id.strip()), ) + + def compare(operator, op1, op2): if operator == 'less than': return op1 < op2 @@ -60,12 +70,16 @@ def compare(operator, op1, op2): elif operator == 'at most': return op1 <= op2 else: - raise Exception("unknown operator '%s'" % operator) + raise ValueError(f"Unknown operator '{operator}'") def send_api_query(endpoint, params, fmt, context): - if fmt is not None and fmt.strip() != 'debug': - params['format'] = fmt.strip() + if fmt is not None: + if fmt.strip() == 'debug': + params['debug'] = '1' + else: + params['format'] = fmt.strip() + if context.table: if context.table.headings[0] == 'param': for line in context.table: @@ -88,11 +102,11 @@ def send_api_query_php(endpoint, params, context): env = dict(BASE_SERVER_ENV) env['QUERY_STRING'] = urlencode(params) - env['SCRIPT_NAME'] = '/%s.php' % endpoint - env['REQUEST_URI'] = '%s?%s' % (env['SCRIPT_NAME'], env['QUERY_STRING']) + env['SCRIPT_NAME'] = f'/{endpoint}.php' + env['REQUEST_URI'] = f"{env['SCRIPT_NAME']}?{env['QUERY_STRING']}" env['CONTEXT_DOCUMENT_ROOT'] = os.path.join(context.nominatim.website_dir.name, 'website') env['SCRIPT_FILENAME'] = os.path.join(env['CONTEXT_DOCUMENT_ROOT'], - '%s.php' % endpoint) + f'{endpoint}.php') LOG.debug("Environment:" + json.dumps(env, sort_keys=True, indent=2)) @@ -104,7 +118,7 @@ def send_api_query_php(endpoint, params, context): env['XDEBUG_MODE'] = 'coverage' env['COV_SCRIPT_FILENAME'] = env['SCRIPT_FILENAME'] env['COV_PHP_DIR'] = context.nominatim.src_dir - env['COV_TEST_NAME'] = '%s:%s' % (context.scenario.filename, context.scenario.line) + env['COV_TEST_NAME'] = f"{context.scenario.filename}:{context.scenario.line}" env['SCRIPT_FILENAME'] = \ os.path.join(os.path.split(__file__)[0], 'cgi-with-coverage.php') cmd.append(env['SCRIPT_FILENAME']) @@ -113,11 +127,11 @@ def send_api_query_php(endpoint, params, context): cmd.append(env['SCRIPT_FILENAME']) for k,v in params.items(): - cmd.append("%s=%s" % (k, v)) + cmd.append(f"{k}={v}") outp, err = run_script(cmd, cwd=context.nominatim.website_dir.name, env=env) - assert len(err) == 0, "Unexpected PHP error: %s" % (err) + assert len(err) == 0, f"Unexpected PHP error: {err}" if outp.startswith('Status: '): status = int(outp[8:11]) @@ -145,41 +159,37 @@ def website_search_request(context, fmt, query, addr): params['q'] = query if addr is not None: params['addressdetails'] = '1' - if fmt and fmt.strip() == 'debug': - params['debug'] = '1' outp, status = send_api_query('search', params, fmt, context) context.response = SearchResponse(outp, fmt or 'json', status) -@when(u'sending (?P\S+ )?reverse coordinates (?P.+)?,(?P.+)?') -def website_reverse_request(context, fmt, lat, lon): + +@when('sending v1/reverse at (?P[\d.-]*),(?P[\d.-]*)(?: with format (?P.+))?') +def api_endpoint_v1_reverse(context, lat, lon, fmt): params = {} if lat is not None: params['lat'] = lat if lon is not None: params['lon'] = lon - if fmt and fmt.strip() == 'debug': - params['debug'] = '1' + if fmt is None: + fmt = 'jsonv2' + elif fmt == "''": + fmt = None outp, status = send_api_query('reverse', params, fmt, context) - context.response = ReverseResponse(outp, fmt or 'xml', status) -@when(u'sending (?P\S+ )?reverse point (?P.+)') -def website_reverse_request(context, fmt, nodeid): + +@when('sending v1/reverse N(?P\d+)(?: with format (?P.+))?') +def api_endpoint_v1_reverse_from_node(context, nodeid, fmt): params = {} - if fmt and fmt.strip() == 'debug': - params['debug'] = '1' params['lon'], params['lat'] = (f'{c:f}' for c in context.osm.grid_node(int(nodeid))) - outp, status = send_api_query('reverse', params, fmt, context) - context.response = ReverseResponse(outp, fmt or 'xml', status) - @when(u'sending (?P\S+ )?details query for (?P.*)') def website_details_request(context, fmt, query): params = {} @@ -211,15 +221,15 @@ def website_status_request(context, fmt): @step(u'(?Pless than|more than|exactly|at least|at most) (?P\d+) results? (?:is|are) returned') def validate_result_number(context, operator, number): - assert context.response.errorcode == 200 + context.execute_steps("Then a HTTP 200 is returned") numres = len(context.response.result) assert compare(operator, numres, int(number)), \ - "Bad number of results: expected {} {}, got {}.".format(operator, number, numres) + f"Bad number of results: expected {operator} {number}, got {numres}." @then(u'a HTTP (?P\d+) is returned') def check_http_return_status(context, status): assert context.response.errorcode == int(status), \ - "Return HTTP status is {}.".format(context.response.errorcode) + f"Return HTTP status is {context.response.errorcode}." @then(u'the page contents equals "(?P.+)"') def check_page_content_equals(context, text): @@ -228,7 +238,19 @@ def check_page_content_equals(context, text): @then(u'the result is valid (?P\w+)') def step_impl(context, fmt): context.execute_steps("Then a HTTP 200 is returned") - assert context.response.format == fmt + if fmt.strip() == 'html': + try: + tree = ET.fromstring(context.response.page) + except Exception as ex: + assert False, f"Could not parse page:\n{context.response.page}" + + assert tree.tag == 'html' + body = tree.find('./body') + assert body is not None + assert body.find('.//script') is None + else: + assert context.response.format == fmt + @then(u'a (?P\w+) user error is returned') def check_page_error(context, fmt): @@ -243,49 +265,31 @@ def check_page_error(context, fmt): @then(u'result header contains') def check_header_attr(context): for line in context.table: - assert re.fullmatch(line['value'], context.response.header[line['attr']]) is not None, \ - "attribute '%s': expected: '%s', got '%s'" % ( - line['attr'], line['value'], - context.response.header[line['attr']]) + value = context.response.header[line['attr']] + assert re.fullmatch(line['value'], value) is not None, \ + f"Attribute '{line['attr']}': expected: '{line['value']}', got '{value}'" + @then(u'result header has (?Pnot )?attributes (?P.*)') def check_header_no_attr(context, neg, attrs): - for attr in attrs.split(','): - if neg: - assert attr not in context.response.header, \ - "Unexpected attribute {}. Full response:\n{}".format( - attr, json.dumps(context.response.header, sort_keys=True, indent=2)) - else: - assert attr in context.response.header, \ - "No attribute {}. Full response:\n{}".format( - attr, json.dumps(context.response.header, sort_keys=True, indent=2)) + check_for_attributes(context.response.header, attrs, + 'absent' if neg else 'present') -@then(u'results contain') -def step_impl(context): + +@then(u'results contain(?: in field (?P.*))?') +def step_impl(context, field): context.execute_steps("then at least 1 result is returned") for line in context.table: - context.response.match_row(line, context=context) + context.response.match_row(line, context=context, field=field) + @then(u'result (?P\d+ )?has (?Pnot )?attributes (?P.*)') def validate_attributes(context, lid, neg, attrs): - if lid is None: - idx = range(len(context.response.result)) - context.execute_steps("then at least 1 result is returned") - else: - idx = [int(lid.strip())] - context.execute_steps("then more than %sresults are returned" % lid) - - for i in idx: - for attr in attrs.split(','): - if neg: - assert attr not in context.response.result[i],\ - "Unexpected attribute {}. Full response:\n{}".format( - attr, json.dumps(context.response.result[i], sort_keys=True, indent=2)) - else: - assert attr in context.response.result[i], \ - "No attribute {}. Full response:\n{}".format( - attr, json.dumps(context.response.result[i], sort_keys=True, indent=2)) + for i in make_todo_list(context, lid): + check_for_attributes(context.response.result[i], attrs, + 'absent' if neg else 'present') + @then(u'result addresses contain') def step_impl(context): @@ -300,7 +304,7 @@ def step_impl(context): @then(u'address of result (?P\d+) has(?P no)? types (?P.*)') def check_address(context, lid, neg, attrs): - context.execute_steps("then more than %s results are returned" % lid) + context.execute_steps(f"then more than {lid} results are returned") addr_parts = context.response.result[int(lid)]['address'] @@ -312,7 +316,7 @@ def check_address(context, lid, neg, attrs): @then(u'address of result (?P\d+) (?Pis|contains)') def check_address(context, lid, complete): - context.execute_steps("then more than %s results are returned" % lid) + context.execute_steps(f"then more than {lid} results are returned") lid = int(lid) addr_parts = dict(context.response.result[lid]['address']) @@ -322,38 +326,30 @@ def check_address(context, lid, complete): del addr_parts[line['type']] if complete == 'is': - assert len(addr_parts) == 0, "Additional address parts found: %s" % str(addr_parts) + assert len(addr_parts) == 0, f"Additional address parts found: {addr_parts!s}" -@then(u'result (?P\d+ )?has bounding box in (?P[\d,.-]+)') -def step_impl(context, lid, coords): - if lid is None: - context.execute_steps("then at least 1 result is returned") - bboxes = context.response.property_list('boundingbox') - else: - context.execute_steps("then more than {}results are returned".format(lid)) - bboxes = [context.response.result[int(lid)]['boundingbox']] +@then(u'result (?P\d+ )?has bounding box in (?P[\d,.-]+)') +def check_bounding_box_in_area(context, lid, coords): expected = Bbox(coords) - for bbox in bboxes: - assert bbox in expected, "Bbox {} is not contained in {}.".format(bbox, expected) + for idx in make_todo_list(context, lid): + res = context.response.result[idx] + check_for_attributes(res, 'boundingbox') + context.response.check_row(idx, res['boundingbox'] in expected, + f"Bbox is not contained in {expected}") -@then(u'result (?P\d+ )?has centroid in (?P[\d,.-]+)') -def step_impl(context, lid, coords): - if lid is None: - context.execute_steps("then at least 1 result is returned") - centroids = zip(context.response.property_list('lon'), - context.response.property_list('lat')) - else: - context.execute_steps("then more than %sresults are returned".format(lid)) - res = context.response.result[int(lid)] - centroids = [(res['lon'], res['lat'])] +@then(u'result (?P\d+ )?has centroid in (?P[\d,.-]+)') +def check_centroid_in_area(context, lid, coords): expected = Bbox(coords) - for centroid in centroids: - assert centroid in expected,\ - "Centroid {} is not inside {}.".format(centroid, expected) + for idx in make_todo_list(context, lid): + res = context.response.result[idx] + check_for_attributes(res, 'lat,lon') + context.response.check_row(idx, (res['lon'], res['lat']) in expected, + f"Centroid is not inside {expected}") + @then(u'there are(?P no)? duplicates') def check_for_duplicates(context, neg): @@ -370,6 +366,7 @@ def check_for_duplicates(context, neg): resarr.add(dup) if neg: - assert not has_dupe, "Found duplicate for %s" % (dup, ) + assert not has_dupe, f"Found duplicate for {dup}" else: assert has_dupe, "No duplicates found" + diff --git a/test/python/tokenizer/sanitizers/test_delete_tags.py b/test/python/tokenizer/sanitizers/test_delete_tags.py new file mode 100644 index 00000000..f9ccc2f6 --- /dev/null +++ b/test/python/tokenizer/sanitizers/test_delete_tags.py @@ -0,0 +1,327 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Tests for the sanitizer that normalizes housenumbers. +""" +import pytest + + +from nominatim.data.place_info import PlaceInfo +from nominatim.tokenizer.place_sanitizer import PlaceSanitizer + + +class TestWithDefault: + + @pytest.fixture(autouse=True) + def setup_country(self, def_config): + self.config = def_config + + def run_sanitizer_on(self, type, **kwargs): + + place = PlaceInfo({type: {k.replace('_', ':'): v for k, v in kwargs.items()}, + 'country_code': 'de', 'rank_address': 30}) + + sanitizer_args = {'step': 'delete-tags'} + + name, address = PlaceSanitizer([sanitizer_args], + self.config).process_names(place) + + return { + 'name': sorted([(p.name, p.kind, p.suffix or '') for p in name]), + 'address': sorted([(p.name, p.kind, p.suffix or '') for p in address]) + } + + + def test_on_name(self): + res = self.run_sanitizer_on('name', name='foo', ref='bar', ref_abc='baz') + + assert res.get('name') == [] + + def test_on_address(self): + res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz') + + assert res.get('address') == [('bar', 'ref', ''), ('baz', 'ref', 'abc'), + ('foo', 'name', '')] + + +class TestTypeField: + + @pytest.fixture(autouse=True) + def setup_country(self, def_config): + self.config = def_config + + def run_sanitizer_on(self, type, **kwargs): + + place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}, + 'country_code': 'de', 'rank_address': 30}) + + sanitizer_args = { + 'step': 'delete-tags', + 'type': type, + } + + name, _ = PlaceSanitizer([sanitizer_args], + self.config).process_names(place) + + return sorted([(p.name, p.kind, p.suffix or '') for p in name]) + + def test_name_type(self): + res = self.run_sanitizer_on('name', name='foo', ref='bar', ref_abc='baz') + + assert res == [] + + def test_address_type(self): + res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz') + + assert res == [('bar', 'ref', ''), ('baz', 'ref', 'abc'), + ('foo', 'name', '')] + +class TestFilterKind: + + @pytest.fixture(autouse=True) + def setup_country(self, def_config): + self.config = def_config + + def run_sanitizer_on(self, filt, **kwargs): + + place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}, + 'country_code': 'de', 'rank_address': 30}) + + sanitizer_args = { + 'step': 'delete-tags', + 'filter-kind': filt, + } + + name, _ = PlaceSanitizer([sanitizer_args], + self.config).process_names(place) + + return sorted([(p.name, p.kind, p.suffix or '') for p in name]) + + def test_single_exact_name(self): + res = self.run_sanitizer_on(['name'], ref='foo', name='foo', + name_abc='bar', ref_abc='bar') + + assert res == [('bar', 'ref', 'abc'), ('foo', 'ref', '')] + + + def test_single_pattern(self): + res = self.run_sanitizer_on(['.*name'], + name_fr='foo', ref_fr='foo', namexx_fr='bar', + shortname_fr='bar', name='bar') + + assert res == [('bar', 'namexx', 'fr'), ('foo', 'ref', 'fr')] + + + def test_multiple_patterns(self): + res = self.run_sanitizer_on(['.*name', 'ref'], + name_fr='foo', ref_fr='foo', oldref_fr='foo', + namexx_fr='bar', shortname_fr='baz', name='baz') + + assert res == [('bar', 'namexx', 'fr'), ('foo', 'oldref', 'fr')] + + +class TestRankAddress: + + @pytest.fixture(autouse=True) + def setup_country(self, def_config): + self.config = def_config + + def run_sanitizer_on(self, rank_addr, **kwargs): + + place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}, + 'country_code': 'de', 'rank_address': 30}) + + sanitizer_args = { + 'step': 'delete-tags', + 'rank_address': rank_addr + } + + name, _ = PlaceSanitizer([sanitizer_args], + self.config).process_names(place) + + return sorted([(p.name, p.kind, p.suffix or '') for p in name]) + + + def test_single_rank(self): + res = self.run_sanitizer_on('30', name='foo', ref='bar') + + assert res == [] + + def test_single_rank_fail(self): + res = self.run_sanitizer_on('28', name='foo', ref='bar') + + assert res == [('bar', 'ref', ''), ('foo', 'name', '')] + + def test_ranged_rank_pass(self): + res = self.run_sanitizer_on('26-30', name='foo', ref='bar') + + assert res == [] + + def test_ranged_rank_fail(self): + res = self.run_sanitizer_on('26-29', name='foo', ref='bar') + + assert res == [('bar', 'ref', ''), ('foo', 'name', '')] + + def test_mixed_rank_pass(self): + res = self.run_sanitizer_on(['4', '20-28', '30', '10-12'], name='foo', ref='bar') + + assert res == [] + + def test_mixed_rank_fail(self): + res = self.run_sanitizer_on(['4-8', '10', '26-29', '18'], name='foo', ref='bar') + + assert res == [('bar', 'ref', ''), ('foo', 'name', '')] + + +class TestSuffix: + + @pytest.fixture(autouse=True) + def setup_country(self, def_config): + self.config = def_config + + def run_sanitizer_on(self, suffix, **kwargs): + + place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}, + 'country_code': 'de', 'rank_address': 30}) + + sanitizer_args = { + 'step': 'delete-tags', + 'suffix': suffix, + } + + name, _ = PlaceSanitizer([sanitizer_args], + self.config).process_names(place) + + return sorted([(p.name, p.kind, p.suffix or '') for p in name]) + + + def test_single_suffix(self): + res = self.run_sanitizer_on('abc', name='foo', name_abc='foo', + name_pqr='bar', ref='bar', ref_abc='baz') + + assert res == [('bar', 'name', 'pqr'), ('bar', 'ref', ''), ('foo', 'name', '')] + + def test_multiple_suffix(self): + res = self.run_sanitizer_on(['abc.*', 'pqr'], name='foo', name_abcxx='foo', + ref_pqr='bar', name_pqrxx='baz') + + assert res == [('baz', 'name', 'pqrxx'), ('foo', 'name', '')] + + + +class TestCountryCodes: + + @pytest.fixture(autouse=True) + def setup_country(self, def_config): + self.config = def_config + + def run_sanitizer_on(self, country_code, **kwargs): + + place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}, + 'country_code': 'de', 'rank_address': 30}) + + sanitizer_args = { + 'step': 'delete-tags', + 'country_code': country_code, + } + + name, _ = PlaceSanitizer([sanitizer_args], + self.config).process_names(place) + + return sorted([(p.name, p.kind) for p in name]) + + + def test_single_country_code_pass(self): + res = self.run_sanitizer_on('de', name='foo', ref='bar') + + assert res == [] + + def test_single_country_code_fail(self): + res = self.run_sanitizer_on('in', name='foo', ref='bar') + + assert res == [('bar', 'ref'), ('foo', 'name')] + + def test_empty_country_code_list(self): + res = self.run_sanitizer_on([], name='foo', ref='bar') + + assert res == [('bar', 'ref'), ('foo', 'name')] + + def test_multiple_country_code_pass(self): + res = self.run_sanitizer_on(['in', 'de', 'fr'], name='foo', ref='bar') + + assert res == [] + + def test_multiple_country_code_fail(self): + res = self.run_sanitizer_on(['in', 'au', 'fr'], name='foo', ref='bar') + + assert res == [('bar', 'ref'), ('foo', 'name')] + +class TestAllParameters: + + @pytest.fixture(autouse=True) + def setup_country(self, def_config): + self.config = def_config + + def run_sanitizer_on(self, country_code, rank_addr, suffix, **kwargs): + + place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}, + 'country_code': 'de', 'rank_address': 30}) + + sanitizer_args = { + 'step': 'delete-tags', + 'type': 'name', + 'filter-kind': ['name', 'ref'], + 'country_code': country_code, + 'rank_address': rank_addr, + 'suffix': suffix, + 'name': r'[\s\S]*', + } + + name, _ = PlaceSanitizer([sanitizer_args], + self.config).process_names(place) + + return sorted([(p.name, p.kind, p.suffix or '') for p in name]) + + + def test_string_arguments_pass(self): + res = self.run_sanitizer_on('de', '25-30', r'[\s\S]*', + name='foo', ref='foo', name_abc='bar', ref_abc='baz') + + assert res == [] + + def test_string_arguments_fail(self): + res = self.run_sanitizer_on('in', '25-30', r'[\s\S]*', + name='foo', ref='foo', name_abc='bar', ref_abc='baz') + + assert res == [('bar', 'name', 'abc'), ('baz', 'ref', 'abc'), + ('foo', 'name', ''), ('foo', 'ref', '')] + + def test_list_arguments_pass(self): + res = self.run_sanitizer_on(['de', 'in'], ['20-28', '30'], [r'abc.*', r'[\s\S]*'], + name='foo', ref_abc='foo', name_abcxx='bar', ref_pqr='baz') + + assert res == [] + + def test_list_arguments_fail(self): + res = self.run_sanitizer_on(['de', 'in'], ['14', '20-29'], [r'abc.*', r'pqr'], + name='foo', ref_abc='foo', name_abcxx='bar', ref_pqr='baz') + + assert res == [('bar', 'name', 'abcxx'), ('baz', 'ref', 'pqr'), + ('foo', 'name', ''), ('foo', 'ref', 'abc')] + + def test_mix_arguments_pass(self): + res = self.run_sanitizer_on('de', ['10', '20-28', '30'], r'[\s\S]*', + name='foo', ref_abc='foo', name_abcxx='bar', ref_pqr='baz') + + assert res == [] + + def test_mix_arguments_fail(self): + res = self.run_sanitizer_on(['de', 'in'], ['10', '20-28', '30'], r'abc.*', + name='foo', ref='foo', name_pqr='bar', ref_pqr='baz') + + assert res == [('bar', 'name', 'pqr'), ('baz', 'ref', 'pqr'), + ('foo', 'name', ''), ('foo', 'ref', '')] \ No newline at end of file