From: Sarah Hoffmann Date: Tue, 17 Aug 2021 19:55:32 +0000 (+0200) Subject: Merge pull request #2427 from lonvia/remove-us-states-special-casing X-Git-Tag: v4.0.0~40 X-Git-Url: https://git.openstreetmap.org/nominatim.git/commitdiff_plain/656c1291b15d7f81f87768a7a1eafb7f2d0223d5?hp=4ae5ba7fc4a3451c806d89a583158412ea26bd01 Merge pull request #2427 from lonvia/remove-us-states-special-casing Move US state hack into legacy tokenizer --- diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index 0f76a9c4..43d10368 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -506,13 +506,6 @@ class Geocode userError('Query string is not UTF-8 encoded.'); } - // Conflicts between US state abreviations and various words for 'the' in different languages - if (isset($this->aLangPrefOrder['name:en'])) { - $sQuery = preg_replace('/(^|,)\s*il\s*(,|$)/i', '\1illinois\2', $sQuery); - $sQuery = preg_replace('/(^|,)\s*al\s*(,|$)/i', '\1alabama\2', $sQuery); - $sQuery = preg_replace('/(^|,)\s*la\s*(,|$)/i', '\1louisiana\2', $sQuery); - } - // Do we have anything that looks like a lat/lon pair? $sQuery = $oCtx->setNearPointFromQuery($sQuery); diff --git a/lib-php/Phrase.php b/lib-php/Phrase.php index cdde6134..4307a230 100644 --- a/lib-php/Phrase.php +++ b/lib-php/Phrase.php @@ -9,7 +9,8 @@ namespace Nominatim; */ class Phrase { - // Complete phrase as a string. + // Complete phrase as a string (guaranteed to have no leading or trailing + // spaces). private $sPhrase; // Element type for structured searches. 
private $sPhraseType; diff --git a/lib-php/tokenizer/legacy_tokenizer.php b/lib-php/tokenizer/legacy_tokenizer.php index e5ffbe02..b508d220 100644 --- a/lib-php/tokenizer/legacy_tokenizer.php +++ b/lib-php/tokenizer/legacy_tokenizer.php @@ -87,6 +87,23 @@ class Tokenizer $sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase()); $sSQL .= 'make_standard_name(:' .$iPhrase.') as p'.$iPhrase.','; $aParams[':'.$iPhrase] = $oPhrase->getPhrase(); + + // Conflicts between US state abbreviations and various words + // for 'the' in different languages + switch (strtolower($oPhrase->getPhrase())) { + case 'il': + $aParams[':'.$iPhrase] = 'illinois'; + break; + case 'al': + $aParams[':'.$iPhrase] = 'alabama'; + break; + case 'la': + $aParams[':'.$iPhrase] = 'louisiana'; + break; + default: + $aParams[':'.$iPhrase] = $oPhrase->getPhrase(); + break; + } } $sSQL = substr($sSQL, 0, -1); diff --git a/test/bdd/db/query/search_simple.feature b/test/bdd/db/query/search_simple.feature index 8c19fb7d..bcd73eaf 100644 --- a/test/bdd/db/query/search_simple.feature +++ b/test/bdd/db/query/search_simple.feature @@ -61,7 +61,7 @@ Feature: Searching of simple objects | osm | | N20 | - Scenario: when the housenumber is missing the stret is still returned + Scenario: when the housenumber is missing the street is still returned Given the grid | 1 | | 2 | Given the places @@ -72,3 +72,34 @@ Feature: Searching of simple objects Then results contain | osm | | W1 | + + + Scenario Outline: Special cased american states will be found + Given the grid + | 1 | | 2 | + | | 10 | | + | 4 | | 3 | + Given the places + | osm | class | type | admin | name | name+ref | geometry | + | R1 | boundary | administrative | 4 | <state> | <ref> | (1,2,3,4,1) | + Given the places + | osm | class | type | name | geometry | + | N2 | place | town | <city> | 10 | + | N3 | place | city | <city> | country:ca | + When importing + And sending search query "<city>, <state>" + Then results contain + | osm | + | N2 | + When sending search query "<city>, <ref>" + | 
accept-language | + | en | + Then results contain + | osm | + | N2 | + + Examples: + | city | state | ref | + | Chicago | Illinois | IL | + | Auburn | Alabama | AL | + | New Orleans | Louisiana | LA |