Merge pull request #3091 from lonvia/fix-postcode-search

author Sarah Hoffmann <lonvia@denofr.de>

Tue, 20 Jun 2023 14:13:20 +0000 (16:13 +0200)

committer GitHub <noreply@github.com>

Tue, 20 Jun 2023 14:13:20 +0000 (16:13 +0200)
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 20 Jun 2023 14:13:20 +0000 (16:13 +0200)
committer GitHub <noreply@github.com>
Tue, 20 Jun 2023 14:13:20 +0000 (16:13 +0200)
diff --git a/nominatim/api/logging.py b/nominatim/api/logging.py

index 6bf3ed38f10230f1b1b3b3a48d4be97e4d3bcd39..6c8b1b388224f8a787977b15e7c2e2fce5aaab03 100644 (file)
--- a/nominatim/api/logging.py
+++ b/nominatim/api/logging.py
@@ -178,7 +178,7 @@ class HTMLLogger(BaseLogger):
              self._write(f"rank={res.rank_address}, ")
              self._write(f"osm={format_osm(res.osm_object)}, ")
              self._write(f'cc={res.country_code}, ')
-            self._write(f'importance={res.importance or -1:.5f})</dd>')
+            self._write(f'importance={res.importance or float("nan"):.5f})</dd>')
              total += 1
          self._write(f'</dl><b>TOTAL:</b> {total}</p>')
  
@@ -196,7 +196,7 @@ class HTMLLogger(BaseLogger):
  
      def _python_var(self, var: Any) -> str:
          if CODE_HIGHLIGHT:
-            fmt = highlight(repr(var), PythonLexer(), HtmlFormatter(nowrap=True))
+            fmt = highlight(str(var), PythonLexer(), HtmlFormatter(nowrap=True))
              return f'<div class="highlight"><code class="lang-python">{fmt}</code></div>'
  
          return f'<code class="lang-python">{str(var)}</code>'
diff --git a/nominatim/api/search/db_search_builder.py b/nominatim/api/search/db_search_builder.py

index b6ba211c81d76e4c93a1f4e4d6d08bdd9d6849cf..9ff8c03c90c3d6ef4b7f1ff1c038e24bdb165171 100644 (file)
--- a/nominatim/api/search/db_search_builder.py
+++ b/nominatim/api/search/db_search_builder.py
@@ -141,12 +141,14 @@ class SearchBuilder:
              yield dbs.CountrySearch(sdata)
  
          if sdata.postcodes and (is_category or self.configured_for_postcode):
+            penalty = 0.0 if sdata.countries else 0.1
              if address:
                  sdata.lookups = [dbf.FieldLookup('nameaddress_vector',
                                                   [t.token for r in address
                                                    for t in self.query.get_partials_list(r)],
                                                   'restrict')]
-            yield dbs.PostcodeSearch(0.4, sdata)
+                penalty += 0.2
+            yield dbs.PostcodeSearch(penalty, sdata)
  
  
      def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[Token],
diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py

index db35f7265427b1e7b957c468556de4f67e50ec44..76ff368f85b177ae8ad9dee579ec0bbc6de4e160 100644 (file)
--- a/nominatim/api/search/db_searches.py
+++ b/nominatim/api/search/db_searches.py
@@ -403,6 +403,12 @@ class CountrySearch(AbstractSearch):
                                        details: SearchDetails) -> nres.SearchResults:
          """ Look up the country in the fallback country tables.
          """
+        # Avoid the fallback search when this is a more search. Country results
+        # usually are in the first batch of results and it is not possible
+        # to exclude these fallbacks.
+        if details.excluded:
+            return nres.SearchResults()
+
          t = conn.t.country_name
          tgrid = conn.t.country_grid
  
@@ -562,6 +568,8 @@ class PlaceSearch(AbstractSearch):
              sql = sql.where(tsearch.c.country_code.in_(self.countries.values))
  
          if self.postcodes:
+            # if a postcode is given, don't search for state or country level objects
+            sql = sql.where(tsearch.c.address_rank > 9)
              tpc = conn.t.postcode
              if self.expected_count > 1000:
                  # Many results expected. Restrict by postcode.
diff --git a/nominatim/api/search/geocoder.py b/nominatim/api/search/geocoder.py

index 5e90d408fbadd364ec6512e6aa72208c48afc17e..0ef649d99ab8857924ef7d86bc3aa96865608666 100644 (file)
--- a/nominatim/api/search/geocoder.py
+++ b/nominatim/api/search/geocoder.py
@@ -180,7 +180,7 @@ def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
          return f'{c[0]}^{c[1]}'
  
      for search in searches[start:]:
-        fields = ('name_lookups', 'name_ranking', 'countries', 'housenumbers',
+        fields = ('lookups', 'rankings', 'countries', 'housenumbers',
                    'postcodes', 'qualifier')
          iters = itertools.zip_longest([f"{search.penalty:.3g}"],
                                        *(getattr(search, attr, []) for attr in fields),
diff --git a/nominatim/api/search/token_assignment.py b/nominatim/api/search/token_assignment.py

index 747fea6ca853e8e59c1f29bbdbcfb0f66e70fb14..11da23594880f9f4353630e69e6e26dbee6f0f32 100644 (file)
--- a/nominatim/api/search/token_assignment.py
+++ b/nominatim/api/search/token_assignment.py
@@ -270,7 +270,12 @@ class _TokenSequence:
              if (base.postcode.start == 0 and self.direction != -1)\
                 or (base.postcode.end == query.num_token_slots() and self.direction != 1):
                  log().comment('postcode search')
-                yield dataclasses.replace(base, penalty=self.penalty)
+                # <address>,<postcode> should give preference to address search
+                if base.postcode.start == 0:
+                    penalty = self.penalty
+                else:
+                    penalty = self.penalty + 0.1
+                yield dataclasses.replace(base, penalty=penalty)
  
          # Postcode or country-only search
          if not base.address:
@@ -278,6 +283,9 @@ class _TokenSequence:
                  log().comment('postcode/country search')
                  yield dataclasses.replace(base, penalty=self.penalty)
          else:
+            # <postcode>,<address> should give preference to postcode search
+            if base.postcode and base.postcode.start == 0:
+                self.penalty += 0.1
              # Use entire first word as name
              if self.direction != -1:
                  log().comment('first word = name')
diff --git a/nominatim/api/types.py b/nominatim/api/types.py

index c7e15843b551da95579b67f1389488d8b18d396e..87568a09ac59cd96f2185c61cc44f42ab4e85b02 100644 (file)
--- a/nominatim/api/types.py
+++ b/nominatim/api/types.py
@@ -302,10 +302,11 @@ def format_excluded(ids: Any) -> List[int]:
      else:
          raise UsageError("Parameter 'excluded' needs to be a comma-separated list "
                           "or a Python list of numbers.")
-    if not all(isinstance(i, int) or (isinstance(i, str) and i.isdigit()) for i in plist):
+    if not all(isinstance(i, int) or
+               (isinstance(i, str) and (not i or i.isdigit())) for i in plist):
          raise UsageError("Parameter 'excluded' only takes place IDs.")
  
-    return [int(id) for id in plist if id]
+    return [int(id) for id in plist if id] or [0]
  
  
  def format_categories(categories: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
diff --git a/nominatim/api/v1/server_glue.py b/nominatim/api/v1/server_glue.py

index 43cc6e56fb30b0f66d0548528ce9b4aa6a60d372..865e13318c61732f2f29f04a77fee3f1f4c28344 100644 (file)
--- a/nominatim/api/v1/server_glue.py
+++ b/nominatim/api/v1/server_glue.py
@@ -185,7 +185,7 @@ class ASGIAdaptor(abc.ABC):
          """ Return the accepted languages.
          """
          return self.get('accept-language')\
-               or self.get_header('http_accept_language')\
+               or self.get_header('accept-language')\
                 or self.config().DEFAULT_LANGUAGE
  
  
diff --git a/test/python/api/search/test_token_assignment.py b/test/python/api/search/test_token_assignment.py

index f78d5430e350089c155a19f648c416bb4c9533c4..dc123403ab24185aa78e59d842cecb0bce48e296 100644 (file)
--- a/test/python/api/search/test_token_assignment.py
+++ b/test/python/api/search/test_token_assignment.py
@@ -253,7 +253,7 @@ def test_postcode_with_designation():
                     (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]))
  
      check_assignments(yield_token_assignments(q),
-                      TokenAssignment(name=TokenRange(1, 2),
+                      TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
                                        postcode=TokenRange(0, 1)),
                        TokenAssignment(postcode=TokenRange(0, 1),
                                        address=[TokenRange(1, 2)]))
@@ -266,7 +266,7 @@ def test_postcode_with_designation_backwards():
      check_assignments(yield_token_assignments(q),
                        TokenAssignment(name=TokenRange(0, 1),
                                        postcode=TokenRange(1, 2)),
-                      TokenAssignment(postcode=TokenRange(1, 2),
+                      TokenAssignment(penalty=0.1, postcode=TokenRange(1, 2),
                                        address=[TokenRange(0, 1)]))
  
  
diff --git a/test/python/api/test_server_glue_v1.py b/test/python/api/test_server_glue_v1.py

index 538d91f155eaa78357c318521e3f9a37bca4152c..a731e72034df09c0dadfc985d8057ff656bf6b97 100644 (file)
--- a/test/python/api/test_server_glue_v1.py
+++ b/test/python/api/test_server_glue_v1.py
@@ -123,7 +123,7 @@ def test_accepted_languages_from_param():
  
  
  def test_accepted_languages_from_header():
-    a = FakeAdaptor(headers={'http_accept_language': 'de'})
+    a = FakeAdaptor(headers={'accept-language': 'de'})
      assert a.get_accepted_languages() == 'de'
  
  
@@ -135,13 +135,13 @@ def test_accepted_languages_from_default(monkeypatch):
  
  def test_accepted_languages_param_over_header():
      a = FakeAdaptor(params={'accept-language': 'de'},
-                    headers={'http_accept_language': 'en'})
+                    headers={'accept-language': 'en'})
      assert a.get_accepted_languages() == 'de'
  
  
  def test_accepted_languages_header_over_default(monkeypatch):
      monkeypatch.setenv('NOMINATIM_DEFAULT_LANGUAGE', 'en')
-    a = FakeAdaptor(headers={'http_accept_language': 'de'})
+    a = FakeAdaptor(headers={'accept-language': 'de'})
      assert a.get_accepted_languages() == 'de'
  
  
@@ -197,14 +197,14 @@ def test_raise_error_during_debug():
      loglib.log().section('Ongoing')
  
      with pytest.raises(FakeError) as excinfo:
-        a.raise_error('bad state')
+        a.raise_error('badstate')
  
      content = ET.fromstring(excinfo.value.msg)
  
      assert content.tag == 'html'
  
      assert '>Ongoing<' in excinfo.value.msg
-    assert 'bad state' in excinfo.value.msg
+    assert 'badstate' in excinfo.value.msg
  
  
  # ASGIAdaptor.build_response
author	Sarah Hoffmann <lonvia@denofr.de>
	Tue, 20 Jun 2023 14:13:20 +0000 (16:13 +0200)
committer	GitHub <noreply@github.com>
	Tue, 20 Jun 2023 14:13:20 +0000 (16:13 +0200)
nominatim/api/logging.py		patch | blob | history
nominatim/api/search/db_search_builder.py		patch | blob | history
nominatim/api/search/db_searches.py		patch | blob | history
nominatim/api/search/geocoder.py		patch | blob | history
nominatim/api/search/token_assignment.py		patch | blob | history
nominatim/api/types.py		patch | blob | history
nominatim/api/v1/server_glue.py		patch | blob | history
test/python/api/search/test_token_assignment.py		patch | blob | history
test/python/api/test_server_glue_v1.py		patch | blob | history