]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
authorSarah Hoffmann <lonvia@denofr.de>
Fri, 16 Nov 2018 23:11:29 +0000 (00:11 +0100)
committerSarah Hoffmann <lonvia@denofr.de>
Fri, 16 Nov 2018 23:11:29 +0000 (00:11 +0100)
37 files changed:
CMakeLists.txt
Vagrantfile
data-sources/us-tiger/README.md [new file with mode: 0644]
data-sources/us-tiger/convert.sh [new file with mode: 0755]
data-sources/us-tiger/tiger_address_convert.py [moved from utils/tigerAddressImport.py with 100% similarity]
data-sources/us-tiger/tiger_county_fips.json [moved from utils/tiger_county_fips.json with 100% similarity]
docs/CMakeLists.txt
docs/admin/Import-and-Update.md
docs/data-sources/overview.md [new file with mode: 0644]
docs/mkdocs.yml
lib/DatabaseError.php [new file with mode: 0644]
lib/Geocode.php
lib/Result.php
lib/SearchDescription.php
lib/init-website.php
lib/lib.php
lib/setup/SetupClass.php
lib/template/error-html.php [new file with mode: 0644]
lib/template/error-json.php [new file with mode: 0644]
lib/template/error-xml.php [new file with mode: 0644]
test/bdd/api/errors/formats.feature [new file with mode: 0644]
test/bdd/api/lookup/simple.feature
test/bdd/api/reverse/queries.feature
test/bdd/api/search/simple.feature
test/bdd/steps/queries.py
test/php/Nominatim/AddressDetailsTest.php
test/php/Nominatim/DatabaseErrorTest.php [new file with mode: 0644]
test/php/Nominatim/LibTest.php
test/php/Nominatim/StatusTest.php
test/php/Nominatim/TokenListTest.php
test/php/bootstrap.php
utils/imports.php [deleted file]
vagrant/Install-on-Ubuntu-18-nginx.sh [new file with mode: 0755]
website/details.php
website/lookup.php
website/reverse.php
website/search.php

index 0c6d4094ed7eaadd78d42b7898e33793d7e72450..7c5763fd47b678f03f7ac7f998c04c6b394c8023 100644 (file)
@@ -113,7 +113,6 @@ set(CUSTOMFILES
     website/taginfo.json
     utils/blocks.php
     utils/country_languages.php
-    utils/imports.php
     utils/importWikipedia.php
     utils/export.php
     utils/query.php
index bd610e7e756fda910b69da6838cc6a398907c80f..4740f87944cc81f4b7468348e6de21541c380574 100644 (file)
@@ -23,6 +23,15 @@ Vagrant.configure("2") do |config|
       end
   end
 
+  config.vm.define "ubuntu18nginx" do |sub|
+      sub.vm.box = "bento/ubuntu-18.04"
+      sub.vm.provision :shell do |s|
+        s.path = "vagrant/Install-on-Ubuntu-18-nginx.sh"
+        s.privileged = false
+        s.args = [checkout]
+      end
+  end
+
   config.vm.define "ubuntu16" do |sub|
       sub.vm.box = "bento/ubuntu-16.04"
       sub.vm.provision :shell do |s|
diff --git a/data-sources/us-tiger/README.md b/data-sources/us-tiger/README.md
new file mode 100644 (file)
index 0000000..e75a9ef
--- /dev/null
@@ -0,0 +1,29 @@
+# US TIGER address data
+
+Convert [TIGER](https://www.census.gov/geo/maps-data/data/tiger.html)/Line dataset of the US Census Bureau to SQL files which can be imported by Nominatim. The created tables in the Nominatim database are separate from OpenStreetMap tables and get queried at search time separately.
+
+The dataset gets updated once per year. Downloading is prone to be slow (can take a full day) and converting the files can take hours as well.
+
+Replace '2018' with the current year throughout.
+
+  1. Install the GDAL library and python bindings and the unzip tool
+
+        # Ubuntu:
+        sudo apt-get install python-gdal unzip
+        # CentOS:
+        sudo yum install gdal-python unzip
+
+  2. Get the TIGER 2018 data. You will need the EDGES files
+     (3,233 zip files, 11GB total).
+
+         wget -r ftp://ftp2.census.gov/geo/tiger/TIGER2018/EDGES/
+
+  3. Convert the data into SQL statements. Adjust the file paths in the scripts as needed
+
+        cd data-sources/us-tiger
+        ./convert.sh <input-path> <output-path>
+        
+  4. Maybe: package the created files
+  
+        tar -czf tiger2018-nominatim-preprocessed.tar.gz tiger
+        
\ No newline at end of file
diff --git a/data-sources/us-tiger/convert.sh b/data-sources/us-tiger/convert.sh
new file mode 100755 (executable)
index 0000000..b94017e
--- /dev/null
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+INPATH=$1
+OUTPATH=$2
+
+if [[ ! -d "$INPATH" ]]; then
+    echo "input path does not exist"
+    exit 1
+fi
+
+if [[ ! -d "$OUTPATH" ]]; then
+    echo "output path does not exist"
+    exit 1
+fi
+
+INREGEX='_([0-9]{5})_edges.zip'
+WORKPATH="$OUTPATH/tmp-workdir/"
+mkdir -p "$WORKPATH"
+
+
+
+INFILES=($INPATH/*.zip)
+echo "Found ${#INFILES[*]} files."
+
+for F in ${INFILES[*]}; do
+    # echo $F
+
+    if [[ "$F" =~ $INREGEX ]]; then
+        COUNTYID=${BASH_REMATCH[1]}
+        SHAPEFILE="$WORKPATH/$(basename $F '.zip').shp"
+        SQLFILE="$OUTPATH/$COUNTYID.sql"
+
+        unzip -o -q -d "$WORKPATH" "$F"
+        if [[ ! -e "$SHAPEFILE" ]]; then
+            echo "Unzip failed. $SHAPEFILE not found."
+            exit 1
+        fi
+
+        ./tiger_address_convert.py "$SHAPEFILE" "$SQLFILE"
+
+        rm $WORKPATH/*
+    fi
+done
+
+OUTFILES=($OUTPATH/*.sql)
+echo "Wrote ${#OUTFILES[*]} files."
+
+rmdir $WORKPATH
index cbe91b91d355e90416b0aa30be7d096cec010b25..68af5429257b2501858f0b9fce0ccdd02ce02160 100644 (file)
@@ -10,8 +10,10 @@ ADD_CUSTOM_TARGET(doc
    COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/admin ${CMAKE_CURRENT_BINARY_DIR}/admin
    COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/develop ${CMAKE_CURRENT_BINARY_DIR}/develop
    COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/api ${CMAKE_CURRENT_BINARY_DIR}/api
+   COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/data-sources ${CMAKE_CURRENT_BINARY_DIR}/data-sources
    COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/index.md ${CMAKE_CURRENT_BINARY_DIR}/index.md
    COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/extra.css ${CMAKE_CURRENT_BINARY_DIR}/extra.css
+   COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/us-tiger/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/US-Tiger.md
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-7.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-7.md
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-16.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-16.md
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-18.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-18.md
index 731ff8faee6bda0fb480b53e62fdceb7052b6bf9..847aa37d8eb98956d92b4789d0c7a976ada7df96 100644 (file)
@@ -101,52 +101,34 @@ Note that this command downloads the phrases from the wiki link above.
 
 ## Installing Tiger housenumber data for the US
 
-Nominatim is able to use the official TIGER address set to complement the
-OSM house number data in the US. You can add TIGER data to your own Nominatim
-instance by following these steps:
+Nominatim is able to use the official [TIGER](https://www.census.gov/geo/maps-data/data/tiger.html)
+address set to complement the OSM house number data in the US. You can add
+TIGER data to your own Nominatim instance by following these steps. The
+entire US adds about 10GB to your database.
 
-  1. Install the GDAL library and python bindings and the unzip tool
-
-       * Ubuntu: `sudo apt-get install python-gdal unzip`
-       * CentOS: `sudo yum install gdal-python unzip`
-
-  2. Get preprocessed TIGER 2017 data and unpack it into the
+  1. Get preprocessed TIGER 2018 data and unpack it into the
      data directory in your Nominatim sources:
 
         cd Nominatim/data
-        wget https://nominatim.org/data/tiger2017-nominatim-preprocessed.tar.gz
-        tar xf tiger2017-nominatim-preprocessed.tar.gz
+        wget https://nominatim.org/data/tiger2018-nominatim-preprocessed.tar.gz
+        tar xf tiger2018-nominatim-preprocessed.tar.gz
+
+    `data-sources/us-tiger/README.md` explains how the data got preprocessed.
 
-  3. Import the data into your Nominatim database: 
+  2. Import the data into your Nominatim database: 
 
         ./utils/setup.php --import-tiger-data
 
-  4. Enable use of the Tiger data in your `settings/local.php` by adding:
+  3. Enable use of the Tiger data in your `settings/local.php` by adding:
 
          @define('CONST_Use_US_Tiger_Data', true);
 
-  5. Apply the new settings:
+  4. Apply the new settings:
 
 ```sh
     ./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
 ```
 
-The entire US adds about 10GB to your database.
-
-You can also process the data from the original TIGER data to create the
-SQL files, Nominatim needs for the import:
-
-  1. Get the TIGER 2017 data. You will need the EDGES files
-     (3,234 zip files, 11GB total).
-
-         wget -r ftp://ftp2.census.gov/geo/tiger/TIGER2017/EDGES/
-
-  2. Convert the data into SQL statements: 
-
-         ./utils/imports.php --parse-tiger <tiger edge data directory>
-
-Be warned that this can take quite a long time. After this process is finished,
-the same preprocessed files as above are available in `data/tiger`.
 
 ## Updates
 
diff --git a/docs/data-sources/overview.md b/docs/data-sources/overview.md
new file mode 100644 (file)
index 0000000..a6dc0db
--- /dev/null
@@ -0,0 +1,4 @@
+# Additional Data Sources
+
+This guide explains how the data sources other than OpenStreetMap that are
+mentioned in the install instructions were obtained and converted.
index b620decf19bbed792e0aa9b15f4c450d5e7a4d69..1a690e7b096427eca08d387123c8559758fe6375 100644 (file)
@@ -20,6 +20,9 @@ pages:
         - 'Troubleshooting' : 'admin/Faq.md'
     - 'Developers Guide':
         - 'Overview' : 'develop/overview.md'
+    - 'External Data Sources':
+        - 'Overview' : 'data-sources/overview.md'
+        - 'US Census (Tiger)': 'data-sources/US-Tiger.md'
     - 'Appendix':
         - 'Installation on CentOS 7' : 'appendix/Install-on-Centos-7.md'
         - 'Installation on Ubuntu 16' : 'appendix/Install-on-Ubuntu-16.md'
diff --git a/lib/DatabaseError.php b/lib/DatabaseError.php
new file mode 100644 (file)
index 0000000..2df331b
--- /dev/null
@@ -0,0 +1,32 @@
+<?php
+
+namespace Nominatim;
+
+/**
+ * Exception thrown when a database request has failed.
+ *
+ * Keeps the value describing the SQL failure so that error handlers can
+ * show the database message and a dump of the failure.
+ */
+class DatabaseError extends \Exception
+{
+    /** @var mixed Value describing the SQL failure, as given to the constructor. */
+    public $oSql;
+
+    /**
+     * @param string          $message  Error message.
+     * @param integer         $code     Error code handed to \Exception.
+     * @param \Exception|null $previous Previous exception for chaining, if any.
+     * @param mixed           $oSql     Value describing the SQL failure.
+     *                                  getSqlError() calls getMessage() on it;
+     *                                  getSqlDebugDump() calls getUserInfo()
+     *                                  on it when CONST_Debug is off.
+     */
+    public function __construct($message, $code = 500, \Exception $previous = null, $oSql)
+    {
+        // The type hint must be fully qualified: inside this namespace a
+        // bare 'Exception' would refer to Nominatim\Exception.
+        parent::__construct($message, $code, $previous);
+        $this->oSql = $oSql;
+    }
+
+    /**
+     * @return string Class name, code and message on a single line.
+     */
+    public function __toString()
+    {
+        return __CLASS__ . ": [{$this->code}]: {$this->message}\n";
+    }
+
+    /**
+     * @return mixed Result of getMessage() on the stored SQL failure object.
+     */
+    public function getSqlError()
+    {
+        return $this->oSql->getMessage();
+    }
+
+    /**
+     * Return details about the SQL failure.
+     *
+     * @return mixed var_export() output of the stored value when CONST_Debug
+     *               is set, otherwise the result of its getUserInfo().
+     */
+    public function getSqlDebugDump()
+    {
+        if (CONST_Debug) {
+            return var_export($this->oSql, true);
+        } else {
+            return $this->oSql->getUserInfo();
+        }
+    }
+}
index 1dd5a0e5ea3dd484d963d362959d2aafeae369c1..5343873fb9d59bdd8987954f7f20692bc0866be3 100644 (file)
@@ -546,7 +546,6 @@ class Geocode
         // Do we have anything that looks like a lat/lon pair?
         $sQuery = $oCtx->setNearPointFromQuery($sQuery);
 
-        $aResults = array();
         if ($sQuery || $this->aStructuredQuery) {
             // Start with a single blank search
             $aSearches = array(new SearchDescription($oCtx));
@@ -746,8 +745,10 @@ class Geocode
             // Start the search process
             $iGroupLoop = 0;
             $iQueryLoop = 0;
+            $aNextResults = array();
             foreach ($aGroupedSearches as $iGroupedRank => $aSearches) {
                 $iGroupLoop++;
+                $aResults = $aNextResults;
                 foreach ($aSearches as $oSearch) {
                     $iQueryLoop++;
 
@@ -767,6 +768,23 @@ class Geocode
                     if ($iQueryLoop > 20) break;
                 }
 
+                if (!empty($aResults)) {
+                    $aSplitResults = Result::splitResults($aResults);
+                    Debug::printVar('Split results', $aSplitResults);
+                    if ($iGroupLoop <= 4 && empty($aSplitResults['tail'])
+                        && reset($aSplitResults['head'])->iResultRank > 0) {
+                        // Haven't found an exact match for the query yet.
+                        // Therefore add result from the next group level.
+                        $aNextResults = $aSplitResults['head'];
+                        foreach ($aNextResults as $oRes) {
+                            $oRes->iResultRank--;
+                        }
+                        $aResults = array();
+                    } else {
+                        $aResults = $aSplitResults['head'];
+                    }
+                }
+
                 if (!empty($aResults) && ($this->iMinAddressRank != 0 || $this->iMaxAddressRank != 30)) {
                     // Need to verify passes rank limits before dropping out of the loop (yuk!)
                     // reduces the number of place ids, like a filter
index d69ac62f1f7b28cc731a49146b830a59f0323f05..5c39f473746b1dffd7d9ffd93d7343a73b59c750 100644 (file)
@@ -68,4 +68,32 @@ class Result
 
         return $sHousenumbers;
     }
+
+    /**
+     * Split a result array into the best-ranked results and the rest.
+     *
+     * The best rank is the lowest iResultRank found in the input. All
+     * results sharing that rank are returned under 'head', all others
+     * under 'tail'. Both arrays are keyed by the iId of the result.
+     *
+     * @param object[] $aResults  List of results to split. Each entry needs
+     *                            the properties iId and iResultRank.
+     *
+     * @return array[] Array with the two keys 'head' and 'tail'.
+     */
+    public static function splitResults($aResults)
+    {
+        $aHead = array();
+        $aTail = array();
+        $iMinRank = 10000;
+
+        foreach ($aResults as $oRes) {
+            if ($oRes->iResultRank < $iMinRank) {
+                // A better rank was found, so the previous head moves to the
+                // tail. The union operator is used instead of array_merge()
+                // because array_merge() renumbers integer keys and would
+                // drop the iId keys of the demoted entries.
+                $aTail += $aHead;
+                $aHead = array($oRes->iId => $oRes);
+                $iMinRank = $oRes->iResultRank;
+            } elseif ($oRes->iResultRank == $iMinRank) {
+                $aHead[$oRes->iId] = $oRes;
+            } else {
+                $aTail[$oRes->iId] = $oRes;
+            }
+        }
+
+        return array('head' => $aHead, 'tail' => $aTail);
+    }
 }
index ec14e54600ee11f106d83b54ab6510b9b98be708..204a735885f8d3222e6ab1b5236e91f7f87a712f 100644 (file)
@@ -453,6 +453,9 @@ class SearchDescription
 
                 if (empty($aResults) && $this->looksLikeFullAddress()) {
                     $aResults = $aNamedPlaceIDs;
+                    foreach ($aResults as $oRes) {
+                        $oRes->iResultRank++;
+                    }
                 }
             }
 
@@ -469,16 +472,13 @@ class SearchDescription
             if ($sPlaceIds) {
                 $sSQL = 'SELECT place_id FROM placex';
                 $sSQL .= ' WHERE place_id in ('.$sPlaceIds.')';
-                $sSQL .= " AND postcode = '".$this->sPostcode."'";
+                $sSQL .= " AND postcode != '".$this->sPostcode."'";
                 Debug::printSQL($sSQL);
                 $aFilteredPlaceIDs = chksql($oDB->getCol($sSQL));
                 if ($aFilteredPlaceIDs) {
-                    $aNewResults = array();
                     foreach ($aFilteredPlaceIDs as $iPlaceId) {
-                        $aNewResults[$iPlaceId] = $aResults[$iPlaceId];
+                        $aResults[$iPlaceId]->iResultRank++;
                     }
-                    $aResults = $aNewResults;
-                    Debug::printVar('Place IDs after postcode filtering', $aResults);
                 }
             }
         }
index ca5214da07fd50530ae5abec74bb3cba1afa5e1c..ae2a5d360ae57dd439b29b46507bff99afdc0432 100644 (file)
@@ -2,6 +2,7 @@
 
 require_once('init.php');
 require_once('ParameterParser.php');
+require_once('DatabaseError.php');
 require_once(CONST_Debug ? 'DebugHtml.php' : 'DebugNone.php');
 
 /***************************************************************************
@@ -15,74 +16,51 @@ function chksql($oSql, $sMsg = 'Database request failed')
 {
     if (!PEAR::isError($oSql)) return $oSql;
 
-    header('HTTP/1.0 500 Internal Server Error');
-    header('Content-type: text/html; charset=utf-8');
-
-    $sSqlError = $oSql->getMessage();
-
-    echo <<<INTERNALFAIL
-<html>
-  <head><title>Internal Server Error</title></head>
-  <body>
-    <h1>Internal Server Error</h1>
-    <p>Nominatim has encountered an internal error while accessing the database.
-       This may happen because the database is broken or because of a bug in
-       the software. If you think it is a bug, feel free to report
-       it over on <a href="https://github.com/openstreetmap/Nominatim/issues">
-       Github</a>. Please include the URL that caused the problem and the
-       complete error details below.</p>
-    <p><b>Message:</b> $sMsg</p>
-    <p><b>SQL Error:</b> $sSqlError</p>
-    <p><b>Details:</b> <pre>
-INTERNALFAIL;
-
-    if (CONST_Debug) {
-        var_dump($oSql);
-    } else {
-        echo "<pre>\n".$oSql->getUserInfo().'</pre>';
-    }
+    throw new Nominatim\DatabaseError($sMsg, 500, null, $oSql);
+}
+
 
-    echo '</pre></p></body></html>';
-    exit;
+function userError($sMsg)
+{
+    throw new Exception($sMsg, 400);
 }
 
-function failInternalError($sError, $sSQL = false, $vDumpVar = false)
+
+function exception_handler_html($exception)
 {
-    header('HTTP/1.0 500 Internal Server Error');
-    header('Content-type: text/html; charset=utf-8');
-    echo '<html><body><h1>Internal Server Error</h1>';
-    echo '<p>Nominatim has encountered an internal error while processing your request. This is most likely because of a bug in the software.</p>';
-    echo '<p><b>Details:</b> '.$sError,'</p>';
-    echo '<p>Feel free to file an issue on <a href="https://github.com/openstreetmap/Nominatim/issues">Github</a>. ';
-    echo 'Please include the error message above and the URL you used.</p>';
-    if (CONST_Debug) {
-        echo '<hr><h2>Debugging Information</h2><br>';
-        if ($sSQL) {
-            echo '<h3>SQL query</h3><code>'.$sSQL.'</code>';
-        }
-        if ($vDumpVar) {
-            echo '<h3>Result</h3> <code>';
-            var_dump($vDumpVar);
-            echo '</code>';
-        }
-    }
-    echo "\n</body></html>\n";
-    exit;
+    http_response_code($exception->getCode());
+    header('Content-type: text/html; charset=UTF-8');
+    include(CONST_BasePath.'/lib/template/error-html.php');
 }
 
+function exception_handler_json($exception)
+{
+    http_response_code($exception->getCode());
+    header('Content-type: application/json; charset=utf-8');
+    include(CONST_BasePath.'/lib/template/error-json.php');
+}
 
-function userError($sError)
+function exception_handler_xml($exception)
 {
-    header('HTTP/1.0 400 Bad Request');
-    header('Content-type: text/html; charset=utf-8');
-    echo '<html><body><h1>Bad Request</h1>';
-    echo '<p>Nominatim has encountered an error with your request.</p>';
-    echo '<p><b>Details:</b> '.$sError.'</p>';
-    echo '<p>If you feel this error is incorrect feel file an issue on <a href="https://github.com/openstreetmap/Nominatim/issues">Github</a>. ';
-    echo 'Please include the error message above and the URL you used.</p>';
-    echo "\n</body></html>\n";
-    exit;
+    http_response_code($exception->getCode());
+    header('Content-type: text/xml; charset=utf-8');
+    echo '<?xml version="1.0" encoding="UTF-8" ?>'."\n";
+    include(CONST_BasePath.'/lib/template/error-xml.php');
+}
+
+
+function set_exception_handler_by_format($sFormat = 'html')
+{
+    if ($sFormat == 'html') {
+        set_exception_handler('exception_handler_html');
+    } elseif ($sFormat == 'xml') {
+        set_exception_handler('exception_handler_xml');
+    } else {
+        set_exception_handler('exception_handler_json');
+    }
 }
+// set a default
+set_exception_handler_by_format();
 
 
 /***************************************************************************
@@ -96,6 +74,6 @@ if (CONST_NoAccessControl) {
         header('Access-Control-Allow-Headers: '.$_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']);
     }
 }
-if ($_SERVER['REQUEST_METHOD'] == 'OPTIONS') exit;
+if (isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] == 'OPTIONS') exit;
 
 if (CONST_Debug) header('Content-type: text/html; charset=utf-8');
index 0f87c37eca00c1f4f6077ef0452218ed25030abf..37ef5e190ba2d405447abe736cc1cc3feac12daf 100644 (file)
@@ -61,6 +61,13 @@ function byImportance($a, $b)
 
 function javascript_renderData($xVal, $iOptions = 0)
 {
+    $sCallback = isset($_GET['json_callback']) ? $_GET['json_callback'] : '';
+    if ($sCallback && !preg_match('/^[$_\p{L}][$_\p{L}\p{Nd}.[\]]*$/u', $sCallback)) {
+        // Unset, we call javascript_renderData again during exception handling
+        unset($_GET['json_callback']);
+        throw new Exception('Invalid json_callback value', 400);
+    }
+
     $iOptions |= JSON_UNESCAPED_UNICODE;
     if (isset($_GET['pretty']) && in_array(strtolower($_GET['pretty']), array('1', 'true'))) {
         $iOptions |= JSON_PRETTY_PRINT;
@@ -68,16 +75,12 @@ function javascript_renderData($xVal, $iOptions = 0)
 
     $jsonout = json_encode($xVal, $iOptions);
 
-    if (!isset($_GET['json_callback'])) {
+    if ($sCallback) {
+        header('Content-Type: application/javascript; charset=UTF-8');
+        echo $_GET['json_callback'].'('.$jsonout.')';
+    } else {
         header('Content-Type: application/json; charset=UTF-8');
         echo $jsonout;
-    } else {
-        if (preg_match('/^[$_\p{L}][$_\p{L}\p{Nd}.[\]]*$/u', $_GET['json_callback'])) {
-            header('Content-Type: application/javascript; charset=UTF-8');
-            echo $_GET['json_callback'].'('.$jsonout.')';
-        } else {
-            header('HTTP/1.0 400 Bad Request');
-        }
     }
 }
 
index bd53260eb92d97fa040e595442043d6374998da2..5e5b16d6c04f08f68f648ba06d8f5f36d3516e51 100755 (executable)
@@ -154,10 +154,11 @@ class SetupFunctions
         $this->pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql.gz');
         $this->pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_table.sql');
 
-        if (file_exists(CONST_BasePath.'/data/gb_postcode_data.sql.gz')) {
-            $this->pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_data.sql.gz');
+        $sPostcodeFilename = CONST_BasePath.'/data/gb_postcode_data.sql.gz';
+        if (file_exists($sPostcodeFilename)) {
+            $this->pgsqlRunScriptFile($sPostcodeFilename);
         } else {
-            warn('external UK postcode table not found.');
+            warn('optional external UK postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
         }
 
         if (CONST_Use_Extra_US_Postcodes) {
diff --git a/lib/template/error-html.php b/lib/template/error-html.php
new file mode 100644 (file)
index 0000000..11a1846
--- /dev/null
@@ -0,0 +1,60 @@
+<?php
+    // HTML error page template.
+    //
+    // Expects $exception (the exception being reported) to be defined in the
+    // including scope. Every exception-derived string is passed through
+    // htmlspecialchars() so that markup inside a message, SQL error or
+    // stack trace is shown literally instead of being interpreted.
+
+    $title = 'Internal Server Error';
+    if ( $exception->getCode() == 400 ) {
+        $title = 'Bad Request';
+    }
+?>
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <style>
+        em { font-weight: bold; font-family: monospace; color: #e00404; background-color: #ffeaea; }
+    </style>
+</head>
+<body>
+    <h1><?php echo $title ?></h1>
+    
+    <?php if (get_class($exception) == 'Nominatim\DatabaseError') { ?>
+
+        <p>Nominatim has encountered an internal error while accessing the database.
+           This may happen because the database is broken or because of a bug in
+           the software.</p>
+
+    <?php } else { ?>
+
+        <p>Nominatim has encountered an error with your request.</p>
+
+    <?php } ?>
+
+
+    <h3>Details</h3>
+
+    <?php echo htmlspecialchars($exception->getMessage(), ENT_QUOTES, 'UTF-8') ?>
+
+    <?php if (CONST_Debug) { ?>
+        <p>
+        Exception <em><?php echo htmlspecialchars(get_class($exception), ENT_QUOTES, 'UTF-8') ?></em> thrown in <em><?php echo htmlspecialchars($exception->getFile() . '('. $exception->getLine() . ')', ENT_QUOTES, 'UTF-8') ?></em>.
+        </p>
+
+        <?php if (get_class($exception) == 'Nominatim\DatabaseError') { ?>
+
+            <h3>SQL Error</h3>
+            <em><?php echo htmlspecialchars($exception->getSqlError(), ENT_QUOTES, 'UTF-8') ?></em>
+
+            <pre><?php echo htmlspecialchars($exception->getSqlDebugDump(), ENT_QUOTES, 'UTF-8') ?></pre>
+
+        <?php } ?>
+
+        <h3>Stack trace</h3>
+        <pre><?php echo htmlspecialchars($exception->getTraceAsString(), ENT_QUOTES, 'UTF-8') ?></pre>
+
+    <?php } ?>
+
+    <p>
+        If you feel this error is incorrect, feel free to file an issue on
+        <a href="https://github.com/openstreetmap/Nominatim/issues">Github</a>.
+
+        Please include the error message above and the URL you used.
+    </p>
+</body>
+</html>
diff --git a/lib/template/error-json.php b/lib/template/error-json.php
new file mode 100644 (file)
index 0000000..81caa71
--- /dev/null
@@ -0,0 +1,11 @@
+<?php
+    // JSON error template.
+    //
+    // Expects $exception (the exception being reported) to be defined in the
+    // including scope. Builds an 'error' structure from its code and message
+    // and hands it to javascript_renderData().
+    $error = array(
+              'code' => $exception->getCode(),
+              'message' => $exception->getMessage()
+    );
+
+    // The throw location is only added when debugging is enabled.
+    if (CONST_Debug) {
+        $error['details'] = $exception->getFile() . '('. $exception->getLine() . ')';
+    }
+
+    // NOTE(review): javascript_renderData() appears to echo the encoded data
+    // itself, which would make this outer echo redundant - confirm.
+    echo javascript_renderData(array('error' => $error));
diff --git a/lib/template/error-xml.php b/lib/template/error-xml.php
new file mode 100644 (file)
index 0000000..a21ac19
--- /dev/null
@@ -0,0 +1,7 @@
+<?php
+    // XML error template.
+    //
+    // Expects $exception (the exception being reported) to be defined in the
+    // including scope. Message and details are escaped so that characters
+    // such as '<' or '&' cannot produce malformed XML.
+?>
+<error>
+    <code><?php echo $exception->getCode() ?></code>
+    <message><?php echo htmlspecialchars($exception->getMessage(), ENT_QUOTES | ENT_XML1, 'UTF-8') ?></message>
+    <?php if (CONST_Debug) { ?>
+    <details><?php echo htmlspecialchars($exception->getFile() . '('. $exception->getLine() . ')', ENT_QUOTES | ENT_XML1, 'UTF-8') ?></details>
+    <?php } ?>
+</error>
\ No newline at end of file
diff --git a/test/bdd/api/errors/formats.feature b/test/bdd/api/errors/formats.feature
new file mode 100644 (file)
index 0000000..8a8e656
--- /dev/null
@@ -0,0 +1,13 @@
+@APIDB
+Feature: Places by osm_type and osm_id Tests
+    Simple tests for errors in various response formats.
+
+    Scenario Outline: Force error by providing too many ids
+        When sending <format> lookup query for N1,N2,N3,N4,N5,N6,N7,N8,N9,N10,N11,N12,N13,N14,N15,N16,N17,N18,N19,N20,N21,N22,N23,N24,N25,N26,N27,N28,N29,N30,N31,N32,N33,N34,N35,N36,N37,N38,N39,N40,N41,N42,N43,N44,N45,N46,N47,N48,N49,N50,N51
+        Then a <format> user error is returned
+
+    Examples:
+        | format  |
+        | xml     |
+        | json    |
+        | geojson |
index e29812e0123b15b4364afd506f1029ab65a744c3..660598db17352acb962ae94c2d7de9025397f253 100644 (file)
@@ -1,6 +1,6 @@
 @APIDB
 Feature: Places by osm_type and osm_id Tests
-    Simple tests for internal server errors and response format.
+    Simple tests for response format.
 
     Scenario Outline: address lookup for existing node, way, relation
         When sending <format> lookup query for N3284625766,W6065798,,R123924,X99,N0
index 1973f0b94aea217d8bdd6a28e66e1dced07dd96b..8fbe552c6255a928758691d6ec8f0ca577394485 100644 (file)
@@ -10,7 +10,7 @@ Feature: Reverse geocoding
           | way      | place    | house |
         And result addresses contain
           | house_number | road            | postcode | country_code |
-          | 906          | West 1st Street | 57274    | us |
+          | 909          | West 1st Street | 57274    | us |
 
     @Tiger
     Scenario: No TIGER house number for zoom < 18
index ca441258784c2fde1f468883d77f768fb833b145..c12635972dd29a48afb0cd9ed8f72b871da0334a 100644 (file)
@@ -194,7 +194,7 @@ Feature: Simple Tests
         When sending json search query "Tokyo"
             | param        | value |
             |json_callback | <data> |
-        Then a HTTP 400 is returned
+        Then a json user error is returned
 
     Examples:
       | data |
index df34b5cc0696fc17d156a5500fb4047cb03b5b8f..4d59b923ed3c39b9fee9f7556b6fe31ea7c49417 100644 (file)
@@ -494,6 +494,18 @@ def step_impl(context, fmt):
     context.execute_steps("Then a HTTP 200 is returned")
     eq_(context.response.format, fmt)
 
+@then(u'a (?P<fmt>\w+) user error is returned')
+def check_page_error(context, fmt):
+    """ Check that the response is a HTTP 400 error page in the given format.
+
+        Runs the 'a HTTP 400 is returned' step, compares the response
+        format against `fmt` and then searches the page for the marker
+        of that format: an <html> element for 'html', an <error>
+        element for 'xml' and a leading '{"error":' for everything else.
+    """
+    context.execute_steps("Then a HTTP 400 is returned")
+    eq_(context.response.format, fmt)
+
+    if fmt == 'html':
+        assert_is_not_none(re.search(r'<html( |>).+</html>', context.response.page, re.DOTALL))
+    elif fmt == 'xml':
+        assert_is_not_none(re.search(r'<error>.+</error>', context.response.page, re.DOTALL))
+    else:
+        # Any other format is checked as a JSON error object.
+        assert_is_not_none(re.search(r'({"error":)', context.response.page, re.DOTALL))
+
+
 @then(u'result header contains')
 def check_header_attr(context):
     for line in context.table:
index 62faf1a4b2d7caa27b0dd9cbcc819d65f2dc0457..b29d908862346cc1c03f3dd819b410f10b53f66a 100644 (file)
@@ -2,14 +2,10 @@
 
 namespace Nominatim;
 
+require_once(CONST_BasePath.'/lib/init-website.php');
 require_once(CONST_BasePath.'/lib/AddressDetails.php');
 
 
-function chksql($oSql, $sMsg = 'Database request failed')
-{
-    return $oSql;
-}
-
 class AddressDetailsTest extends \PHPUnit\Framework\TestCase
 {
 
diff --git a/test/php/Nominatim/DatabaseErrorTest.php b/test/php/Nominatim/DatabaseErrorTest.php
new file mode 100644 (file)
index 0000000..25b4aa0
--- /dev/null
@@ -0,0 +1,44 @@
+<?php
+
+namespace Nominatim;
+
+require_once(CONST_BasePath.'/lib/init-website.php');
+require_once(CONST_BasePath.'/lib/DatabaseError.php');
+
+/**
+ * Unit tests for the DatabaseError exception and for chksql().
+ */
+class DatabaseErrorTest extends \PHPUnit\Framework\TestCase
+{
+
+    /**
+     * Message and code are kept as given and getSqlError() returns what
+     * getMessage() of the wrapped SQL object returns.
+     */
+    public function testSqlMessage()
+    {
+        $oSqlStub = $this->getMockBuilder(\DB_Error::class)
+                    ->setMethods(array('getMessage'))
+                    ->getMock();
+
+        $oSqlStub->method('getMessage')
+                ->willReturn('Unknown table.');
+
+        $oErr = new DatabaseError('Sql error', 123, null, $oSqlStub);
+        $this->assertEquals('Sql error', $oErr->getMessage());
+        $this->assertEquals(123, $oErr->getCode());
+        $this->assertEquals('Unknown table.', $oErr->getSqlError());
+
+        // causes a circular reference warning during dump
+        // $this->assertRegExp('/Mock_DB_Error/', $oErr->getSqlDebugDump());
+    }
+
+    /**
+     * The debug dump contains the content of the wrapped value. A plain
+     * array is used instead of a mock, see the note in testSqlMessage().
+     */
+    public function testSqlObjectDump()
+    {
+        $oErr = new DatabaseError('Sql error', 123, null, array('one' => 'two'));
+        $this->assertRegExp('/two/', $oErr->getSqlDebugDump());
+    }
+
+    /**
+     * chksql() throws a DatabaseError with the given message and code 500
+     * when it is handed a DB_Error object.
+     */
+    public function testChksqlThrows()
+    {
+        $this->expectException(DatabaseError::class);
+        $this->expectExceptionMessage('My custom error message');
+        $this->expectExceptionCode(500);
+
+        $oDB = new \DB_Error;
+        $this->assertEquals(false, chksql($oDB, 'My custom error message'));
+    }
+}
index 2891388d371b83976a8b50ae9dce379b916e47df..dbf8feca712a4ecf7d5d401baaa8f157760ef306 100644 (file)
@@ -2,6 +2,9 @@
 
 namespace Nominatim;
 
+require_once(CONST_BasePath.'/lib/lib.php');
+require_once(CONST_BasePath.'/lib/ClassTypes.php');
+
 class LibTest extends \PHPUnit\Framework\TestCase
 {
 
index 4f21706ecaf9274ce74740d750025152dc2c133a..eb4ad68aae25b2d84f9e844416c40efdff946878 100644 (file)
@@ -2,6 +2,7 @@
 
 namespace Nominatim;
 
+require_once(CONST_BasePath.'/lib/db.php');
 require_once(CONST_BasePath.'/lib/Status.php');
 
 
index fa1331e87b17d187fc0ebbd29b15e9c3b9567703..4016a839860294eb05bb2167fe002e243453a1b5 100644 (file)
@@ -2,8 +2,8 @@
 
 namespace Nominatim;
 
-require_once(CONST_BasePath.'/lib/db.php');
-require_once(CONST_BasePath.'/lib/cmd.php');
+// require_once(CONST_BasePath.'/lib/db.php');
+// require_once(CONST_BasePath.'/lib/cmd.php');
 require_once(CONST_BasePath.'/lib/TokenList.php');
 
 
index 0d4759622ff06f0ec8d6e4d09e2a673e4eb06a38..d6968717b5b46bb699b1eb1486d9d1c1327f3194 100644 (file)
@@ -1,2 +1,4 @@
 <?php
     @define('CONST_BasePath', '../..');
+    @define('CONST_Debug', true);
+    @define('CONST_NoAccessControl', false);
diff --git a/utils/imports.php b/utils/imports.php
deleted file mode 100755 (executable)
index 9d1085f..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-#!@PHP_BIN@ -Cq
-<?php
-
-require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
-require_once(CONST_BasePath.'/lib/init-cmd.php');
-ini_set('memory_limit', '800M');
-
-$aCMDOptions
- = array(
-    'Create and setup nominatim search system',
-    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
-    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
-    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
-
-    array('parse-tiger', '', 0, 1, 1, 1, 'realpath', 'Convert tiger edge files to nominatim sql import - datafiles from 2011 or later (source: edges directory of tiger data)'),
-   );
-getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
-
-
-if (isset($aCMDResult['parse-tiger'])) {
-    if (!file_exists(CONST_Tiger_Data_Path)) mkdir(CONST_Tiger_Data_Path);
-
-    $sTempDir = tempnam('/tmp', 'tiger');
-    unlink($sTempDir);
-    mkdir($sTempDir);
-
-    foreach (glob($aCMDResult['parse-tiger'].'/tl_20??_?????_edges.zip', 0) as $sImportFile) {
-        set_time_limit(30);
-        preg_match('#([0-9]{5})_(.*)#', basename($sImportFile), $aMatch);
-        $sCountyID = $aMatch[1];
-
-        echo 'Processing '.$sCountyID."...\n";
-        $sUnzipCmd = "unzip -d $sTempDir $sImportFile";
-        exec($sUnzipCmd);
-
-        $sShapeFilename = $sTempDir.'/'.basename($sImportFile, '.zip').'.shp';
-        $sSqlFilenameTmp = $sTempDir.'/'.$sCountyID.'.sql';
-        $sSqlFilename = CONST_Tiger_Data_Path.'/'.$sCountyID.'.sql';
-
-        if (!file_exists($sShapeFilename)) {
-            echo "Failed unzip ($sImportFile)\n";
-        } else {
-            $sParseCmd = CONST_BasePath.'/utils/tigerAddressImport.py '.$sShapeFilename.' '.$sSqlFilenameTmp;
-            exec($sParseCmd);
-            if (!file_exists($sSqlFilenameTmp)) {
-                echo "Failed parse ($sImportFile)\n";
-            } else {
-                copy($sSqlFilenameTmp, $sSqlFilename);
-            }
-        }
-        // Cleanup
-        foreach (glob($sTempDir.'/*') as $sTmpFile) {
-            unlink($sTmpFile);
-        }
-    }
-}
diff --git a/vagrant/Install-on-Ubuntu-18-nginx.sh b/vagrant/Install-on-Ubuntu-18-nginx.sh
new file mode 100755 (executable)
index 0000000..fdd7c46
--- /dev/null
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+#
+# This is a variation of Install-on-Ubuntu.sh showcasing how to use the
+# nginx webserver instead of Apache2. We might eventually merge both
+# files. Right now expect this file to become outdated/unmaintained
+# over time.
+#
+# This file lacks many comments found in Install-on-Ubuntu.sh, you
+# should check that file first to get a basic understanding.
+#
+
+# hacks for broken vagrant box
+sudo rm -f /var/lib/dpkg/lock
+sudo update-locale LANG=en_US.UTF-8
+export APT_LISTCHANGES_FRONTEND=none
+export DEBIAN_FRONTEND=noninteractive
+
+    sudo apt-get update -qq
+    sudo apt-get install -y build-essential cmake g++ libboost-dev libboost-system-dev \
+                            libboost-filesystem-dev libexpat1-dev zlib1g-dev libxml2-dev\
+                            libbz2-dev libpq-dev libproj-dev \
+                            postgresql-server-dev-10 postgresql-10-postgis-2.4 \
+                            postgresql-contrib-10 \
+                            nginx php-fpm php php-pgsql php-pear php-db \
+                            php-intl git
+
+    export USERNAME=vagrant
+    export USERHOME=/home/vagrant
+
+    chmod a+x $USERHOME
+
+# Setting up PostgreSQL
+# ---------------------
+#
+# Tune the postgresql configuration, see same section in Install-on-Ubuntu.sh
+
+    sudo systemctl restart postgresql
+
+    sudo -u postgres createuser -s $USERNAME
+    sudo -u postgres createuser www-data
+
+#
+# Setting up the Nginx Webserver
+# -------------------------------
+#
+# You need to configure php-fpm to listen on a Unix socket. Then create Nginx
+# configuration to forward localhost:80 requests to that socket.
+#
+
+
+sudo tee /etc/php/7.2/fpm/pool.d/www.conf << EOF_PHP_FPM_CONF
+[www]
+; Comment out the tcp listener and add the unix socket
+;listen = 127.0.0.1:9000
+listen = /var/run/php7.2-fpm.sock
+
+; Ensure that the daemon runs as the correct user
+listen.owner = www-data
+listen.group = www-data
+listen.mode = 0666
+
+; Unix user of FPM processes
+user = www-data
+group = www-data
+
+; Choose process manager type (static, dynamic, ondemand)
+pm = ondemand
+pm.max_children = 5
+EOF_PHP_FPM_CONF
+
+
+
+
+sudo tee /etc/nginx/sites-available/default << EOF_NGINX_CONF
+server {
+    listen 80 default_server;
+    listen [::]:80 default_server;
+
+    root $USERHOME/build/website;
+    index search.php index.html;
+    location / {
+        try_files \$uri \$uri/ @php;
+    }
+
+    location @php {
+        fastcgi_param SCRIPT_FILENAME "\$document_root\$uri.php";
+        fastcgi_param PATH_TRANSLATED "\$document_root\$uri.php";
+        fastcgi_param QUERY_STRING    \$args;
+        fastcgi_pass unix:/var/run/php7.2-fpm.sock; # same socket as the php-fpm listen setting above
+        fastcgi_index index.php;
+        include fastcgi_params;
+    }
+
+    location ~ [^/]\.php(/|$) {
+        fastcgi_split_path_info ^(.+?\.php)(/.*)$;
+        if (!-f \$document_root\$fastcgi_script_name) {
+            return 404;
+        }
+        fastcgi_pass unix:/var/run/php7.2-fpm.sock;
+        fastcgi_index search.php;
+        include fastcgi.conf;
+    }
+}
+EOF_NGINX_CONF
+
+
+sudo sed -i 's:#.*::' /etc/nginx/sites-available/default
+
+
+#
+# Enable the configuration and restart Nginx
+#
+
+    sudo systemctl stop apache2 # just in case it's installed as well
+    sudo systemctl restart php7.2-fpm nginx
+
+# From here continue in the 'Installing Nominatim' section in
+# Install-on-Ubuntu.sh
+
index c9e86312c9cc63a8c9da6954db45b214c9adff4e..81d643f052fd4c1438bca9711533957701c6ea09 100755 (executable)
@@ -11,6 +11,7 @@ ini_set('memory_limit', '200M');
 $oParams = new Nominatim\ParameterParser();
 
 $sOutputFormat = $oParams->getSet('format', array('html', 'json'), 'html');
+set_exception_handler_by_format($sOutputFormat);
 
 $aLangPrefOrder = $oParams->getPreferredLanguages();
 $sLanguagePrefArraySQL = 'ARRAY['.join(',', array_map('getDBQuoted', $aLangPrefOrder)).']';
index f09506a4c0de7ae1aef97afa22b38a4aecc9cae1..ec2a3a8a30f97880a4871d54d8539186b4a19832 100755 (executable)
@@ -12,6 +12,7 @@ $oParams = new Nominatim\ParameterParser();
 
 // Format for output
 $sOutputFormat = $oParams->getSet('format', array('xml', 'json', 'geojson'), 'xml');
+set_exception_handler_by_format($sOutputFormat);
 
 // Preferred language
 $aLangPrefOrder = $oParams->getPreferredLanguages();
index 5f9268c680e31e222370b001b8f9edcdeb355134..075d4cf032c7678e8b643935b14c95293db67a27 100755 (executable)
@@ -13,6 +13,7 @@ $oParams = new Nominatim\ParameterParser();
 
 // Format for output
 $sOutputFormat = $oParams->getSet('format', array('html', 'xml', 'json', 'jsonv2', 'geojson', 'geocodejson'), 'xml');
+set_exception_handler_by_format($sOutputFormat);
 
 // Preferred language
 $aLangPrefOrder = $oParams->getPreferredLanguages();
index 0ebf1814bcb129fafc63477b637fe5a5c52832cc..0b678caa65994c16145ac18c6141e6c9f11d4793 100755 (executable)
@@ -27,6 +27,7 @@ if (CONST_Search_ReversePlanForAll
 
 // Format for output
 $sOutputFormat = $oParams->getSet('format', array('html', 'xml', 'json', 'jsonv2', 'geojson', 'geocodejson'), 'html');
+set_exception_handler_by_format($sOutputFormat);
 
 $sForcedGeometry = ($sOutputFormat == 'html') ? 'geojson' : null;
 $oGeocode->loadParamArray($oParams, $sForcedGeometry);