To run Nominatim via webservers like Apache or nginx, please read the
[Deployment chapter](Deployment.md).
-## Tuning the database
-
-Accurate word frequency information for search terms helps PostgreSQL's query
-planner to make the right decisions. Recomputing them can improve the performance
-of forward geocoding in particular under high load. To recompute word counts run:
-
-```sh
-nominatim refresh --word-counts
-```
-
-This will take a couple of hours for a full planet installation. You can
-also defer that step to a later point in time when you realise that
-performance becomes an issue. Just make sure that updates are stopped before
-running this function.
+## Adding search through category phrases
If you want to be able to search for places by their type through
[special phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
private $aName = array();
/// True if the name is rare enough to force index use on name.
private $bRareName = false;
+    /// True if the name must be accompanied by address terms.
+ private $bNameNeedsAddress = false;
/// List of word ids making up the address of the object.
private $aAddress = array();
/// List of word ids that appear in the name but should be ignored.
return false;
}
}
+ if ($this->bNameNeedsAddress && empty($this->aAddress)) {
+ return false;
+ }
return true;
}
{
$this->aName[$iId] = $iId;
$this->bRareName = $bRareName;
+ $this->bNameNeedsAddress = false;
}
/**
* @param integer iID ID of term to add.
* @param bool bSearchable Term should be used to search for result
* (i.e. term is not a stop word).
+ * @param bool bNeedsAddress True if the term is too unspecific to be used
+ * in a stand-alone search without an address
+ * to narrow down the search.
* @param integer iPhraseNumber Index of phrase, where the partial term
* appears.
*/
- public function addPartialNameToken($iId, $bSearchable, $iPhraseNumber)
+ public function addPartialNameToken($iId, $bSearchable, $bNeedsAddress, $iPhraseNumber)
{
+ if (empty($this->aName)) {
+ $this->bNameNeedsAddress = $bNeedsAddress;
+ } else {
+ $this->bNameNeedsAddress |= $bNeedsAddress;
+ }
if ($bSearchable) {
$this->aName[$iId] = $iId;
} else {
{
$this->aAddress = array_merge($this->aAddress, $this->aName);
$this->bRareName = false;
+ $this->bNameNeedsAddress = true;
$this->aName = array($iId => $iId);
$this->iNamePhrase = -1;
}
$oNewSearch->addPartialNameToken(
$this->iId,
$this->iSearchNameCount < CONST_Max_Word_Frequency,
+ $this->iSearchNameCount > CONST_Search_NameOnlySearchFrequencyThreshold,
$oPosition->getPhrase()
);
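
Taken together, the PHP hunks above gate overly common partial terms: when a partial token's frequency exceeds the new `CONST_Search_NameOnlySearchFrequencyThreshold`, the resulting search is only considered valid if address terms are present. The following is a minimal Python sketch of that rule, not Nominatim code; class, method and threshold names are made up for illustration.

```python
FREQUENCY_THRESHOLD = 500   # stand-in for CONST_Search_NameOnlySearchFrequencyThreshold


class SearchSketch:
    """Toy model of the flag handling added to SearchDescription."""

    def __init__(self):
        self.name_tokens = set()
        self.address_tokens = set()
        self.name_needs_address = False

    def add_full_name_token(self, token_id):
        # A full, rare name term can stand on its own, so the requirement is cleared.
        self.name_tokens.add(token_id)
        self.name_needs_address = False

    def add_partial_name_token(self, token_id, frequency):
        needs_address = frequency > FREQUENCY_THRESHOLD
        if not self.name_tokens:
            self.name_needs_address = needs_address
        else:
            # Any additional too-frequent partial keeps the requirement in place.
            self.name_needs_address |= needs_address
        self.name_tokens.add(token_id)

    def add_address_token(self, token_id):
        self.address_tokens.add(token_id)

    def is_valid_search(self):
        # Mirrors the new check in isValidSearch(): searches made up only of very
        # common partial name terms need at least one address term to narrow them down.
        return not (self.name_needs_address and not self.address_tokens)


search = SearchSketch()
search.add_partial_name_token(1, frequency=100000)   # a very common term on its own
assert not search.is_valid_search()
search.add_address_token(2)                          # adding an address term makes it valid
assert search.is_valid_search()
```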
freeze.drop_update_tables(conn)
tokenizer.finalize_import(args.config)
+ LOG.warning('Recompute word counts')
+ tokenizer.update_statistics()
webdir = args.project_dir / 'website'
LOG.warning('Setup website at %s', webdir)
with connect(args.config.get_libpq_dsn()) as conn:
refresh.setup_website(webdir, args.config, conn)
- with connect(args.config.get_libpq_dsn()) as conn:
- SetupAll._set_database_date(conn)
- properties.set_property(conn, 'database_version',
- '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
+ SetupAll._set_database_date(args.config.get_libpq_dsn())
return 0
@staticmethod
- def _set_database_date(conn):
+ def _set_database_date(dsn):
""" Determine the database date and set the status accordingly.
"""
- try:
- dbdate = status.compute_database_date(conn)
- status.set_status(conn, dbdate)
- LOG.info('Database is at %s.', dbdate)
- except Exception as exc: # pylint: disable=broad-except
- LOG.error('Cannot determine date of database: %s', exc)
+ with connect(dsn) as conn:
+ try:
+ dbdate = status.compute_database_date(conn)
+ status.set_status(conn, dbdate)
+ LOG.info('Database is at %s.', dbdate)
+ except Exception as exc: # pylint: disable=broad-except
+ LOG.error('Cannot determine date of database: %s', exc)
+
+ properties.set_property(conn, 'database_version',
+ '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
""" Recompute frequencies for all name words.
"""
with connect(self.dsn) as conn:
- with conn.cursor() as cur:
- cur.drop_table("word_frequencies")
- LOG.info("Computing word frequencies")
- cur.execute("""CREATE TEMP TABLE word_frequencies AS
- SELECT unnest(name_vector) as id, count(*)
- FROM search_name GROUP BY id""")
- cur.execute("CREATE INDEX ON word_frequencies(id)")
- LOG.info("Update word table with recomputed frequencies")
- cur.execute("""UPDATE word
- SET info = info || jsonb_build_object('count', count)
- FROM word_frequencies WHERE word_id = id""")
- cur.drop_table("word_frequencies")
+ if conn.table_exists('search_name'):
+ with conn.cursor() as cur:
+ cur.drop_table("word_frequencies")
+ LOG.info("Computing word frequencies")
+ cur.execute("""CREATE TEMP TABLE word_frequencies AS
+ SELECT unnest(name_vector) as id, count(*)
+ FROM search_name GROUP BY id""")
+ cur.execute("CREATE INDEX ON word_frequencies(id)")
+ LOG.info("Update word table with recomputed frequencies")
+ cur.execute("""UPDATE word
+ SET info = info || jsonb_build_object('count', count)
+ FROM word_frequencies WHERE word_id = id""")
+ cur.drop_table("word_frequencies")
conn.commit()
""" Recompute the frequency of full words.
"""
with connect(self.dsn) as conn:
- with conn.cursor() as cur:
- cur.drop_table("word_frequencies")
- LOG.info("Computing word frequencies")
- cur.execute("""CREATE TEMP TABLE word_frequencies AS
- SELECT unnest(name_vector) as id, count(*)
- FROM search_name GROUP BY id""")
- cur.execute("CREATE INDEX ON word_frequencies(id)")
- LOG.info("Update word table with recomputed frequencies")
- cur.execute("""UPDATE word SET search_name_count = count
- FROM word_frequencies
- WHERE word_token like ' %' and word_id = id""")
- cur.drop_table("word_frequencies")
+ if conn.table_exists('search_name'):
+ with conn.cursor() as cur:
+ cur.drop_table("word_frequencies")
+ LOG.info("Computing word frequencies")
+ cur.execute("""CREATE TEMP TABLE word_frequencies AS
+ SELECT unnest(name_vector) as id, count(*)
+ FROM search_name GROUP BY id""")
+ cur.execute("CREATE INDEX ON word_frequencies(id)")
+ LOG.info("Update word table with recomputed frequencies")
+ cur.execute("""UPDATE word SET search_name_count = count
+ FROM word_frequencies
+ WHERE word_token like ' %' and word_id = id""")
+ cur.drop_table("word_frequencies")
conn.commit()
def name_analyzer(self):
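
Both `update_statistics()` hunks above go through Nominatim's own DB helpers (`connect()`, `conn.table_exists()`, `cur.drop_table()`). As a rough standalone reference, the ICU variant expressed with plain psycopg2 might look like the sketch below; the function name and the `to_regclass()` existence check are my own, and the `search_name` table can be absent e.g. on reverse-only installations, which is what the guard covers.

```python
import psycopg2


def update_word_counts(dsn):
    """Recompute per-token counts in the word table (ICU layout: count kept in the info jsonb)."""
    with psycopg2.connect(dsn) as conn:
        with conn.cursor() as cur:
            # Skip silently when there is no search_name table to count from.
            cur.execute("SELECT to_regclass('search_name')")
            if cur.fetchone()[0] is None:
                return
            cur.execute("DROP TABLE IF EXISTS word_frequencies")
            cur.execute("""CREATE TEMP TABLE word_frequencies AS
                             SELECT unnest(name_vector) as id, count(*)
                             FROM search_name GROUP BY id""")
            cur.execute("CREATE INDEX ON word_frequencies(id)")
            cur.execute("""UPDATE word
                             SET info = info || jsonb_build_object('count', count)
                             FROM word_frequencies WHERE word_id = id""")
            cur.execute("DROP TABLE word_frequencies")
        conn.commit()
```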
Then at most 50 results are returned
Scenario: Limit number of search results
- When sending json search query "schloss"
+ When sending json search query "landstr"
Then more than 4 results are returned
- When sending json search query "schloss"
+ When sending json search query "landstr"
| limit |
| 4 |
Then exactly 4 results are returned
| place | village |
Scenario Outline: Search with polygon threshold (json)
- When sending json search query "switzerland"
+ When sending json search query "triesenberg"
| polygon_geojson | polygon_threshold |
| 1 | <th> |
Then at least 1 result is returned
| 999 |
Scenario Outline: Search with polygon threshold (xml)
- When sending xml search query "switzerland"
+ When sending xml search query "triesenberg"
| polygon_geojson | polygon_threshold |
| 1 | <th> |
Then at least 1 result is returned
| 999 |
Scenario Outline: Search with invalid polygon threshold (xml)
- When sending xml search query "switzerland"
+ When sending xml search query "triesenberg"
| polygon_geojson | polygon_threshold |
| 1 | <th> |
Then a HTTP 400 is returned
| geokml |
Scenario: Search along a route
- When sending json search query "schloss" with address
+ When sending json search query "rathaus" with address
Then result addresses contain
| ID | town |
- | 0 | Vaduz |
- When sending json search query "schloss" with address
+ | 0 | Schaan |
+ When sending json search query "rathaus" with address
| bounded | routewidth | route |
| 1 | 0.1 | 9.54353,47.11772,9.54314,47.11894 |
Then result addresses contain
| way | ^697,.* |
Scenario: Search with class-type feature
- When sending jsonv2 search query "Hotel in California"
+ When sending jsonv2 search query "bars in ebenholz"
Then results contain
| place_rank |
| 30 |