From 40d5b78eb80c1e7479a51d62969a35e8c31ca98a Mon Sep 17 00:00:00 2001 From: anqixxx Date: Thu, 29 May 2025 09:25:08 -0700 Subject: [PATCH] Added command line (default 0) min argument for minimum filtering, updated args.py to reflect this --- src/nominatim_db/clicmd/args.py | 1 + src/nominatim_db/clicmd/special_phrases.py | 6 +++++- src/nominatim_db/tools/special_phrases/sp_importer.py | 9 +++++---- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/nominatim_db/clicmd/args.py b/src/nominatim_db/clicmd/args.py index 45df9b7c..5c6a806a 100644 --- a/src/nominatim_db/clicmd/args.py +++ b/src/nominatim_db/clicmd/args.py @@ -136,6 +136,7 @@ class NominatimArgs: import_from_wiki: bool import_from_csv: Optional[str] no_replace: bool + min: int # Arguments to all query functions format: str diff --git a/src/nominatim_db/clicmd/special_phrases.py b/src/nominatim_db/clicmd/special_phrases.py index 9ba751a0..90560fb7 100644 --- a/src/nominatim_db/clicmd/special_phrases.py +++ b/src/nominatim_db/clicmd/special_phrases.py @@ -58,6 +58,8 @@ class ImportSpecialPhrases: help='Import special phrases from a CSV file') group.add_argument('--no-replace', action='store_true', help='Keep the old phrases and only add the new ones') + group.add_argument('--min', type=int, default=0, + help='Restrict special phrases by minimum occurrence') def run(self, args: NominatimArgs) -> int: @@ -82,7 +84,9 @@ class ImportSpecialPhrases: tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config) should_replace = not args.no_replace + min = args.min + with connect(args.config.get_libpq_dsn()) as db_connection: SPImporter( args.config, db_connection, loader - ).import_phrases(tokenizer, should_replace) + ).import_phrases(tokenizer, should_replace, min) diff --git a/src/nominatim_db/tools/special_phrases/sp_importer.py b/src/nominatim_db/tools/special_phrases/sp_importer.py index ac50377f..6bd3c287 100644 --- a/src/nominatim_db/tools/special_phrases/sp_importer.py +++ 
b/src/nominatim_db/tools/special_phrases/sp_importer.py @@ -87,7 +87,7 @@ class SPImporter(): return db_combinations - def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool) -> None: + def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool, min: int) -> None: """ Iterate through all SpecialPhrases extracted from the loader and import them into the database. @@ -107,7 +107,7 @@ class SPImporter(): if result: class_type_pairs.add(result) - self._create_classtype_table_and_indexes(class_type_pairs) + self._create_classtype_table_and_indexes(class_type_pairs, min) if should_replace: self._remove_non_existent_tables_from_db() @@ -186,7 +186,8 @@ class SPImporter(): return (phrase.p_class, phrase.p_type) def _create_classtype_table_and_indexes(self, - class_type_pairs: Iterable[Tuple[str, str]]) -> None: + class_type_pairs: Iterable[Tuple[str, str]], + min: int) -> None: """ Create table place_classtype for each given pair. Also create indexes on place_id and centroid. @@ -200,7 +201,7 @@ class SPImporter(): with self.db_connection.cursor() as db_cursor: db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)") - allowed_special_phrases = self.get_classtype_pairs() + allowed_special_phrases = self.get_classtype_pairs(min) for pair in class_type_pairs: phrase_class = pair[0] -- 2.39.5