]> git.openstreetmap.org Git - nominatim.git/commitdiff
improve progress logging during indexing
authorSarah Hoffmann <lonvia@denofr.de>
Mon, 30 Nov 2020 09:59:29 +0000 (10:59 +0100)
committerSarah Hoffmann <lonvia@denofr.de>
Mon, 30 Nov 2020 09:59:29 +0000 (10:59 +0100)
Wait for 2 seconds before logging the first progress, so that we
have numbers that are a bit more reliable statistically speaking.

Also provides an actual implementation for the log_interval
parameter and fixes some small style issues.

nominatim/indexer/progress.py

index 456d3eae08aeff9258cc4446f709241451070563..99120673faa67680216ac5fc48d6c8f93da62d03 100644 (file)
@@ -2,13 +2,17 @@
 #
 # This file is part of Nominatim.
 # Copyright (C) 2020 Sarah Hoffmann
-
+"""
+Helpers for progress logging.
+"""
 import logging
 from datetime import datetime
 
-log = logging.getLogger()
+LOG = logging.getLogger()
+
+INITIAL_PROGRESS = 10
 
-class ProgressLogger(object):
+class ProgressLogger:
     """ Tracks and prints progress for the indexing process.
         `name` is the name of the indexing step being tracked.
         `total` sets up the total number of items that need processing.
@@ -21,32 +25,40 @@ class ProgressLogger(object):
         self.total_places = total
         self.done_places = 0
         self.rank_start_time = datetime.now()
-        self.next_info = 100 if log.isEnabledFor(logging.INFO) else total + 1
+        self.log_interval = log_interval
+        self.next_info = INITIAL_PROGRESS if LOG.isEnabledFor(logging.INFO) else total + 1
 
     def add(self, num=1):
         """ Mark `num` places as processed. Print a log message if the
-            logging is at least info and the log interval has past.
+            logging is at least info and the log interval has passed.
         """
         self.done_places += num
 
-        if self.done_places >= self.next_info:
-            now = datetime.now()
-            done_time = (now - self.rank_start_time).total_seconds()
-            places_per_sec = self.done_places / done_time
-            eta = (self.total_places - self.done_places)/places_per_sec
+        if self.done_places < self.next_info:
+            return
+
+        now = datetime.now()
+        done_time = (now - self.rank_start_time).total_seconds()
+
+        if done_time < 2:
+            self.next_info = self.done_places + INITIAL_PROGRESS
+            return
+
+        places_per_sec = self.done_places / done_time
+        eta = (self.total_places - self.done_places) / places_per_sec
 
-            log.info("Done {} in {} @ {:.3f} per second - {} ETA (seconds): {:.2f}"
-                     .format(self.done_places, int(done_time),
-                             places_per_sec, self.name, eta))
+        LOG.info("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f",
+                 self.done_places, int(done_time),
+                 places_per_sec, self.name, eta)
 
-            self.next_info += int(places_per_sec)
+        self.next_info += int(places_per_sec) * self.log_interval
 
     def done(self):
-        """ Print final staticstics about the progress.
+        """ Print final statistics about the progress.
         """
         rank_end_time = datetime.now()
         diff_seconds = (rank_end_time-self.rank_start_time).total_seconds()
 
-        log.warning("Done {}/{} in {} @ {:.3f} per second - FINISHED {}\n".format(
+        LOG.warning("Done %d/%d in %d @ %.3f per second - FINISHED %s\n",
                     self.done_places, self.total_places, int(diff_seconds),
-                    self.done_places/diff_seconds, self.name))
+                    self.done_places/diff_seconds, self.name)