Add sdk.metric.matplotlib_untitled_history_size to limit the number of untitled matplotlib plots (default: 100)

allegroai 2019-11-08 22:29:36 +02:00
parent 1bfee56977
commit 9362831269
5 changed files with 95 additions and 18 deletions
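
For reference, the new option sits next to file_history_size in the sdk.metric section of the configuration; a minimal sketch of the override in a user trains.conf, assuming the standard sdk { metric { ... } } layout:

    sdk {
        metric {
            # keep at most this many distinct "untitled N" matplotlib plots/images
            matplotlib_untitled_history_size: 100
        }
    }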


@@ -55,6 +55,7 @@ class Reporter(InterfaceBase, AbstractContextManager, SetupUploadMixin, AsyncMan
self._thread = Thread(target=self._daemon)
self._thread.daemon = True
self._thread.start()
self._max_iteration = 0
def _set_storage_uri(self, value):
value = '/'.join(x for x in (value.rstrip('/'), self._metrics.storage_key_prefix) if x)
@@ -78,6 +79,10 @@ class Reporter(InterfaceBase, AbstractContextManager, SetupUploadMixin, AsyncMan
def async_enable(self, value):
self._async_enable = bool(value)
@property
def max_iteration(self):
return self._max_iteration
def _daemon(self):
while not self._exit_flag:
self._flush_event.wait(self._flush_frequency)
@@ -92,6 +97,9 @@ class Reporter(InterfaceBase, AbstractContextManager, SetupUploadMixin, AsyncMan
self.wait_for_results()
def _report(self, ev):
ev_iteration = ev.get_iteration()
if ev_iteration is not None:
self._max_iteration = max(self._max_iteration, ev_iteration)
self._events.append(ev)
if len(self._events) >= self._flush_threshold:
self.flush()
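
The hunks above add max-iteration bookkeeping to the reporter: every reported event that carries an iteration bumps _max_iteration, which is exposed through the read-only max_iteration property. A minimal standalone sketch of the pattern (hypothetical MiniReporter, not the trains Reporter API):

    class MiniReporter(object):
        # illustrative sketch of the max-iteration bookkeeping only
        def __init__(self):
            self._max_iteration = 0

        @property
        def max_iteration(self):
            return self._max_iteration

        def report(self, iteration=None):
            # events without an iteration (iteration is None) leave the maximum untouched
            if iteration is not None:
                self._max_iteration = max(self._max_iteration, iteration)

    r = MiniReporter()
    r.report(3); r.report(None); r.report(1)
    assert r.max_iteration == 3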


@@ -389,7 +389,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
return self._reporter
def _get_output_destination_suffix(self, extra_path=None):
return '/'.join(quote(x, safe='[]{}()$^,.; -_+-=') for x in
return '/'.join(quote(x, safe="'[]{}()$^,.; -_+-=") for x in
(self.get_project_name(), '%s.%s' % (self.name, self.data.id), extra_path) if x)
def _reload(self):
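
The one-line change above adds the apostrophe to the set of characters quote() leaves unescaped, so project or task names containing a single quote are no longer percent-encoded in the output destination suffix. A quick sketch, assuming Python 3's urllib.parse.quote (the actual import in the code base may differ):

    from urllib.parse import quote

    name = "john's experiment.1234"
    print(quote(name, safe="'[]{}()$^,.; -_+-="))  # john's experiment.1234
    print(quote(name, safe="[]{}()$^,.; -_+-="))   # john%27s experiment.1234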


@@ -66,6 +66,15 @@ class EventTrainsWriter(object):
_title_series_writers_lookup = {}
_event_writers_id_to_logdir = {}
# Protect against step (iteration) reuse, for example a step counter that runs
# inside an epoch and wraps around when the epoch ends,
# i.e. step = 0..100, the epoch ends, and step = 0..100 again.
# We store the first report per title/series combination, and if a wraparound occurs
# we synthetically keep increasing the step/iteration based on the previous epoch's counter
# example: _title_series_wraparound_counter[('title', 'series')] =
# {'first_step':None, 'last_step':None, 'adjust_counter':0,}
_title_series_wraparound_counter = {}
@property
def variants(self):
return self._variants
@@ -111,8 +120,8 @@ class EventTrainsWriter(object):
org_series = series
org_title = title
other_logdir = self._event_writers_id_to_logdir[event_writer_id]
split_logddir = self._logdir.split(os.path.sep)
unique_logdir = set(split_logddir) - set(other_logdir.split(os.path.sep))
split_logddir = self._logdir.split('/')
unique_logdir = set(split_logddir) - set(other_logdir.split('/'))
header = '/'.join(s for s in split_logddir if s in unique_logdir)
if logdir_header == 'series_last':
series = header + ': ' + series
@@ -160,6 +169,9 @@ class EventTrainsWriter(object):
# We are the events_writer, so that's what we'll pass
IsTensorboardInit.set_tensorboard_used()
self._logdir = logdir or ('unknown %d' % len(self._event_writers_id_to_logdir))
# conform directory structure to unix
if os.path.sep == '\\':
self._logdir = self._logdir.replace('\\', '/')
self._id = hash(self._logdir)
self._event_writers_id_to_logdir[self._id] = self._logdir
self.max_keep_images = max_keep_images
@@ -220,6 +232,8 @@ class EventTrainsWriter(object):
title, series = self.tag_splitter(tag, num_split_parts=3, default_title='Images', logdir_header='title',
auto_reduce_num_split=True)
step = self._fix_step_counter(title, series, step)
if img_data_np.dtype != np.uint8:
# assume scale 0-1
img_data_np = (img_data_np * 255).astype(np.uint8)
@@ -259,6 +273,7 @@ class EventTrainsWriter(object):
default_title = tag if not self._logger._get_tensorboard_auto_group_scalars() else 'Scalars'
title, series = self.tag_splitter(tag, num_split_parts=1,
default_title=default_title, logdir_header='series_last')
step = self._fix_step_counter(title, series, step)
# update scalar cache
num, value = self._scalar_report_cache.get((title, series), (0, 0))
@@ -310,6 +325,7 @@ class EventTrainsWriter(object):
# Z-axis actual value (interpolated 'bucket')
title, series = self.tag_splitter(tag, num_split_parts=1, default_title='Histograms',
logdir_header='series')
step = self._fix_step_counter(title, series, step)
# get histograms from cache
hist_list, hist_iters, minmax = self._hist_report_cache.get((title, series), ([], np.array([]), None))
@@ -418,6 +434,23 @@ class EventTrainsWriter(object):
except Exception:
pass
def _fix_step_counter(self, title, series, step):
key = (title, series)
if key not in EventTrainsWriter._title_series_wraparound_counter:
EventTrainsWriter._title_series_wraparound_counter[key] = {'first_step': step, 'last_step': step,
'adjust_counter': 0}
return step
wraparound_counter = EventTrainsWriter._title_series_wraparound_counter[key]
# a wraparound is assumed if the current step dropped below 90% of the previous step
# note: since the counter is an int and we want to avoid float rounding errors, the if double-checks both conditions
if step < wraparound_counter['last_step'] and step < 0.9*wraparound_counter['last_step']:
# adjust step base line
wraparound_counter['adjust_counter'] += wraparound_counter['last_step'] + (1 if step <= 0 else step)
# return adjusted step
wraparound_counter['last_step'] = step
return step + wraparound_counter['adjust_counter']
def add_event(self, event, step=None, walltime=None, **kwargs):
supported_metrics = {
'simpleValue', 'image', 'histo', 'tensor'
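
The _fix_step_counter helper above can be read as a small piece of per-(title, series) state. A simplified standalone sketch of the same logic, with module-level state and illustrative names:

    _wraparound = {}  # (title, series) -> {'first_step', 'last_step', 'adjust_counter'}

    def fix_step_counter(title, series, step):
        state = _wraparound.setdefault((title, series),
                                       {'first_step': step, 'last_step': step, 'adjust_counter': 0})
        # a drop below 90% of the previous step is treated as a wraparound (e.g. a new epoch restarting at 0)
        if step < state['last_step'] and step < 0.9 * state['last_step']:
            # shift the baseline so the adjusted step keeps growing monotonically
            state['adjust_counter'] += state['last_step'] + (1 if step <= 0 else step)
        state['last_step'] = step
        return step + state['adjust_counter']

    # steps 0..100 of epoch 1 pass through unchanged; step 0 of epoch 2 becomes 101, and so on
    assert [fix_step_counter('loss', 'train', s) for s in (0, 50, 100, 0, 50)] == [0, 50, 100, 101, 151]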


@@ -21,6 +21,8 @@ class PatchedMatplotlib:
__patched_draw_all_recursion_guard = False
_global_plot_counter = -1
_global_image_counter = -1
_global_image_counter_limit = None
_last_iteration_plot_titles = (-1, [])
_current_task = None
_support_image_plot = False
_matplotlylib = None
@@ -125,6 +127,9 @@ class PatchedMatplotlib:
def update_current_task(task):
if PatchedMatplotlib.patch_matplotlib():
PatchedMatplotlib._current_task = task
if PatchedMatplotlib._global_image_counter_limit is None:
from ..config import config
PatchedMatplotlib._global_image_counter_limit = config.get('metric.matplotlib_untitled_history_size', 100)
@staticmethod
def patched_imshow(*args, **kw):
@@ -310,8 +315,13 @@ class PatchedMatplotlib:
# remove borders and size, we should let the web take care of that
if plotly_fig:
PatchedMatplotlib._global_plot_counter += 1
title = plot_title or 'untitled %d' % PatchedMatplotlib._global_plot_counter
last_iteration = PatchedMatplotlib._current_task.get_last_iteration()
if plot_title:
title = PatchedMatplotlib._enforce_unique_title_per_iteration(plot_title, last_iteration)
else:
PatchedMatplotlib._global_plot_counter += 1
title = 'untitled %d' % PatchedMatplotlib._global_plot_counter
plotly_fig.layout.margin = {}
plotly_fig.layout.autosize = True
plotly_fig.layout.height = None
@@ -321,38 +331,59 @@ class PatchedMatplotlib:
if not plotly_dict.get('layout'):
plotly_dict['layout'] = {}
plotly_dict['layout']['title'] = title
reporter.report_plot(title=title, series='plot', plot=plotly_dict,
iter=PatchedMatplotlib._global_plot_counter if plot_title else 0)
reporter.report_plot(title=title, series='plot', plot=plotly_dict, iter=last_iteration)
else:
logger = PatchedMatplotlib._current_task.get_logger()
# this is actually a failed plot, we should put it under plots:
# currently disabled
if force_save_as_image or not PatchedMatplotlib._support_image_plot:
last_iteration = PatchedMatplotlib._current_task.get_last_iteration()
# send the plot as image
PatchedMatplotlib._global_image_counter += 1
title = plot_title or 'untitled %d' % PatchedMatplotlib._global_image_counter
if plot_title:
title = PatchedMatplotlib._enforce_unique_title_per_iteration(plot_title, last_iteration)
else:
PatchedMatplotlib._global_image_counter += 1
title = 'untitled %d' % (PatchedMatplotlib._global_image_counter %
PatchedMatplotlib._global_image_counter_limit)
logger.report_image(title=title, series='plot image', local_path=image,
delete_after_upload=True,
iteration=PatchedMatplotlib._global_image_counter
if plot_title else 0)
delete_after_upload=True, iteration=last_iteration)
else:
# send the plot as plotly with embedded image
PatchedMatplotlib._global_plot_counter += 1
title = plot_title or 'untitled %d' % PatchedMatplotlib._global_plot_counter
last_iteration = PatchedMatplotlib._current_task.get_last_iteration()
if plot_title:
title = PatchedMatplotlib._enforce_unique_title_per_iteration(plot_title, last_iteration)
else:
PatchedMatplotlib._global_plot_counter += 1
title = 'untitled %d' % (PatchedMatplotlib._global_plot_counter %
PatchedMatplotlib._global_image_counter_limit)
logger._report_image_plot_and_upload(title=title, series='plot image', path=image,
delete_after_upload=True,
iteration=PatchedMatplotlib._global_plot_counter
if plot_title else 0)
delete_after_upload=True, iteration=last_iteration)
except Exception:
# plotly failed
pass
return
@staticmethod
def _enforce_unique_title_per_iteration(title, last_iteration):
if last_iteration != PatchedMatplotlib._last_iteration_plot_titles[0]:
PatchedMatplotlib._last_iteration_plot_titles = (last_iteration, [title])
elif title not in PatchedMatplotlib._last_iteration_plot_titles[1]:
PatchedMatplotlib._last_iteration_plot_titles[1].append(title)
else:
base_title = title
counter = 1
while title in PatchedMatplotlib._last_iteration_plot_titles[1]:
# we already used this title in this iteration, we should change the title
title = base_title + ' %d' % counter
counter += 1
# store the new title
PatchedMatplotlib._last_iteration_plot_titles[1].append(title)
return title
@staticmethod
def _get_output_figures(stored_figure, all_figures):
try:
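
Two behaviors are introduced in this file: a titled plot reported more than once within the same iteration gets a numeric suffix so it does not overwrite the earlier report, and untitled plots recycle at most matplotlib_untitled_history_size distinct "untitled N" names. A rough standalone sketch of both mechanisms (illustrative names, not the PatchedMatplotlib API):

    _last_iteration_titles = (-1, [])  # (iteration, titles already used in that iteration)
    _untitled_counter = -1
    _untitled_limit = 100              # mirrors sdk.metric.matplotlib_untitled_history_size

    def unique_title(title, iteration):
        global _last_iteration_titles
        last_iter, used = _last_iteration_titles
        if iteration != last_iter:
            _last_iteration_titles = (iteration, [title])
        elif title not in used:
            used.append(title)
        else:
            base, counter = title, 1
            while title in used:
                # e.g. "loss" -> "loss 1" -> "loss 2" within a single iteration
                title = '%s %d' % (base, counter)
                counter += 1
            used.append(title)
        return title

    def next_untitled():
        global _untitled_counter
        _untitled_counter += 1
        # recycle names so at most _untitled_limit untitled plots accumulate in the UI
        return 'untitled %d' % (_untitled_counter % _untitled_limit)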


@@ -21,6 +21,11 @@
# X files are stored in the upload destination for each metric/variant combination.
file_history_size: 100
# Max history size for matplotlib imshow files per plot title.
# File names for the uploaded images will be recycled in such a way that no more than
# X images are stored in the upload destination for each matplotlib plot title.
matplotlib_untitled_history_size: 100
# Settings for generated debug images
images {
format: JPEG