Fix Tensorboard 2d convolution histogram, improve histogram accuracy on very small histograms.

Add Logger._set_tensorboard_series_prefix add series prefix for all TB reports
2025-06-26 18:16:07 +00:00 · 2020-06-23 23:08:08 +03:00 · 2020-06-23 23:08:08 +03:00 · 5d13ec0dfe
commit 5d13ec0dfe
parent f6a8736ef6
2 changed files with 43 additions and 12 deletions
--- a/trains/binding/frameworks/tensorflow_bind.py
+++ b/trains/binding/frameworks/tensorflow_bind.py
@ -88,7 +88,7 @@ class WeightsGradientHistHelper(object):

        if isinstance(hist_data, dict):
            pass
-        elif isinstance(hist_data, np.ndarray) and np.atleast_2d(hist_data).shape[1] == 3:
+        elif isinstance(hist_data, np.ndarray) and len(hist_data.shape) == 2 and np.atleast_2d(hist_data).shape[1] == 3:
            # prepare the dictionary, assume numpy
            # hist_data['bucketLimit'] is the histogram bucket right side limit, meaning X axis
            # hist_data['bucket'] is the histogram height, meaning the Y axis
@ -96,7 +96,7 @@ class WeightsGradientHistHelper(object):
            hist_data = {'bucketLimit': hist_data[:, 0].tolist(), 'bucket': hist_data[:, 2].tolist()}
        else:
            # assume we have to do the histogram on the data
-            hist_data = np.histogram(hist_data)
+            hist_data = np.histogram(hist_data, bins=32)
            hist_data = {'bucketLimit': hist_data[1].tolist(), 'bucket': hist_data[0].tolist()}

        self._add_histogram(title=title, series=series, step=step, hist_data=hist_data)
@ -148,10 +148,11 @@ class WeightsGradientHistHelper(object):
                                   hist_iters.size % self._histogram_update_freq_multiplier != 0):
            return None

-        # resample histograms on a unified bin axis
-        _minmax = minmax[0] - 1, minmax[1] + 1
+        # resample histograms on a unified bin axis +- epsilon
+        _epsilon = abs((minmax[1] - minmax[0])/float(self._hist_x_granularity))
+        _minmax = minmax[0] - _epsilon, minmax[1] + _epsilon
        prev_xedge = np.arange(start=_minmax[0],
-                               step=(_minmax[1] - _minmax[0]) / (self._hist_x_granularity - 2), stop=_minmax[1])
+                               step=(_minmax[1] - _minmax[0]) / float(self._hist_x_granularity - 2), stop=_minmax[1])
        # uniformly select histograms and the last one
        cur_idx = self._sample_histograms(hist_iters, self._histogram_granularity)
        report_hist = np.zeros(shape=(len(cur_idx), prev_xedge.size), dtype=np.float32)
@ -215,19 +216,20 @@ class EventTrainsWriter(object):
        return self.variants.copy()

    def tag_splitter(self, tag, num_split_parts, split_char='/', join_char='_', default_title='variant',
-                     logdir_header='series', auto_reduce_num_split=False):
+                     logdir_header='series', auto_reduce_num_split=False, force_add_prefix=None):
        """
        Split a tf.summary tag line to variant and metric.
        Variant is the first part of the split tag, metric is the second.
        :param str tag:
        :param int num_split_parts:
        :param str split_char: a character to split the tag on
-        :param str join_char:  a character to join the the splits
+        :param str join_char: a character to join the the splits
        :param str default_title: variant to use in case no variant can be inferred automatically
        :param str logdir_header: if 'series_last' then series=header: series, if 'series then series=series :header,
            if 'title_last' then title=header title, if 'title' then title=title header
-        :param boolean auto_reduce_num_split: if True and the tag is split for less parts then requested,
+        :param bool auto_reduce_num_split: if True and the tag is split for less parts then requested,
            then requested number of split parts is adjusted.
+        :param str force_add_prefix: always add the prefix to the series name
        :return: (str, str) variant and metric
        """
        splitted_tag = tag.split(split_char)
@ -236,6 +238,9 @@ class EventTrainsWriter(object):
        series = join_char.join(splitted_tag[-num_split_parts:])
        title = join_char.join(splitted_tag[:-num_split_parts]) or default_title

+        if force_add_prefix:
+            series = str(force_add_prefix)+series
+
        # check if we already decided that we need to change the title/series
        graph_id = hash((title, series))
        if graph_id in self._graph_name_lookup:
@ -369,8 +374,10 @@ class EventTrainsWriter(object):
        if img_data_np is None:
            return

+        # noinspection PyProtectedMember
        title, series = self.tag_splitter(tag, num_split_parts=3, default_title='Images', logdir_header='title',
-                                          auto_reduce_num_split=True)
+                                          auto_reduce_num_split=True,
+                                          force_add_prefix=self._logger._get_tensorboard_series_prefix())
        step = self._fix_step_counter(title, series, step)

        if img_data_np.dtype != np.uint8:
@ -412,9 +419,11 @@ class EventTrainsWriter(object):
        default_title = tag if not self._logger._get_tensorboard_auto_group_scalars() else 'Scalars'
        series_per_graph = self._logger._get_tensorboard_single_series_per_graph()

+        # noinspection PyProtectedMember
        title, series = self.tag_splitter(
            tag, num_split_parts=1, default_title=default_title,
-            logdir_header='title' if series_per_graph else 'series_last'
+            logdir_header='title' if series_per_graph else 'series_last',
+            force_add_prefix=self._logger._get_tensorboard_series_prefix()
        )

        step = self._fix_step_counter(title, series, step)
@ -455,8 +464,10 @@ class EventTrainsWriter(object):
        )

    def _add_histogram(self, tag, step, hist_data):
+        # noinspection PyProtectedMember
        title, series = self.tag_splitter(tag, num_split_parts=1, default_title='Histograms',
-                                          logdir_header='series')
+                                          logdir_header='series',
+                                          force_add_prefix=self._logger._get_tensorboard_series_prefix())

        self._grad_helper.add_histogram(
            title=title,
@ -519,8 +530,10 @@ class EventTrainsWriter(object):
        if audio_data is None:
            return

+        # noinspection PyProtectedMember
        title, series = self.tag_splitter(tag, num_split_parts=3, default_title='Audio', logdir_header='title',
-                                          auto_reduce_num_split=True)
+                                          auto_reduce_num_split=True,
+                                          force_add_prefix=self._logger._get_tensorboard_series_prefix())
        step = self._fix_step_counter(title, series, step)

        stream = BytesIO(audio_data)
--- a/trains/logger.py
+++ b/trains/logger.py
@ -69,6 +69,7 @@ class Logger(object):
        self._report_worker = None
        self._task_handler = None
        self._graph_titles = {}
+        self._tensorboard_series_force_prefix = None

        StdStreamPatch.patch_std_streams(self)

@ -1263,10 +1264,26 @@ class Logger(object):
        self._graph_titles[title].add(series)

    def _get_used_title_series(self):
+        # type: () -> dict
        return self._graph_titles

+    def _get_tensorboard_series_prefix(self):
+        # type: () -> Optional[str]
+        """
+        :return str: return a string prefix to put in front of every report combing from tensorboard
+        """
+        return self._tensorboard_series_force_prefix
+
+    def _set_tensorboard_series_prefix(self, prefix):
+        # type: (Optional[str]) -> ()
+        """
+        :param str prefix: Set a string prefix to put in front of every report combing from tensorboard
+        """
+        self._tensorboard_series_force_prefix = str(prefix) if prefix else None
+
    @classmethod
    def _get_tensorboard_auto_group_scalars(cls):
+        # type: () -> bool
        """
        :return: True, if we preserve Tensorboard backward compatibility behaviour,
            i.e., scalars without specific title will be under the "Scalars" graph
@ -1276,6 +1293,7 @@ class Logger(object):

    @classmethod
    def _get_tensorboard_single_series_per_graph(cls):
+        # type: () -> bool
        """
        :return: True, if we generate a separate graph (plot) for each Tensorboard scalar series
            default is False: Tensorboard scalar series will be grouped according to their title