Fix continuing Tasks with tensorflow sometimes results in wrong initial iteration (#762)

This commit is contained in:
allegroai 2022-12-13 15:36:05 +02:00
parent 3da182426f
commit 16df0794be

View File

@@ -726,6 +726,16 @@ class EventTrainsWriter(object):
'Received event without step, assuming step = {}'.format(step))
else:
step = int(step)
# unlike other frameworks, tensorflow already accounts for the iteration number
# when continuing the training. we subtract the task's initial iteration such that we
# don't increment the step twice
step_before = step
if EventTrainsWriter._current_task:
step -= EventTrainsWriter._current_task.get_initial_iteration()
# there can be a few metrics getting reported again, so the step can be negative
# for the first few reports
if step <= 0:
return
self._max_step = max(self._max_step, step)
if value_dicts is None:
LoggerRoot.get_base_logger(TensorflowBinding).debug("Summary arrived without 'value'")