mirror of
https://github.com/clearml/clearml
synced 2025-01-31 09:07:00 +00:00
Fix support for non-linear iteration reporting
This commit is contained in:
parent
31a0867ac9
commit
02ba0e28d5
@ -50,10 +50,10 @@ class ResourceMonitor(object):
|
|||||||
seconds_since_started = 0
|
seconds_since_started = 0
|
||||||
reported = 0
|
reported = 0
|
||||||
last_iteration = 0
|
last_iteration = 0
|
||||||
last_iteration_ts = 0
|
|
||||||
last_iteration_interval = None
|
|
||||||
repeated_iterations = 0
|
|
||||||
fallback_to_sec_as_iterations = None
|
fallback_to_sec_as_iterations = None
|
||||||
|
# last_iteration_interval = None
|
||||||
|
# last_iteration_ts = 0
|
||||||
|
# repeated_iterations = 0
|
||||||
while True:
|
while True:
|
||||||
last_report = time()
|
last_report = time()
|
||||||
current_report_frequency = self._report_frequency if reported != 0 else self._first_report_sec
|
current_report_frequency = self._report_frequency if reported != 0 else self._first_report_sec
|
||||||
@ -67,8 +67,6 @@ class ResourceMonitor(object):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
reported += 1
|
|
||||||
average_readouts = self._get_average_readouts()
|
|
||||||
seconds_since_started += int(round(time() - last_report))
|
seconds_since_started += int(round(time() - last_report))
|
||||||
# check if we do not report any metric (so it means the last iteration will not be changed)
|
# check if we do not report any metric (so it means the last iteration will not be changed)
|
||||||
if fallback_to_sec_as_iterations is None:
|
if fallback_to_sec_as_iterations is None:
|
||||||
@ -79,27 +77,37 @@ class ResourceMonitor(object):
|
|||||||
'falling back to iterations as seconds-from-start')
|
'falling back to iterations as seconds-from-start')
|
||||||
fallback_to_sec_as_iterations = True
|
fallback_to_sec_as_iterations = True
|
||||||
|
|
||||||
|
clear_readouts = True
|
||||||
# if we do not have last_iteration, we just use seconds as iteration
|
# if we do not have last_iteration, we just use seconds as iteration
|
||||||
if fallback_to_sec_as_iterations:
|
if fallback_to_sec_as_iterations:
|
||||||
iteration = seconds_since_started
|
iteration = seconds_since_started
|
||||||
else:
|
else:
|
||||||
iteration = self._task.get_last_iteration()
|
iteration = self._task.get_last_iteration()
|
||||||
if iteration == last_iteration:
|
if iteration < last_iteration:
|
||||||
repeated_iterations += 1
|
# we started a new session?!
|
||||||
if last_iteration_interval:
|
# wait it out: keep the last iteration and do not clear the readouts
|
||||||
# to be on the safe side, we don't want to pass the actual next iteration
|
clear_readouts = False
|
||||||
iteration += int(0.95*last_iteration_interval[0] * (seconds_since_started - last_iteration_ts)
|
iteration = last_iteration
|
||||||
/ last_iteration_interval[1])
|
elif iteration == last_iteration:
|
||||||
else:
|
# repeated_iterations += 1
|
||||||
iteration += 1
|
# if last_iteration_interval:
|
||||||
|
# # to be on the safe side, we don't want to pass the actual next iteration
|
||||||
|
# iteration += int(0.95*last_iteration_interval[0] * (seconds_since_started - last_iteration_ts)
|
||||||
|
# / last_iteration_interval[1])
|
||||||
|
# else:
|
||||||
|
# iteration += 1
|
||||||
|
clear_readouts = False
|
||||||
|
iteration = last_iteration
|
||||||
else:
|
else:
|
||||||
last_iteration_interval = (iteration - last_iteration, seconds_since_started - last_iteration_ts)
|
# last_iteration_interval = (iteration - last_iteration, seconds_since_started - last_iteration_ts)
|
||||||
last_iteration_ts = seconds_since_started
|
# repeated_iterations = 0
|
||||||
|
# last_iteration_ts = seconds_since_started
|
||||||
last_iteration = iteration
|
last_iteration = iteration
|
||||||
repeated_iterations = 0
|
|
||||||
fallback_to_sec_as_iterations = False
|
fallback_to_sec_as_iterations = False
|
||||||
|
clear_readouts = True
|
||||||
|
|
||||||
# start reporting only when we figured out, if this is seconds based, or iterations based
|
# start reporting only when we figured out, if this is seconds based, or iterations based
|
||||||
|
average_readouts = self._get_average_readouts()
|
||||||
if fallback_to_sec_as_iterations is not None:
|
if fallback_to_sec_as_iterations is not None:
|
||||||
for k, v in average_readouts.items():
|
for k, v in average_readouts.items():
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
@ -110,7 +118,12 @@ class ResourceMonitor(object):
|
|||||||
logger.report_scalar(title=title, series=k, iteration=iteration, value=value)
|
logger.report_scalar(title=title, series=k, iteration=iteration, value=value)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
self._clear_readouts()
|
# clear readouts if this update is not averaged
|
||||||
|
if clear_readouts:
|
||||||
|
self._clear_readouts()
|
||||||
|
|
||||||
|
# count reported iterations
|
||||||
|
reported += 1
|
||||||
|
|
||||||
def _update_readouts(self):
|
def _update_readouts(self):
|
||||||
readouts = self._machine_stats()
|
readouts = self._machine_stats()
|
||||||
|
Loading…
Reference in New Issue
Block a user