clearml-server/apiserver/tests/automated/test_task_events.py

667 lines
25 KiB
Python
Raw Normal View History

import json
import operator
import unittest
from statistics import mean
from typing import Sequence, Optional, Tuple
from boltons.iterutils import first
2022-11-29 15:34:06 +00:00
from apiserver.apierrors import errors
from apiserver.es_factory import es_factory
2021-01-05 14:28:49 +00:00
from apiserver.apierrors.errors.bad_request import EventsNotAdded
from apiserver.tests.automated import TestService
class TestTaskEvents(TestService):
2022-11-29 15:34:06 +00:00
delete_params = dict(can_fail=True, force=True)
def _temp_task(self, name="test task events"):
2023-05-25 16:17:40 +00:00
task_input = dict(name=name, type="training",)
return self.create_temp(
"tasks", delete_paramse=self.delete_params, **task_input
)
2022-11-29 15:34:06 +00:00
def _temp_model(self, name="test model events", **kwargs):
self.update_missing(kwargs, name=name, uri="file:///a/b", labels={})
return self.create_temp("models", delete_params=self.delete_params, **kwargs)
@staticmethod
def _create_task_event(type_, task, iteration, **kwargs):
return {
"worker": "test",
"type": type_,
"task": task,
"iter": iteration,
"timestamp": kwargs.get("timestamp") or es_factory.get_timestamp_millis(),
**kwargs,
}
def test_task_metrics(self):
tasks = {
self._temp_task(): {
"Metric1": ["training_debug_image"],
"Metric2": ["training_debug_image", "log"],
},
self._temp_task(): {"Metric3": ["training_debug_image"]},
}
events = [
self._create_task_event(
event_type,
task=task,
iteration=1,
metric=metric,
variant="Test variant",
)
for task, metrics in tasks.items()
for metric, event_types in metrics.items()
for event_type in event_types
]
self.send_batch(events)
self._assert_task_metrics(tasks, "training_debug_image")
self._assert_task_metrics(tasks, "log")
self._assert_task_metrics(tasks, "training_stats_scalar")
def _assert_task_metrics(self, tasks: dict, event_type: str):
res = self.api.events.get_task_metrics(tasks=list(tasks), event_type=event_type)
for task, metrics in tasks.items():
res_metrics = next(
(tm.metrics for tm in res.metrics if tm.task == task), ()
)
self.assertEqual(
set(res_metrics),
set(
metric for metric, events in metrics.items() if event_type in events
),
)
2022-07-08 14:38:31 +00:00
def test_task_single_value_metrics(self):
metric = "Metric1"
variant = "Variant1"
iter_count = 10
task = self._temp_task()
special_iteration = -(2 ** 31)
events = [
{
**self._create_task_event(
"training_stats_scalar", task, iteration or special_iteration
),
"metric": metric,
"variant": variant,
"value": iteration,
}
for iteration in range(iter_count)
]
self.send_batch(events)
# special iteration is present in the events retrieval
metric_param = {"metric": metric, "variants": [variant]}
res = self.api.events.scalar_metrics_iter_raw(
task=task, batch_size=100, metric=metric_param, count_total=True
)
self.assertEqual(res.returned, iter_count)
self.assertEqual(res.total, iter_count)
self.assertEqual(
res.variants[variant]["iter"],
[x or special_iteration for x in range(iter_count)],
)
self.assertEqual(res.variants[variant]["y"], list(range(iter_count)))
2022-07-08 14:38:31 +00:00
# but not in the histogram
data = self.api.events.scalar_metrics_iter_histogram(task=task)
self.assertEqual(data[metric][variant]["x"], list(range(1, iter_count)))
# new api
res = self.api.events.get_task_single_value_metrics(tasks=[task]).tasks
self.assertEqual(len(res), 1)
data = res[0]
self.assertEqual(data.task, task)
self.assertEqual(len(data["values"]), 1)
value = data["values"][0]
self.assertEqual(value.metric, metric)
self.assertEqual(value.variant, variant)
self.assertEqual(value.value, 0)
# update is working
task_data = self.api.tasks.get_by_id(task=task).task
last_metrics = first(first(task_data.last_metrics.values()).values())
self.assertEqual(last_metrics.value, iter_count - 1)
new_value = 1000
new_event = {
**self._create_task_event("training_stats_scalar", task, special_iteration),
"metric": metric,
"variant": variant,
"value": new_value,
}
self.send(new_event)
res = self.api.events.scalar_metrics_iter_raw(
task=task, batch_size=100, metric=metric_param, count_total=True
)
self.assertEqual(
res.variants[variant]["y"], [y or new_value for y in range(iter_count)],
2022-07-08 14:38:31 +00:00
)
task_data = self.api.tasks.get_by_id(task=task).task
last_metrics = first(first(task_data.last_metrics.values()).values())
self.assertEqual(last_metrics.value, new_value)
data = self.api.events.get_task_single_value_metrics(tasks=[task]).tasks[0]
self.assertEqual(data.task, task)
self.assertEqual(len(data["values"]), 1)
value = data["values"][0]
self.assertEqual(value.value, new_value)
def test_last_scalar_metrics(self):
metric = "Metric1"
variant = "Variant1"
iter_count = 100
task = self._temp_task()
events = [
{
**self._create_task_event("training_stats_scalar", task, iteration),
"metric": metric,
"variant": variant,
"value": iteration,
}
for iteration in range(iter_count)
]
# send 2 batches to check the interaction with already stored db value
# each batch contains multiple iterations
self.send_batch(events[:50])
self.send_batch(events[50:])
task_data = self.api.tasks.get_by_id(task=task).task
metric_data = first(first(task_data.last_metrics.values()).values())
self.assertEqual(iter_count - 1, metric_data.value)
self.assertEqual(iter_count - 1, metric_data.max_value)
self.assertEqual(iter_count - 1, metric_data.max_value_iteration)
self.assertEqual(0, metric_data.min_value)
self.assertEqual(0, metric_data.min_value_iteration)
res = self.api.events.get_task_latest_scalar_values(task=task)
self.assertEqual(iter_count - 1, res.last_iter)
2022-11-29 15:34:06 +00:00
def test_model_events(self):
model = self._temp_model(ready=False)
# task log events are not allowed
log_event = self._create_task_event(
"log",
task=model,
iteration=0,
msg=f"This is a log message",
model_event=True,
)
with self.api.raises(errors.bad_request.EventsNotAdded):
self.send(log_event)
# mixed batch
2022-11-29 15:34:06 +00:00
events = [
{
**self._create_task_event("training_stats_scalar", model, iteration),
"metric": f"Metric{metric_idx}",
"variant": f"Variant{variant_idx}",
"value": iteration,
"model_event": True,
}
for iteration in range(2)
for metric_idx in range(5)
for variant_idx in range(5)
]
task = self._temp_task()
# noinspection PyTypeChecker
events.append(
self._create_task_event(
"log",
task=task,
iteration=0,
msg=f"This is a log message",
metric="Metric0",
variant="Variant0",
allow_locked=True,
)
)
2022-11-29 15:34:06 +00:00
self.send_batch(events)
data = self.api.events.scalar_metrics_iter_histogram(
task=model, model_events=True
)
2022-11-29 15:34:06 +00:00
self.assertEqual(list(data), [f"Metric{idx}" for idx in range(5)])
metric_data = data.Metric0
self.assertEqual(list(metric_data), [f"Variant{idx}" for idx in range(5)])
variant_data = metric_data.Variant0
self.assertEqual(variant_data.x, [0, 1])
self.assertEqual(variant_data.y, [0.0, 1.0])
2022-11-29 15:34:06 +00:00
2023-05-25 16:17:40 +00:00
model_data = self.api.models.get_all_ex(
id=[model], only_fields=["last_metrics", "last_iteration"]
).models[0]
metric_data = first(first(model_data.last_metrics.values()).values())
self.assertEqual(1, model_data.last_iteration)
self.assertEqual(1, metric_data.value)
self.assertEqual(1, metric_data.max_value)
self.assertEqual(1, metric_data.max_value_iteration)
self.assertEqual(0, metric_data.min_value)
self.assertEqual(0, metric_data.min_value_iteration)
self._assert_log_events(task=task, expected_total=1)
def test_error_events(self):
task = self._temp_task()
events = [
self._create_task_event("unknown type", task, iteration=1),
self._create_task_event("training_debug_image", task=None, iteration=1),
self._create_task_event(
"training_debug_image", task="Invalid task", iteration=1
),
]
# failure if no events added
with self.api.raises(EventsNotAdded):
self.send_batch(events)
events.append(
self._create_task_event("training_debug_image", task=task, iteration=1)
)
# success if at least one event added
res = self.send_batch(events)
self.assertEqual(res["added"], 1)
self.assertEqual(res["errors"], 3)
self.assertEqual(len(res["errors_info"]), 3)
res = self.api.events.get_task_events(task=task)
self.assertEqual(len(res.events), 1)
def test_task_logs(self):
task = self._temp_task()
timestamp = es_factory.get_timestamp_millis()
events = [
self._create_task_event(
"log",
task=task,
iteration=iter_,
timestamp=timestamp + iter_ * 1000,
msg=f"This is a log message from test task iter {iter_}",
)
for iter_ in range(10)
]
self.send_batch(events)
# test forward navigation
ftime, ltime = None, None
for page in range(2):
ftime, ltime = self._assert_log_events(
task=task, timestamp=ltime, expected_page=page
)
# test backwards navigation
self._assert_log_events(task=task, timestamp=ftime, navigate_earlier=False)
# test order
self._assert_log_events(task=task, order="asc")
def _assert_log_events(
self,
task,
batch_size: int = 5,
timestamp: Optional[int] = None,
expected_total: int = 10,
expected_page: int = 0,
**extra_params,
) -> Tuple[int, int]:
res = self.api.events.get_task_log(
task=task, batch_size=batch_size, from_timestamp=timestamp, **extra_params,
)
self.assertEqual(res.total, expected_total)
expected_events = max(
0, batch_size - max(0, (expected_page + 1) * batch_size - expected_total)
)
self.assertEqual(res.returned, expected_events)
self.assertEqual(len(res.events), expected_events)
unique_events = len({ev.iter for ev in res.events})
self.assertEqual(len(res.events), unique_events)
if res.events:
cmp_operator = operator.ge
if (
not extra_params.get("navigate_earlier", True)
or extra_params.get("order", None) == "asc"
):
cmp_operator = operator.le
self.assertTrue(
all(
cmp_operator(first.timestamp, second.timestamp)
for first, second in zip(res.events, res.events[1:])
)
)
return (
(res.events[0].timestamp, res.events[-1].timestamp)
if res.events
else (None, None)
)
def test_task_metric_value_intervals_keys(self):
metric = "Metric1"
variant = "Variant1"
iter_count = 100
task = self._temp_task()
events = [
{
**self._create_task_event("training_stats_scalar", task, iteration),
"metric": metric,
"variant": variant,
"value": iteration,
}
for iteration in range(iter_count)
]
self.send_batch(events)
for key in None, "iter", "timestamp", "iso_time":
with self.subTest(key=key):
data = self.api.events.scalar_metrics_iter_histogram(
task=task, **(dict(key=key) if key is not None else {})
)
self.assertIn(metric, data)
self.assertIn(variant, data[metric])
self.assertIn("x", data[metric][variant])
self.assertIn("y", data[metric][variant])
def test_multitask_events_many_metrics(self):
tasks = [
self._temp_task(name="test events1"),
self._temp_task(name="test events2"),
]
iter_count = 10
metrics_count = 10
variants_count = 10
events = [
{
**self._create_task_event("training_stats_scalar", task, iteration),
"metric": f"Metric{metric_idx}",
"variant": f"Variant{variant_idx}",
"value": iteration,
}
for iteration in range(iter_count)
for task in tasks
for metric_idx in range(metrics_count)
for variant_idx in range(variants_count)
]
self.send_batch(events)
data = self.api.events.multi_task_scalar_metrics_iter_histogram(tasks=tasks)
self._assert_metrics_and_variants(
data.metrics,
metrics=metrics_count,
variants=variants_count,
tasks=tasks,
iterations=iter_count,
)
def _assert_metrics_and_variants(
self, data: dict, metrics: int, variants: int, tasks: Sequence, iterations: int
):
self.assertEqual(len(data), metrics)
for m in range(metrics):
metric_data = data[f"Metric{m}"]
self.assertEqual(len(metric_data), variants)
for v in range(variants):
variant_data = metric_data[f"Variant{v}"]
self.assertEqual(len(variant_data), len(tasks))
for t in tasks:
task_data = variant_data[t]
self.assertEqual(len(task_data["x"]), iterations)
self.assertEqual(len(task_data["y"]), iterations)
def test_task_metric_raw(self):
metric = "Metric1"
variant = "Variant1"
iter_count = 100
task = self._temp_task()
events = [
{
**self._create_task_event("training_stats_scalar", task, iteration),
"metric": metric,
"variant": variant,
"value": iteration,
}
for iteration in range(iter_count)
]
self.send_batch(events)
batch_size = 15
metric_param = {"metric": metric, "variants": [variant]}
res = self.api.events.scalar_metrics_iter_raw(
task=task, batch_size=batch_size, metric=metric_param, count_total=True
)
self.assertEqual(res.total, len(events))
self.assertTrue(res.scroll_id)
res_iters = []
res_ys = []
calls = 0
while res.returned or calls > 10:
calls += 1
res_iters.extend(res.variants[variant]["iter"])
res_ys.extend(res.variants[variant]["y"])
scroll_id = res.scroll_id
res = self.api.events.scalar_metrics_iter_raw(
task=task, metric=metric_param, scroll_id=scroll_id
)
self.assertEqual(calls, len(events) // batch_size + 1)
self.assertEqual(res_iters, [ev["iter"] for ev in events])
self.assertEqual(res_ys, [ev["value"] for ev in events])
def test_task_metric_value_intervals(self):
metric = "Metric1"
variant = "Variant1"
iter_count = 100
task = self._temp_task()
events = [
{
**self._create_task_event("training_stats_scalar", task, iteration),
"metric": metric,
"variant": variant,
"value": iteration,
}
for iteration in range(iter_count)
]
self.send_batch(events)
data = self.api.events.scalar_metrics_iter_histogram(task=task)
self._assert_metrics_histogram(data[metric][variant], iter_count, 100)
data = self.api.events.scalar_metrics_iter_histogram(task=task, samples=100)
self._assert_metrics_histogram(data[metric][variant], iter_count, 100)
data = self.api.events.scalar_metrics_iter_histogram(task=task, samples=10)
self._assert_metrics_histogram(data[metric][variant], iter_count, 10)
def _assert_metrics_histogram(self, data, iters, samples):
interval = iters // samples
self.assertEqual(len(data["x"]), samples)
self.assertEqual(len(data["y"]), samples)
for curr in range(samples):
self.assertEqual(data["x"][curr], curr * interval)
self.assertEqual(
data["y"][curr],
mean(v for v in range(curr * interval, (curr + 1) * interval)),
)
def test_multitask_plots(self):
task1 = self._temp_task()
events = [
self._create_task_event("plot", task1, 1, metric="A", variant="AX", plot_str="Task1_1_A_AX"),
self._create_task_event("plot", task1, 2, metric="B", variant="BX", plot_str="Task1_2_B_BX"),
self._create_task_event("plot", task1, 3, metric="B", variant="BX", plot_str="Task1_3_B_BX"),
self._create_task_event("plot", task1, 3, metric="C", variant="CX", plot_str="Task1_3_C_CX"),
]
self.send_batch(events)
task2 = self._temp_task()
events = [
self._create_task_event("plot", task2, 1, metric="C", variant="CX", plot_str="Task2_1_C_CX"),
self._create_task_event("plot", task2, 2, metric="A", variant="AY", plot_str="Task2_2_A_AY"),
]
self.send_batch(events)
plots = self.api.events.get_multi_task_plots(tasks=[task1, task2]).plots
self.assertEqual(len(plots), 3)
self.assertEqual(len(plots.A), 2)
self.assertEqual(len(plots.A.AX), 1)
self.assertEqual(len(plots.A.AY), 1)
self.assertEqual(plots.A.AX[task1]["1"]["plots"][0]["plot_str"], "Task1_1_A_AX")
self.assertEqual(plots.A.AY[task2]["2"]["plots"][0]["plot_str"], "Task2_2_A_AY")
self.assertEqual(len(plots.B), 1)
self.assertEqual(len(plots.B.BX), 1)
self.assertEqual(plots.B.BX[task1]["3"]["plots"][0]["plot_str"], "Task1_3_B_BX")
self.assertEqual(len(plots.C), 1)
self.assertEqual(len(plots.C.CX), 2)
self.assertEqual(plots.C.CX[task1]["3"]["plots"][0]["plot_str"], "Task1_3_C_CX")
self.assertEqual(plots.C.CX[task2]["1"]["plots"][0]["plot_str"], "Task2_1_C_CX")
def test_task_plots(self):
task = self._temp_task()
event = self._create_task_event("plot", task, 0)
event["metric"] = "roc"
event.update(
{
"plot_str": json.dumps(
{
"data": [
{
"x": [0, 1, 2, 3, 4, 5, 6, 7, 8],
"y": [0, 1, 2, 3, 4, 5, 6, 7, 8],
"text": [
"Th=0.1",
"Th=0.2",
"Th=0.3",
"Th=0.4",
"Th=0.5",
"Th=0.6",
"Th=0.7",
"Th=0.8",
],
"name": "class1",
},
{
"x": [0, 1, 2, 3, 4, 5, 6, 7, 8],
"y": [2.0, 3.0, 5.0, 8.2, 6.4, 7.5, 9.2, 8.1, 10.0],
"text": [
"Th=0.1",
"Th=0.2",
"Th=0.3",
"Th=0.4",
"Th=0.5",
"Th=0.6",
"Th=0.7",
"Th=0.8",
],
"name": "class2",
},
],
"layout": {
"title": "ROC for iter 0",
"xaxis": {"title": "my x axis"},
"yaxis": {"title": "my y axis"},
},
}
)
}
)
self.send(event)
event1 = self._create_task_event("plot", task, 100)
event1["metric"] = "confusion"
event1.update(
{
"plot_str": json.dumps(
{
"data": [
{
"y": [
"lying",
"sitting",
"standing",
"people",
"backgroun",
],
"x": [
"lying",
"sitting",
"standing",
"people",
"backgroun",
],
"z": [
[758, 163, 0, 0, 23],
[63, 858, 3, 0, 0],
[0, 50, 188, 21, 35],
[0, 22, 8, 40, 4],
[12, 91, 26, 29, 368],
],
"type": "heatmap",
}
],
"layout": {
"title": "Confusion Matrix for iter 100",
"xaxis": {"title": "Predicted value"},
"yaxis": {"title": "Real value"},
},
}
)
}
)
self.send(event1)
plots = self.api.events.get_task_plots(task=task).plots
self.assertEqual(
{e["plot_str"] for e in (event, event1)}, {p.plot_str for p in plots}
)
self.api.tasks.reset(task=task)
plots = self.api.events.get_task_plots(task=task).plots
self.assertEqual(len(plots), 0)
@unittest.skip("this test will run only if 'validate_plot_str' is set to true")
def test_plots_validation(self):
valid_plot_str = json.dumps({"data": []})
invalid_plot_str = "Not a valid json"
task = self._temp_task()
event = self._create_task_event(
"plot", task, 0, metric="test1", plot_str=valid_plot_str
)
event1 = self._create_task_event(
"plot", task, 100, metric="test2", plot_str=invalid_plot_str
)
self.send_batch([event, event1])
res = self.api.events.get_task_plots(task=task).plots
self.assertEqual(len(res), 1)
self.assertEqual(res[0].metric, "test1")
event = self._create_task_event(
"plot",
task,
0,
metric="test1",
plot_str=valid_plot_str,
skip_validation=True,
)
event1 = self._create_task_event(
"plot",
task,
100,
metric="test2",
plot_str=invalid_plot_str,
skip_validation=True,
)
self.send_batch([event, event1])
res = self.api.events.get_task_plots(task=task).plots
self.assertEqual(len(res), 2)
self.assertEqual(set(r.metric for r in res), {"test1", "test2"})
def send_batch(self, events):
_, data = self.api.send_batch("events.add_batch", events)
return data
def send(self, event):
2022-11-29 15:34:06 +00:00
_, data = self.api.send("events.add", event)
return data
if __name__ == "__main__":
unittest.main()