# clearml-server/apiserver/tests/automated/test_task_events.py

import json
import operator
import unittest
from statistics import mean
from typing import Sequence, Optional, Tuple
from boltons.iterutils import first
from apiserver.apierrors import errors
from apiserver.es_factory import es_factory
from apiserver.apierrors.errors.bad_request import EventsNotAdded
from apiserver.tests.automated import TestService
class TestTaskEvents(TestService):
    """Integration tests for the events API: adding task/model events and querying
    them back (metrics, histograms, raw scalars, logs, and plots)."""

    # Deletion options for temp entities created by these tests:
    # ignore failures and force-delete even if referenced.
    delete_params = dict(can_fail=True, force=True)
    # Name used for tasks created by these tests unless overridden.
    default_task_name = "test task events"
def _temp_project(self, name=default_task_name):
return self.create_temp(
"projects",
name=name,
description="test",
delete_params=self.delete_params,
)
def _temp_task(self, name=default_task_name, **kwargs):
self.update_missing(kwargs, name=name, type="training")
return self.create_temp(
"tasks", delete_paramse=self.delete_params, **kwargs
)
def _temp_model(self, name="test model events", **kwargs):
self.update_missing(kwargs, name=name, uri="file:///a/b", labels={})
return self.create_temp("models", delete_params=self.delete_params, **kwargs)
@staticmethod
def _create_task_event(type_, task, iteration, **kwargs):
return {
"worker": "test",
"type": type_,
"task": task,
"iter": iteration,
"timestamp": kwargs.get("timestamp") or es_factory.get_timestamp_millis(),
**kwargs,
}
def test_task_metrics(self):
tasks = {
self._temp_task(): {
"Metric1": ["training_debug_image"],
"Metric2": ["training_debug_image", "log"],
},
self._temp_task(): {"Metric3": ["training_debug_image"]},
}
events = [
self._create_task_event(
event_type,
task=task,
iteration=1,
metric=metric,
variant="Test variant",
)
for task, metrics in tasks.items()
for metric, event_types in metrics.items()
for event_type in event_types
]
self.send_batch(events)
self._assert_task_metrics(tasks, "training_debug_image")
self._assert_task_metrics(tasks, "log")
self._assert_task_metrics(tasks, "training_stats_scalar")
self._assert_multitask_metrics(
tasks=list(tasks), metrics=["Metric1", "Metric2", "Metric3"]
)
self._assert_multitask_metrics(
tasks=list(tasks),
event_type="training_debug_image",
metrics=["Metric1", "Metric2", "Metric3"],
)
self._assert_multitask_metrics(tasks=list(tasks), event_type="plot", metrics=[])
def _assert_multitask_metrics(
self, tasks: Sequence[str], metrics: Sequence[str], event_type: str = None
):
res = self.api.events.get_multi_task_metrics(
tasks=tasks,
**({"event_type": event_type} if event_type else {}),
).metrics
self.assertEqual([r.metric for r in res], metrics)
self.assertTrue(all(r.variants == ["Test variant"] for r in res))
def _assert_task_metrics(self, tasks: dict, event_type: str):
res = self.api.events.get_task_metrics(tasks=list(tasks), event_type=event_type)
for task, metrics in tasks.items():
res_metrics = next(
(tm.metrics for tm in res.metrics if tm.task == task), ()
)
self.assertEqual(
set(res_metrics),
set(
metric for metric, events in metrics.items() if event_type in events
),
)
    def test_task_single_value_metrics(self):
        """Single-value scalar handling: events reported at the special sentinel
        iteration appear in raw retrieval and in get_task_single_value_metrics,
        but are excluded from the histogram; re-sending updates the value."""
        metric = "Metric1"
        variant = "Variant1"
        iter_count = 10
        task = self._temp_task()
        # Sentinel iteration used to mark "single value" scalars
        # (iteration 0 is mapped onto it below via `iteration or special_iteration`).
        special_iteration = -(2 ** 31)
        events = [
            {
                **self._create_task_event(
                    "training_stats_scalar", task, iteration or special_iteration
                ),
                "metric": metric,
                "variant": variant,
                "value": iteration,
            }
            for iteration in range(iter_count)
        ]
        self.send_batch(events)
        # special iteration is present in the events retrieval
        metric_param = {"metric": metric, "variants": [variant]}
        res = self.api.events.scalar_metrics_iter_raw(
            task=task, batch_size=100, metric=metric_param, count_total=True
        )
        self.assertEqual(res.returned, iter_count)
        self.assertEqual(res.total, iter_count)
        self.assertEqual(
            res.variants[variant]["iter"],
            [x or special_iteration for x in range(iter_count)],
        )
        self.assertEqual(res.variants[variant]["y"], list(range(iter_count)))
        # but not in the histogram (only iterations 1..9 show up)
        data = self.api.events.scalar_metrics_iter_histogram(task=task)
        self.assertEqual(data[metric][variant]["x"], list(range(1, iter_count)))
        # new api: the single-value event is returned per task
        res = self.api.events.get_task_single_value_metrics(tasks=[task]).tasks
        self.assertEqual(len(res), 1)
        data = res[0]
        self.assertEqual(data.task, task)
        self.assertEqual(data.task_name, self.default_task_name)
        self.assertEqual(len(data["values"]), 1)
        value = data["values"][0]
        self.assertEqual(value.metric, metric)
        self.assertEqual(value.variant, variant)
        # the single-value event was the one sent with value 0 (iteration 0)
        self.assertEqual(value.value, 0)
        # test metrics parameter: filtering by metric/variant names
        res = self.api.events.get_task_single_value_metrics(
            tasks=[task], metrics=[{"metric": metric, "variants": [variant]}]
        ).tasks
        self.assertEqual(len(res), 1)
        res = self.api.events.get_task_single_value_metrics(
            tasks=[task], metrics=[{"metric": "non_existing", "variants": [variant]}]
        ).tasks
        self.assertEqual(len(res), 0)
        # update is working: last_metrics reflects the latest reported value
        task_data = self.api.tasks.get_by_id(task=task).task
        last_metrics = first(first(task_data.last_metrics.values()).values())
        self.assertEqual(last_metrics.value, iter_count - 1)
        new_value = 1000
        new_event = {
            **self._create_task_event("training_stats_scalar", task, special_iteration),
            "metric": metric,
            "variant": variant,
            "value": new_value,
        }
        self.send(new_event)
        res = self.api.events.scalar_metrics_iter_raw(
            task=task, batch_size=100, metric=metric_param, count_total=True
        )
        # the value at the sentinel iteration (position 0) was overwritten
        self.assertEqual(
            res.variants[variant]["y"], [y or new_value for y in range(iter_count)],
        )
        task_data = self.api.tasks.get_by_id(task=task).task
        last_metrics = first(first(task_data.last_metrics.values()).values())
        self.assertEqual(last_metrics.value, new_value)
        data = self.api.events.get_task_single_value_metrics(tasks=[task]).tasks[0]
        self.assertEqual(data.task, task)
        self.assertEqual(data.task_name, self.default_task_name)
        self.assertEqual(len(data["values"]), 1)
        value = data["values"][0]
        self.assertEqual(value.value, new_value)
def test_last_scalar_metrics(self):
metric = "Metric1"
for variant in ("Variant1", None):
iter_count = 100
task = self._temp_task()
events = [
{
**self._create_task_event("training_stats_scalar", task, iteration),
"metric": metric,
"variant": variant,
"value": iteration,
}
for iteration in range(iter_count)
]
# send 2 batches to check the interaction with already stored db value
# each batch contains multiple iterations
self.send_batch(events[:50])
self.send_batch(events[50:])
task_data = self.api.tasks.get_by_id(task=task).task
metric_data = first(first(task_data.last_metrics.values()).values())
self.assertEqual(iter_count - 1, metric_data.value)
self.assertEqual(iter_count - 1, metric_data.max_value)
self.assertEqual(iter_count - 1, metric_data.max_value_iteration)
self.assertEqual(0, metric_data.min_value)
self.assertEqual(0, metric_data.min_value_iteration)
res = self.api.events.get_task_latest_scalar_values(task=task)
self.assertEqual(iter_count - 1, res.last_iter)
def test_model_events(self):
model = self._temp_model(ready=False)
# task log events are not allowed
log_event = self._create_task_event(
"log",
task=model,
iteration=0,
msg=f"This is a log message",
model_event=True,
)
with self.api.raises(errors.bad_request.EventsNotAdded):
self.send(log_event)
# mixed batch
events = [
{
**self._create_task_event("training_stats_scalar", model, iteration),
"metric": f"Metric{metric_idx}",
"variant": f"Variant{variant_idx}",
"value": iteration,
"model_event": True,
}
for iteration in range(2)
for metric_idx in range(5)
for variant_idx in range(5)
]
task = self._temp_task()
# noinspection PyTypeChecker
events.append(
self._create_task_event(
"log",
task=task,
iteration=0,
msg=f"This is a log message",
metric="Metric0",
variant="Variant0",
allow_locked=True,
)
)
self.send_batch(events)
data = self.api.events.scalar_metrics_iter_histogram(
task=model, model_events=True
)
self.assertEqual(list(data), [f"Metric{idx}" for idx in range(5)])
metric_data = data.Metric0
self.assertEqual(list(metric_data), [f"Variant{idx}" for idx in range(5)])
variant_data = metric_data.Variant0
self.assertEqual(variant_data.x, [0, 1])
self.assertEqual(variant_data.y, [0.0, 1.0])
model_data = self.api.models.get_all_ex(
id=[model], only_fields=["last_metrics", "last_iteration"]
).models[0]
metric_data = first(first(model_data.last_metrics.values()).values())
self.assertEqual(1, model_data.last_iteration)
self.assertEqual(1, metric_data.value)
self.assertEqual(1, metric_data.max_value)
self.assertEqual(1, metric_data.max_value_iteration)
self.assertEqual(0, metric_data.min_value)
self.assertEqual(0, metric_data.min_value_iteration)
self._assert_log_events(task=task, expected_total=1)
metrics = self.api.events.get_multi_task_metrics(
tasks=[model],
event_type="training_stats_scalar",
model_events=True,
).metrics
self.assertEqual([m.metric for m in metrics], [f"Metric{i}" for i in range(5)])
variants = [f"Variant{i}" for i in range(5)]
self.assertTrue(all(m.variants == variants for m in metrics))
def test_error_events(self):
task = self._temp_task()
events = [
self._create_task_event("unknown type", task, iteration=1),
self._create_task_event("training_debug_image", task=None, iteration=1),
self._create_task_event(
"training_debug_image", task="Invalid task", iteration=1
),
]
# failure if no events added
with self.api.raises(EventsNotAdded):
self.send_batch(events)
events.append(
self._create_task_event("training_debug_image", task=task, iteration=1)
)
# success if at least one event added
res = self.send_batch(events)
self.assertEqual(res["added"], 1)
self.assertEqual(res["errors"], 3)
self.assertEqual(len(res["errors_info"]), 3)
res = self.api.events.get_task_events(task=task)
self.assertEqual(len(res.events), 1)
def test_task_logs(self):
task = self._temp_task()
timestamp = es_factory.get_timestamp_millis()
events = [
self._create_task_event(
"log",
task=task,
iteration=iter_,
timestamp=timestamp + iter_ * 1000,
msg=f"This is a log message from test task iter {iter_}",
)
for iter_ in range(10)
]
self.send_batch(events)
# test forward navigation
ftime, ltime = None, None
for page in range(2):
ftime, ltime = self._assert_log_events(
task=task, timestamp=ltime, expected_page=page
)
# test backwards navigation
self._assert_log_events(task=task, timestamp=ftime, navigate_earlier=False)
# test order
self._assert_log_events(task=task, order="asc")
def _assert_log_events(
self,
task,
batch_size: int = 5,
timestamp: Optional[int] = None,
expected_total: int = 10,
expected_page: int = 0,
**extra_params,
) -> Tuple[int, int]:
res = self.api.events.get_task_log(
task=task, batch_size=batch_size, from_timestamp=timestamp, **extra_params,
)
self.assertEqual(res.total, expected_total)
expected_events = max(
0, batch_size - max(0, (expected_page + 1) * batch_size - expected_total)
)
self.assertEqual(res.returned, expected_events)
self.assertEqual(len(res.events), expected_events)
unique_events = len({ev.iter for ev in res.events})
self.assertEqual(len(res.events), unique_events)
if res.events:
cmp_operator = operator.ge
if (
not extra_params.get("navigate_earlier", True)
or extra_params.get("order", None) == "asc"
):
cmp_operator = operator.le
self.assertTrue(
all(
cmp_operator(first.timestamp, second.timestamp)
for first, second in zip(res.events, res.events[1:])
)
)
return (
(res.events[0].timestamp, res.events[-1].timestamp)
if res.events
else (None, None)
)
def test_task_unique_metric_variants(self):
project = self._temp_project()
task1 = self._temp_task(project=project)
task2 = self._temp_task(project=project)
metric1 = "Metric1"
metric2 = "Metric2"
events = [
{
**self._create_task_event("training_stats_scalar", task, 0),
"metric": metric,
"variant": "Variant",
"value": 10,
}
for task, metric in ((task1, metric1), (task2, metric2))
]
self.send_batch(events)
metrics = self.api.projects.get_unique_metric_variants(project=project).metrics
self.assertEqual({m.metric for m in metrics}, {metric1, metric2})
metrics = self.api.projects.get_unique_metric_variants(ids=[task1, task2]).metrics
self.assertEqual({m.metric for m in metrics}, {metric1, metric2})
metrics = self.api.projects.get_unique_metric_variants(ids=[task1]).metrics
self.assertEqual([m.metric for m in metrics], [metric1])
def test_task_metric_value_intervals_keys(self):
metric = "Metric1"
variant = "Variant1"
iter_count = 100
task = self._temp_task()
events = [
{
**self._create_task_event("training_stats_scalar", task, iteration),
"metric": metric,
"variant": variant,
"value": iteration,
}
for iteration in range(iter_count)
]
self.send_batch(events)
for key in None, "iter", "timestamp", "iso_time":
with self.subTest(key=key):
data = self.api.events.scalar_metrics_iter_histogram(
task=task, **(dict(key=key) if key is not None else {})
)
self.assertIn(metric, data)
self.assertIn(variant, data[metric])
self.assertIn("x", data[metric][variant])
self.assertIn("y", data[metric][variant])
def test_multitask_events_many_metrics(self):
tasks = [
self._temp_task(name="test events1"),
self._temp_task(name="test events2"),
]
iter_count = 10
metrics_count = 10
variants_count = 10
events = [
{
**self._create_task_event("training_stats_scalar", task, iteration),
"metric": f"Metric{metric_idx}",
"variant": f"Variant{variant_idx}",
"value": iteration,
}
for iteration in range(iter_count)
for task in tasks
for metric_idx in range(metrics_count)
for variant_idx in range(variants_count)
]
self.send_batch(events)
data = self.api.events.multi_task_scalar_metrics_iter_histogram(tasks=tasks)
self._assert_metrics_and_variants(
data.metrics,
metrics=metrics_count,
variants=variants_count,
tasks=tasks,
iterations=iter_count,
)
# test metrics
data = self.api.events.multi_task_scalar_metrics_iter_histogram(
tasks=tasks,
metrics=[
{
"metric": f"Metric{m_idx}",
"variants": [f"Variant{v_idx}" for v_idx in range(4)],
}
for m_idx in range(2)
],
)
self._assert_metrics_and_variants(
data.metrics,
metrics=2,
variants=4,
tasks=tasks,
iterations=iter_count,
)
def _assert_metrics_and_variants(
self, data: dict, metrics: int, variants: int, tasks: Sequence, iterations: int
):
self.assertEqual(len(data), metrics)
for m in range(metrics):
metric_data = data[f"Metric{m}"]
self.assertEqual(len(metric_data), variants)
for v in range(variants):
variant_data = metric_data[f"Variant{v}"]
self.assertEqual(len(variant_data), len(tasks))
for t in tasks:
task_data = variant_data[t]
self.assertEqual(len(task_data["x"]), iterations)
self.assertEqual(len(task_data["y"]), iterations)
def test_task_metric_raw(self):
metric = "Metric1"
variant = "Variant1"
iter_count = 100
task = self._temp_task()
events = [
{
**self._create_task_event("training_stats_scalar", task, iteration),
"metric": metric,
"variant": variant,
"value": iteration,
}
for iteration in range(iter_count)
]
self.send_batch(events)
batch_size = 15
metric_param = {"metric": metric, "variants": [variant]}
res = self.api.events.scalar_metrics_iter_raw(
task=task, batch_size=batch_size, metric=metric_param, count_total=True
)
self.assertEqual(res.total, len(events))
self.assertTrue(res.scroll_id)
res_iters = []
res_ys = []
calls = 0
while res.returned or calls > 10:
calls += 1
res_iters.extend(res.variants[variant]["iter"])
res_ys.extend(res.variants[variant]["y"])
scroll_id = res.scroll_id
res = self.api.events.scalar_metrics_iter_raw(
task=task, metric=metric_param, scroll_id=scroll_id
)
self.assertEqual(calls, len(events) // batch_size + 1)
self.assertEqual(res_iters, [ev["iter"] for ev in events])
self.assertEqual(res_ys, [ev["value"] for ev in events])
def test_task_metric_value_intervals(self):
metric = "Metric1"
variant = "Variant1"
iter_count = 100
task = self._temp_task()
events = [
{
**self._create_task_event("training_stats_scalar", task, iteration),
"metric": metric,
"variant": variant,
"value": iteration,
}
for iteration in range(iter_count)
]
self.send_batch(events)
data = self.api.events.scalar_metrics_iter_histogram(task=task)
self._assert_metrics_histogram(data[metric][variant], iter_count, 100)
data = self.api.events.scalar_metrics_iter_histogram(task=task, samples=100)
self._assert_metrics_histogram(data[metric][variant], iter_count, 100)
data = self.api.events.scalar_metrics_iter_histogram(task=task, samples=10)
self._assert_metrics_histogram(data[metric][variant], iter_count, 10)
def _assert_metrics_histogram(self, data, iters, samples):
interval = iters // samples
self.assertEqual(len(data["x"]), samples)
self.assertEqual(len(data["y"]), samples)
for curr in range(samples):
self.assertEqual(data["x"][curr], curr * interval)
self.assertEqual(
data["y"][curr],
mean(v for v in range(curr * interval, (curr + 1) * interval)),
)
    def test_multitask_plots(self):
        """get_multi_task_plots groups plots by metric -> variant -> task -> iteration,
        returning only the latest plot per (metric, variant, task)."""
        task1 = self._temp_task()
        events = [
            self._create_task_event("plot", task1, 1, metric="A", variant="AX", plot_str="Task1_1_A_AX"),
            self._create_task_event("plot", task1, 2, metric="B", variant="BX", plot_str="Task1_2_B_BX"),
            # two B/BX iterations: only the latest (iter 3) is expected back
            self._create_task_event("plot", task1, 3, metric="B", variant="BX", plot_str="Task1_3_B_BX"),
            self._create_task_event("plot", task1, 3, metric="C", variant="CX", plot_str="Task1_3_C_CX"),
        ]
        self.send_batch(events)
        task2 = self._temp_task()
        events = [
            self._create_task_event("plot", task2, 1, metric="C", variant="CX", plot_str="Task2_1_C_CX"),
            self._create_task_event("plot", task2, 2, metric="A", variant="AY", plot_str="Task2_2_A_AY"),
        ]
        self.send_batch(events)
        plots = self.api.events.get_multi_task_plots(tasks=[task1, task2]).plots
        # three metrics overall: A (2 variants), B (1), C (1 variant, 2 tasks)
        self.assertEqual(len(plots), 3)
        self.assertEqual(len(plots.A), 2)
        self.assertEqual(len(plots.A.AX), 1)
        self.assertEqual(len(plots.A.AY), 1)
        self.assertEqual(plots.A.AX[task1]["1"]["plots"][0]["plot_str"], "Task1_1_A_AX")
        self.assertEqual(plots.A.AY[task2]["2"]["plots"][0]["plot_str"], "Task2_2_A_AY")
        self.assertEqual(len(plots.B), 1)
        self.assertEqual(len(plots.B.BX), 1)
        self.assertEqual(plots.B.BX[task1]["3"]["plots"][0]["plot_str"], "Task1_3_B_BX")
        self.assertEqual(len(plots.C), 1)
        self.assertEqual(len(plots.C.CX), 2)
        self.assertEqual(plots.C.CX[task1]["3"]["plots"][0]["plot_str"], "Task1_3_C_CX")
        self.assertEqual(plots.C.CX[task2]["1"]["plots"][0]["plot_str"], "Task2_1_C_CX")
        # test metrics parameter: filter to metric A only
        plots = self.api.events.get_multi_task_plots(
            tasks=[task1, task2], metrics=[{"metric": "A"}]
        ).plots
        self.assertEqual(len(plots), 1)
        self.assertEqual(len(plots.A), 2)
    def test_task_plots(self):
        """Plot events (plotly-style JSON in plot_str) are stored and returned by
        get_task_plots, and removed when the task is reset."""
        task = self._temp_task()
        # first plot: two scatter traces under the "roc" metric at iteration 0
        event = self._create_task_event("plot", task, 0)
        event["metric"] = "roc"
        event.update(
            {
                "plot_str": json.dumps(
                    {
                        "data": [
                            {
                                "x": [0, 1, 2, 3, 4, 5, 6, 7, 8],
                                "y": [0, 1, 2, 3, 4, 5, 6, 7, 8],
                                "text": [
                                    "Th=0.1",
                                    "Th=0.2",
                                    "Th=0.3",
                                    "Th=0.4",
                                    "Th=0.5",
                                    "Th=0.6",
                                    "Th=0.7",
                                    "Th=0.8",
                                ],
                                "name": "class1",
                            },
                            {
                                "x": [0, 1, 2, 3, 4, 5, 6, 7, 8],
                                "y": [2.0, 3.0, 5.0, 8.2, 6.4, 7.5, 9.2, 8.1, 10.0],
                                "text": [
                                    "Th=0.1",
                                    "Th=0.2",
                                    "Th=0.3",
                                    "Th=0.4",
                                    "Th=0.5",
                                    "Th=0.6",
                                    "Th=0.7",
                                    "Th=0.8",
                                ],
                                "name": "class2",
                            },
                        ],
                        "layout": {
                            "title": "ROC for iter 0",
                            "xaxis": {"title": "my x axis"},
                            "yaxis": {"title": "my y axis"},
                        },
                    }
                )
            }
        )
        self.send(event)
        # second plot: a heatmap under the "confusion" metric at iteration 100
        event1 = self._create_task_event("plot", task, 100)
        event1["metric"] = "confusion"
        event1.update(
            {
                "plot_str": json.dumps(
                    {
                        "data": [
                            {
                                "y": [
                                    "lying",
                                    "sitting",
                                    "standing",
                                    "people",
                                    "backgroun",
                                ],
                                "x": [
                                    "lying",
                                    "sitting",
                                    "standing",
                                    "people",
                                    "backgroun",
                                ],
                                "z": [
                                    [758, 163, 0, 0, 23],
                                    [63, 858, 3, 0, 0],
                                    [0, 50, 188, 21, 35],
                                    [0, 22, 8, 40, 4],
                                    [12, 91, 26, 29, 368],
                                ],
                                "type": "heatmap",
                            }
                        ],
                        "layout": {
                            "title": "Confusion Matrix for iter 100",
                            "xaxis": {"title": "Predicted value"},
                            "yaxis": {"title": "Real value"},
                        },
                    }
                )
            }
        )
        self.send(event1)
        # both plots are returned with their plot_str intact
        plots = self.api.events.get_task_plots(task=task).plots
        self.assertEqual(
            {e["plot_str"] for e in (event, event1)}, {p.plot_str for p in plots}
        )
        # resetting the task clears its plot events
        self.api.tasks.reset(task=task)
        plots = self.api.events.get_task_plots(task=task).plots
        self.assertEqual(len(plots), 0)
@unittest.skip("this test will run only if 'validate_plot_str' is set to true")
def test_plots_validation(self):
valid_plot_str = json.dumps({"data": []})
invalid_plot_str = "Not a valid json"
task = self._temp_task()
event = self._create_task_event(
"plot", task, 0, metric="test1", plot_str=valid_plot_str
)
event1 = self._create_task_event(
"plot", task, 100, metric="test2", plot_str=invalid_plot_str
)
self.send_batch([event, event1])
res = self.api.events.get_task_plots(task=task).plots
self.assertEqual(len(res), 1)
self.assertEqual(res[0].metric, "test1")
event = self._create_task_event(
"plot",
task,
0,
metric="test1",
plot_str=valid_plot_str,
skip_validation=True,
)
event1 = self._create_task_event(
"plot",
task,
100,
metric="test2",
plot_str=invalid_plot_str,
skip_validation=True,
)
self.send_batch([event, event1])
res = self.api.events.get_task_plots(task=task).plots
self.assertEqual(len(res), 2)
self.assertEqual(set(r.metric for r in res), {"test1", "test2"})
def send_batch(self, events):
_, data = self.api.send_batch("events.add_batch", events)
return data
def send(self, event):
_, data = self.api.send("events.add", event)
return data
# Allow running this test module directly with the unittest runner
if __name__ == "__main__":
    unittest.main()