mirror of
https://github.com/clearml/clearml-server
synced 2025-04-16 21:41:37 +00:00
Fix workers.activity_report to return 0s for time intervals in which no workers reported
This commit is contained in:
parent
e713e876eb
commit
e08123fcc0
@ -215,6 +215,10 @@ class WorkerStats:
|
|||||||
"date_histogram": {
|
"date_histogram": {
|
||||||
"field": "timestamp",
|
"field": "timestamp",
|
||||||
"fixed_interval": f"{interval}s",
|
"fixed_interval": f"{interval}s",
|
||||||
|
"extended_bounds": {
|
||||||
|
"min": int(from_date) * 1000,
|
||||||
|
"max": int(to_date) * 1000,
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"aggs": {"workers_count": {"cardinality": {"field": "worker"}}},
|
"aggs": {"workers_count": {"cardinality": {"field": "worker"}}},
|
||||||
}
|
}
|
||||||
|
@ -1,10 +1,9 @@
|
|||||||
import time
|
import time
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
from datetime import timedelta
|
|
||||||
from typing import Sequence
|
from typing import Sequence
|
||||||
|
|
||||||
from apiserver.apierrors.errors import bad_request
|
from apiserver.apierrors.errors import bad_request
|
||||||
from apiserver.tests.automated import TestService, utc_now_tz_aware
|
from apiserver.tests.automated import TestService
|
||||||
from apiserver.config_repo import config
|
from apiserver.config_repo import config
|
||||||
|
|
||||||
log = config.logger(__file__)
|
log = config.logger(__file__)
|
||||||
@ -84,7 +83,7 @@ class TestWorkersService(TestService):
|
|||||||
self._check_exists(test_worker, False, tags=["test"])
|
self._check_exists(test_worker, False, tags=["test"])
|
||||||
self._check_exists(test_worker, False, tags=["-application"])
|
self._check_exists(test_worker, False, tags=["-application"])
|
||||||
|
|
||||||
def _simulate_workers(self) -> Sequence[str]:
|
def _simulate_workers(self, start: int) -> Sequence[str]:
|
||||||
"""
|
"""
|
||||||
Two workers writing the same metrics. One for 4 seconds. Another one for 2
|
Two workers writing the same metrics. One for 4 seconds. Another one for 2
|
||||||
The first worker reports a task
|
The first worker reports a task
|
||||||
@ -106,7 +105,7 @@ class TestWorkersService(TestService):
|
|||||||
(workers[0],),
|
(workers[0],),
|
||||||
(workers[0],),
|
(workers[0],),
|
||||||
]
|
]
|
||||||
timestamp = int(utc_now_tz_aware().timestamp() * 1000)
|
timestamp = start * 1000
|
||||||
for ws, stats in zip(workers_activity, workers_stats):
|
for ws, stats in zip(workers_activity, workers_stats):
|
||||||
for w, s in zip(ws, stats):
|
for w, s in zip(ws, stats):
|
||||||
data = dict(
|
data = dict(
|
||||||
@ -130,7 +129,7 @@ class TestWorkersService(TestService):
|
|||||||
return task_id
|
return task_id
|
||||||
|
|
||||||
def test_get_keys(self):
|
def test_get_keys(self):
|
||||||
workers = self._simulate_workers()
|
workers = self._simulate_workers(int(time.time()))
|
||||||
time.sleep(5) # give to es time to refresh
|
time.sleep(5) # give to es time to refresh
|
||||||
res = self.api.workers.get_metric_keys(worker_ids=workers)
|
res = self.api.workers.get_metric_keys(worker_ids=workers)
|
||||||
assert {"cpu", "memory"} == set(c.name for c in res["categories"])
|
assert {"cpu", "memory"} == set(c.name for c in res["categories"])
|
||||||
@ -147,12 +146,12 @@ class TestWorkersService(TestService):
|
|||||||
self.api.workers.get_metric_keys(worker_ids=["Non existing worker id"])
|
self.api.workers.get_metric_keys(worker_ids=["Non existing worker id"])
|
||||||
|
|
||||||
def test_get_stats(self):
|
def test_get_stats(self):
|
||||||
workers = self._simulate_workers()
|
start = int(time.time())
|
||||||
|
workers = self._simulate_workers(start)
|
||||||
to_date = utc_now_tz_aware() + timedelta(seconds=10)
|
|
||||||
from_date = to_date - timedelta(days=1)
|
|
||||||
|
|
||||||
time.sleep(5) # give to ES time to refresh
|
time.sleep(5) # give to ES time to refresh
|
||||||
|
from_date = start
|
||||||
|
to_date = start + 10
|
||||||
# no variants
|
# no variants
|
||||||
res = self.api.workers.get_stats(
|
res = self.api.workers.get_stats(
|
||||||
items=[
|
items=[
|
||||||
@ -161,8 +160,8 @@ class TestWorkersService(TestService):
|
|||||||
dict(key="memory_used", aggregation="max"),
|
dict(key="memory_used", aggregation="max"),
|
||||||
dict(key="memory_used", aggregation="min"),
|
dict(key="memory_used", aggregation="min"),
|
||||||
],
|
],
|
||||||
from_date=from_date.timestamp(),
|
from_date=from_date,
|
||||||
to_date=to_date.timestamp(),
|
to_date=to_date,
|
||||||
# split_by_variant=True,
|
# split_by_variant=True,
|
||||||
interval=1,
|
interval=1,
|
||||||
worker_ids=workers,
|
worker_ids=workers,
|
||||||
@ -186,8 +185,8 @@ class TestWorkersService(TestService):
|
|||||||
# split by variants
|
# split by variants
|
||||||
res = self.api.workers.get_stats(
|
res = self.api.workers.get_stats(
|
||||||
items=[dict(key="cpu_usage", aggregation="avg")],
|
items=[dict(key="cpu_usage", aggregation="avg")],
|
||||||
from_date=from_date.timestamp(),
|
from_date=from_date,
|
||||||
to_date=to_date.timestamp(),
|
to_date=to_date,
|
||||||
split_by_variant=True,
|
split_by_variant=True,
|
||||||
interval=1,
|
interval=1,
|
||||||
worker_ids=workers,
|
worker_ids=workers,
|
||||||
@ -204,8 +203,8 @@ class TestWorkersService(TestService):
|
|||||||
|
|
||||||
res = self.api.workers.get_stats(
|
res = self.api.workers.get_stats(
|
||||||
items=[dict(key="cpu_usage", aggregation="avg")],
|
items=[dict(key="cpu_usage", aggregation="avg")],
|
||||||
from_date=from_date.timestamp(),
|
from_date=from_date,
|
||||||
to_date=to_date.timestamp(),
|
to_date=to_date,
|
||||||
interval=1,
|
interval=1,
|
||||||
worker_ids=["Non existing worker id"],
|
worker_ids=["Non existing worker id"],
|
||||||
)
|
)
|
||||||
@ -223,29 +222,19 @@ class TestWorkersService(TestService):
|
|||||||
# to_timestamp=to_timestamp.timestamp(),
|
# to_timestamp=to_timestamp.timestamp(),
|
||||||
# interval=20,
|
# interval=20,
|
||||||
# )
|
# )
|
||||||
|
start = int(time.time())
|
||||||
self._simulate_workers()
|
self._simulate_workers(int(time.time()))
|
||||||
|
|
||||||
to_date = utc_now_tz_aware() + timedelta(seconds=10)
|
|
||||||
from_date = to_date - timedelta(minutes=1)
|
|
||||||
|
|
||||||
time.sleep(5) # give to es time to refresh
|
time.sleep(5) # give to es time to refresh
|
||||||
# no variants
|
# no variants
|
||||||
res = self.api.workers.get_activity_report(
|
res = self.api.workers.get_activity_report(
|
||||||
from_date=from_date.timestamp(), to_date=to_date.timestamp(), interval=20
|
from_date=start, to_date=start + 10, interval=2
|
||||||
)
|
)
|
||||||
self.assertWorkerSeries(res["total"], 2)
|
self.assertWorkerSeries(res["total"], 2, 5)
|
||||||
self.assertWorkerSeries(res["active"], 1)
|
self.assertWorkerSeries(res["active"], 1, 5)
|
||||||
self.assertTotalSeriesGreaterThenActive(res["total"], res["active"])
|
|
||||||
|
|
||||||
@staticmethod
|
def assertWorkerSeries(self, series_data: dict, count: int, size: int):
|
||||||
def assertTotalSeriesGreaterThenActive(total_data: dict, active_data: dict):
|
self.assertEqual(len(series_data["dates"]), size)
|
||||||
assert total_data["dates"][-1] == active_data["dates"][-1]
|
self.assertEqual(len(series_data["counts"]), size)
|
||||||
assert total_data["counts"][-1] > active_data["counts"][-1]
|
self.assertTrue(any(c == count for c in series_data["counts"]))
|
||||||
|
self.assertTrue(all(c <= count for c in series_data["counts"]))
|
||||||
@staticmethod
|
|
||||||
def assertWorkerSeries(series_data: dict, min_count: int):
|
|
||||||
assert len(series_data["dates"]) == len(series_data["counts"])
|
|
||||||
# check the last 20s aggregation
|
|
||||||
# there may be more workers that we created since we are not filtering by test workers here
|
|
||||||
assert series_data["counts"][-1] >= min_count
|
|
||||||
|
Loading…
Reference in New Issue
Block a user