mirror of
https://github.com/clearml/clearml-server
synced 2025-04-27 17:31:25 +00:00
Add reference field to serving models
This commit is contained in:
parent
0b61ec2a56
commit
77e7fb5c13
@ -1,4 +1,5 @@
|
||||
from enum import Enum
|
||||
from typing import Sequence
|
||||
|
||||
from jsonmodels.models import Base
|
||||
from jsonmodels.fields import (
|
||||
@ -6,8 +7,10 @@ from jsonmodels.fields import (
|
||||
EmbeddedField,
|
||||
DateTimeField,
|
||||
IntField,
|
||||
FloatField, BoolField,
|
||||
FloatField,
|
||||
BoolField,
|
||||
)
|
||||
from jsonmodels import validators
|
||||
from jsonmodels.validators import Min
|
||||
|
||||
from apiserver.apimodels import ListField, JsonSerializableMixin
|
||||
@ -16,6 +19,14 @@ from apiserver.config_repo import config
|
||||
from .workers import MachineStats
|
||||
|
||||
|
||||
# A single typed reference entry: a jsonmodels model with two required string
# fields. ServingModel.reference is declared as a list of these items.
class ReferenceItem(Base):
|
||||
# Kind of reference. Required; the Enum validator restricts it to one of
# "app_id", "app_instance", "model", "task" or "url".
type = StringField(
|
||||
required=True,
|
||||
validators=validators.Enum("app_id", "app_instance", "model", "task", "url"),
|
||||
)
|
||||
# The reference itself. Required string with no further validation here.
# NOTE(review): presumably an ID or URL matching `type` — confirm with callers.
value = StringField(required=True)
|
||||
|
||||
|
||||
class ServingModel(Base):
|
||||
container_id = StringField(required=True)
|
||||
endpoint_name = StringField(required=True)
|
||||
@ -28,12 +39,15 @@ class ServingModel(Base):
|
||||
input_size = IntField()
|
||||
tags = ListField(str)
|
||||
system_tags = ListField(str)
|
||||
reference: Sequence[ReferenceItem] = ListField(ReferenceItem)
|
||||
|
||||
|
||||
class RegisterRequest(ServingModel):
|
||||
timeout = IntField(
|
||||
default=int(config.get("services.serving.default_container_timeout_sec", 10 * 60)),
|
||||
validators=[Min(1)]
|
||||
default=int(
|
||||
config.get("services.serving.default_container_timeout_sec", 10 * 60)
|
||||
),
|
||||
validators=[Min(1)],
|
||||
)
|
||||
""" registration timeout in seconds (default is 10min) """
|
||||
|
||||
@ -84,7 +98,5 @@ class GetEndpointMetricsHistoryRequest(Base):
|
||||
to_date = FloatField(required=True, validators=Min(0))
|
||||
interval = IntField(required=True, validators=Min(1))
|
||||
endpoint_url = StringField(required=True)
|
||||
metric_type = ActualEnumField(
|
||||
MetricType, default=MetricType.requests
|
||||
)
|
||||
metric_type = ActualEnumField(MetricType, default=MetricType.requests)
|
||||
instance_charts = BoolField(default=True)
|
||||
|
@ -207,7 +207,9 @@ class ServingBLL:
|
||||
if not self._count:
|
||||
return None
|
||||
avg = self._total / self._count
|
||||
return round(avg, self.float_precision) if self.float_precision else round(avg)
|
||||
return (
|
||||
round(avg, self.float_precision) if self.float_precision else round(avg)
|
||||
)
|
||||
|
||||
def _get_summary(self, entries: Sequence[ServingContainerEntry]) -> dict:
|
||||
counters = [
|
||||
@ -263,7 +265,9 @@ class ServingBLL:
|
||||
by_url.pop(None, None)
|
||||
return [self._get_summary(url_entries) for url_entries in by_url.values()]
|
||||
|
||||
def _get_endpoint_entries(self, company_id, endpoint_url: Union[str, None]) -> Sequence[ServingContainerEntry]:
|
||||
def _get_endpoint_entries(
|
||||
self, company_id, endpoint_url: Union[str, None]
|
||||
) -> Sequence[ServingContainerEntry]:
|
||||
url_key = self._get_url_key(company_id, endpoint_url)
|
||||
timestamp = int(time())
|
||||
self.redis.zremrangebyscore(url_key, min=0, max=timestamp)
|
||||
@ -328,7 +332,6 @@ class ServingBLL:
|
||||
"endpoint": entry.endpoint_name,
|
||||
"model": entry.model_name,
|
||||
"url": entry.endpoint_url,
|
||||
|
||||
}
|
||||
)
|
||||
|
||||
@ -352,7 +355,10 @@ class ServingBLL:
|
||||
"requests_min": entry.requests_min,
|
||||
"latency_ms": entry.latency_ms,
|
||||
"last_update": self._naive_time(entry.last_activity_time),
|
||||
"reference": [ref.to_struct() for ref in entry.reference]
|
||||
if isinstance(entry.reference, list)
|
||||
else entry.reference,
|
||||
}
|
||||
for entry in entries
|
||||
]
|
||||
],
|
||||
}
|
||||
|
@ -1,13 +1,33 @@
|
||||
_description: "Serving apis"
|
||||
_definitions {
|
||||
include "_workers_common.conf"
|
||||
reference_item {
|
||||
type: object
|
||||
required = [type, value]
|
||||
properties {
|
||||
type {
|
||||
description: The type of the reference item
|
||||
type: string
|
||||
enum: [app_id, app_instance, model, task, url]
|
||||
}
|
||||
value {
|
||||
description: The reference item value
|
||||
type: string
|
||||
}
|
||||
}
|
||||
}
|
||||
reference {
|
||||
description: Array of reference items provided by the container instance. Can contain multiple reference items with the same type
|
||||
type: array
|
||||
items: ${_definitions.reference_item}
|
||||
}
|
||||
serving_model_report {
|
||||
type: object
|
||||
required: [container_id, endpoint_name, model_name]
|
||||
properties {
|
||||
container_id {
|
||||
type: string
|
||||
description: Container ID
|
||||
description: Container ID. Should uniquely identify a specific container instance
|
||||
}
|
||||
endpoint_name {
|
||||
type: string
|
||||
@ -41,6 +61,7 @@ _definitions {
|
||||
type: integer
|
||||
description: Input size in bytes
|
||||
}
|
||||
reference: ${_definitions.reference}
|
||||
}
|
||||
}
|
||||
endpoint_stats {
|
||||
@ -113,6 +134,8 @@ _definitions {
|
||||
format: "date-time"
|
||||
description: The latest time when the container instance sent an update
|
||||
}
|
||||
reference: ${_definitions.reference}
|
||||
|
||||
}
|
||||
}
|
||||
serving_model_info {
|
||||
|
@ -9,6 +9,12 @@ class TestServing(TestService):
|
||||
container_id1 = "container_1"
|
||||
container_id2 = "container_2"
|
||||
url = "http://test_url"
|
||||
reference = [
|
||||
{"type": "app_id", "value": "test"},
|
||||
{"type": "app_instance", "value": "abd478c8"},
|
||||
{"type": "model", "value": "262829d3"},
|
||||
{"type": "model", "value": "7ea29c04"},
|
||||
]
|
||||
container_infos = [
|
||||
{
|
||||
"container_id": container_id, # required
|
||||
@ -22,6 +28,7 @@ class TestServing(TestService):
|
||||
"input_size": 9_000_000, # optional right now, bytes
|
||||
"tags": ["tag1", "tag2"], # optional
|
||||
"system_tags": None, # optional
|
||||
**({"reference": reference} if container_id == container_id1 else {}),
|
||||
}
|
||||
for container_id in (container_id1, container_id2)
|
||||
]
|
||||
@ -61,11 +68,15 @@ class TestServing(TestService):
|
||||
"requests",
|
||||
"requests_min",
|
||||
"latency_ms",
|
||||
"reference",
|
||||
)
|
||||
]
|
||||
for inst in details.instances
|
||||
},
|
||||
{"container_1": [1000, 1000, 5, 100], "container_2": [2000, 2000, 10, 200]},
|
||||
{
|
||||
"container_1": [1000, 1000, 5, 100, reference],
|
||||
"container_2": [2000, 2000, 10, 200, []],
|
||||
},
|
||||
)
|
||||
# make sure that the first call did not invalidate anything
|
||||
new_details = self.api.serving.get_endpoint_details(endpoint_url=url)
|
||||
@ -93,7 +104,7 @@ class TestServing(TestService):
|
||||
self.assertEqual(res.computed_interval, 40)
|
||||
self.assertEqual(res.total.title, title)
|
||||
length = len(res.total.dates)
|
||||
self.assertTrue(3>=length>=1)
|
||||
self.assertTrue(3 >= length >= 1)
|
||||
self.assertEqual(len(res.total["values"]), length)
|
||||
self.assertIn(value, res.total["values"])
|
||||
self.assertEqual(set(res.instances), {container_id1, container_id2})
|
||||
|
Loading…
Reference in New Issue
Block a user