2024-12-05 20:20:11 +00:00
|
|
|
from time import time, sleep
|
|
|
|
|
|
|
|
from apiserver.apierrors import errors
|
|
|
|
from apiserver.tests.automated import TestService
|
|
|
|
|
|
|
|
|
|
|
|
class TestServing(TestService):
    """Automated tests for the ``serving`` API service."""

    def test_status_report(self):
        # Scenario: register two containers behind the same endpoint URL,
        # send one status report per container, check the endpoint listing,
        # the endpoint details and the metric charts, then unregister both
        # containers and check that the endpoint is no longer reported.
        first_id = "container_1"
        second_id = "container_2"
        container_ids = (first_id, second_id)
        endpoint_url = "http://test_url"
        reference = [
            {"type": "app_id", "value": "test"},
            {"type": "app_instance", "value": "abd478c8"},
            {"type": "model", "value": "262829d3"},
            {"type": "model", "value": "7ea29c04"},
        ]

        container_infos = []
        for cid in container_ids:
            info = {
                "container_id": cid,  # required
                "endpoint_name": "my endpoint",  # required
                # can be omitted for register but required for status report
                "endpoint_url": endpoint_url,
                "model_name": "my model",  # required
                "model_source": "s3//my_bucket",  # optional right now
                "model_version": "3.1.0",  # optional right now
                "preprocess_artifact": "some string here",  # optional right now
                "input_type": "another string here",  # optional right now
                "input_size": 9_000_000,  # optional right now, bytes
                "tags": ["tag1", "tag2"],  # optional
                "system_tags": None,  # optional
            }
            # only the first container is sent with a reference list
            if cid == first_id:
                info["reference"] = reference
            container_infos.append(info)

        # registering instances
        for info in container_infos:
            self.api.serving.register_container(
                **info,
                # expiration timeout in seconds. Optional, the default value is 600
                timeout=100,
            )

        # reporting status: every reported number is scaled by the 1-based
        # position of the container so the two instances can be told apart
        for mul, info in enumerate(container_infos, start=1):
            report = dict(
                info,
                uptime_sec=1000 * mul,
                requests_num=1000 * mul,
                requests_min=5 * mul,  # requests per minute
                latency_ms=100 * mul,  # average latency
                # the same structure here as used by worker status_reports
                machine_stats={
                    "cpu_usage": [10, 20],
                    "memory_used": 50,
                },
            )
            self.api.serving.container_status_report(**report)

        # getting endpoints and endpoint details
        endpoints = self.api.serving.get_endpoints().endpoints
        listed = [e for e in endpoints if e.url == endpoint_url]
        self.assertTrue(any(listed))

        details = self.api.serving.get_endpoint_details(endpoint_url=endpoint_url)
        self.assertEqual(details.url, endpoint_url)
        self.assertEqual(details.uptime_sec, 2000)

        compared_fields = (
            "uptime_sec",
            "requests",
            "requests_min",
            "latency_ms",
            "reference",
        )
        reported = {}
        for inst in details.instances:
            reported[inst.id] = [inst[name] for name in compared_fields]
        expected = {
            "container_1": [1000, 1000, 5, 100, reference],
            "container_2": [2000, 2000, 10, 200, []],
        }
        self.assertEqual(reported, expected)

        # make sure that the first call did not invalidate anything
        repeated = self.api.serving.get_endpoint_details(endpoint_url=endpoint_url)
        self.assertEqual(details, repeated)

        # charts
        sleep(5)  # give ES time to accommodate the data
        to_date = int(time()) + 40
        from_date = to_date - 100
        chart_cases = (
            # (metric_type, expected chart title, value expected among the totals)
            (None, "Number of Requests", 3000),
            ("requests_min", "Requests per Minute", 15),
            ("latency_ms", "Average Latency (ms)", 150),
            ("cpu_count", "CPU Count", 4),
            ("cpu_util", "Average CPU Load (%)", 15),
            ("ram_total", "RAM Total (GB)", 100),
        )
        for metric_type, title, value in chart_cases:
            query = {
                "endpoint_url": endpoint_url,
                "from_date": from_date,
                "to_date": to_date,
                "interval": 1,
            }
            # metric_type is left out of the request entirely when it is None
            if metric_type:
                query["metric_type"] = metric_type
            res = self.api.serving.get_endpoint_metrics_history(**query)

            self.assertEqual(res.computed_interval, 40)
            self.assertEqual(res.total.title, title)

            length = len(res.total.dates)
            self.assertTrue(1 <= length <= 3)
            totals = res.total["values"]
            self.assertEqual(len(totals), length)
            self.assertIn(value, totals)

            # every instance series must line up with the total series
            self.assertEqual(set(res.instances), {first_id, second_id})
            for inst in res.instances.values():
                self.assertEqual(inst.dates, res.total.dates)
                self.assertEqual(len(inst["values"]), length)

        # unregistering containers
        for cid in container_ids:
            self.api.serving.unregister_container(container_id=cid)
        endpoints = self.api.serving.get_endpoints().endpoints
        remaining = [e for e in endpoints if e.url == endpoint_url]
        self.assertFalse(any(remaining))

        # with no containers left, asking for the endpoint details must fail
        with self.api.raises(errors.bad_request.NoContainersForUrl):
            self.api.serving.get_endpoint_details(endpoint_url=endpoint_url)
|