mirror of
https://github.com/clearml/clearml-server
synced 2025-01-31 10:56:48 +00:00
437 lines
13 KiB
Plaintext
437 lines
13 KiB
Plaintext
# Top-level description string for the schemas declared in this file.
_description = "Serving apis"
|
|
_definitions {
|
|
include "_workers_common.conf"
|
|
# A single typed reference: a `type` tag taken from a fixed enum plus a
# string `value`. Both fields are mandatory.
reference_item {
    type: object
    required: [type, value]
    properties {
        # Must be one of the enumerated reference kinds.
        type {
            type: string
            enum: [app_id, app_instance, model, task, url]
            description: "The type of the reference item"
        }
        value {
            type: string
            description: "The reference item value"
        }
    }
}
|
|
# Array schema whose elements are `reference_item` objects. Per the
# description, several items may share the same type.
reference {
    type: array
    items: ${_definitions.reference_item}
    description: "Array of reference items provided by the container instance. Can contain multiple reference items with the same type"
}
|
|
# Description of one serving container and the model it exposes. This is the
# base object that the register_container and container_status_report requests
# extend. Only container_id, endpoint_name and model_name are mandatory.
serving_model_report {
    type: object
    required: [container_id, endpoint_name, model_name]
    properties {
        # --- mandatory identification ---
        container_id { description: "Container ID. Should uniquely identify a specific container instance", type: string }
        endpoint_name { description: "Endpoint name", type: string }

        # --- optional endpoint / model details ---
        endpoint_url { description: "Endpoint URL", type: string }
        model_name { description: "Model name", type: string }  # mandatory, see `required`
        model_source { description: "Model source", type: string }
        model_version { description: "Model version", type: string }
        preprocess_artifact { description: "Preprocess Artifact", type: string }
        input_type { description: "Input type", type: string }
        input_size { description: "Input size", type: string }

        # Shared array-of-reference-items schema.
        reference: ${_definitions.reference}
    }
}
|
|
# Aggregated statistics for one endpoint across its model instances
# (element type of the get_endpoints response). No field is mandatory.
endpoint_stats {
    type: object
    properties {
        # --- identification ---
        endpoint { description: "Endpoint name", type: string }
        model { description: "Model name", type: string }
        url { description: "Model url", type: string }

        # --- aggregates over the model instances ---
        instances { description: "The number of model serving instances", type: integer }
        uptime_sec { description: "Max of model instance uptime in seconds", type: integer }
        requests { description: "Total requests processed by model instances", type: integer }
        requests_min { description: "Average of request rate of model instances per minute", type: number }
        latency_ms { description: "Average of latency of model instances in ms", type: integer }
        last_update {
            description: "The latest time when one of the model instances was updated"
            type: string
            format: "date-time"
        }
    }
}
|
|
# Statistics reported for a single container instance (element type of the
# `instances` array in the get_endpoint_details response). No field is mandatory.
container_instance_stats {
    type: object
    properties {
        id { description: "Container ID", type: string }

        # --- request counters ---
        uptime_sec { description: "Uptime in seconds", type: integer }
        requests { description: "Number of requests", type: integer }
        requests_min { description: "Average requests per minute", type: number }
        latency_ms { description: "Average request latency in ms", type: integer }
        last_update {
            description: "The latest time when the container instance sent update"
            type: string
            format: "date-time"
        }

        # --- hardware counts ---
        cpu_count { description: "CPU Count", type: integer }
        gpu_count { description: "GPU Count", type: integer }

        # Shared array-of-reference-items schema.
        reference: ${_definitions.reference}
    }
}
|
|
# Common endpoint/model descriptive fields. Used as the base object that
# container_info and the get_endpoint_details response extend.
serving_model_info {
    type: object
    properties {
        endpoint { description: "Endpoint name", type: string }
        model { description: "Model name", type: string }
        url { description: "Model url", type: string }
        model_source { description: "Model source", type: string }
        model_version { description: "Model version", type: string }
        preprocess_artifact { description: "Preprocess Artifact", type: string }
        input_type { description: "Input type", type: string }
        input_size { description: "Input size", type: string }
    }
}
|
|
# serving_model_info extended with per-container fields. The object literal
# following the substitution is merged into the referenced definition, so the
# properties below are added to the inherited ones.
container_info = ${_definitions.serving_model_info} {
    properties {
        id { description: "Container ID", type: string }
        uptime_sec { description: "Model instance uptime in seconds", type: integer }
        last_update {
            description: "The latest time when the container instance sent update"
            type: string
            format: "date-time"
        }
        age_sec { description: "Amount of seconds since the container registration", type: integer }
    }
}
|
|
# One chart series: a title plus two parallel arrays, `dates` (integer epoch
# seconds) and `values` (numbers), where values[i] corresponds to dates[i].
metrics_history_series {
    type: object
    properties {
        title {
            type: string
            description: "The title of the series"
        }
        dates {
            type: array
            # Fixed typo in the published description: "acceding" -> "ascending".
            description: "List of timestamps (in seconds from epoch) in ascending order. The timestamps are separated by the requested interval."
            items { type: integer }
        }
        values {
            type: array
            description: "List of values corresponding to the timestamps in the dates list."
            items { type: number }
        }
    }
}
|
|
}
|
|
# Endpoint: register a serving container.
register_container {
    # The key is quoted so HOCON does not split it on the dot.
    # Presumably the API version this definition applies from — confirm.
    "2.31" {
        description: "Register container"

        # Request = serving_model_report plus an optional `timeout`
        # (the trailing object is merged into the referenced definition).
        request: ${_definitions.serving_model_report} {
            properties {
                timeout {
                    type: integer
                    default: 600
                    description: "Registration timeout in seconds. If timeout seconds have passed since the service container last call to register or status_report, the container is automatically removed from the list of registered containers."
                }
            }
        }

        # Empty object response; no extra keys are allowed.
        response { type: object, additionalProperties: false }
    }
}
|
|
# Endpoint: unregister a serving container, identified by its container ID.
unregister_container {
    # The key is quoted so HOCON does not split it on the dot.
    "2.31" {
        description: "Unregister container"

        request {
            type: object
            required: [container_id]
            properties {
                container_id { description: "Container ID", type: string }
            }
        }

        # Empty object response; no extra keys are allowed.
        response { type: object, additionalProperties: false }
    }
}
|
|
# Endpoint: periodic status report sent for a serving container.
container_status_report {
    # The key is quoted so HOCON does not split it on the dot.
    "2.31" {
        description: "Container status report"

        # Request = serving_model_report plus runtime counters
        # (the trailing object is merged into the referenced definition).
        request: ${_definitions.serving_model_report} {
            properties {
                uptime_sec { description: "Uptime in seconds", type: integer }
                requests_num { description: "Number of requests", type: integer }
                requests_min { description: "Average requests per minute", type: number }
                latency_ms { description: "Average request latency in ms", type: integer }

                # `machine_stats` is not defined in the visible part of this file;
                # presumably it comes from the included _workers_common.conf — confirm.
                machine_stats {
                    "$ref": "#/definitions/machine_stats"
                    description: "The machine statistics"
                }
            }
        }

        # Empty object response; no extra keys are allowed.
        response { type: object, additionalProperties: false }
    }
}
|
|
# Endpoint: list all registered endpoints with their aggregated statistics.
get_endpoints {
    # The key is quoted so HOCON does not split it on the dot.
    "2.31" {
        description: "Get all the registered endpoints"

        # The request takes no parameters; extra keys are rejected.
        request { type: object, additionalProperties: false }

        response {
            type: object
            properties {
                # One endpoint_stats entry per endpoint.
                endpoints {
                    type: array
                    items { "$ref": "#/definitions/endpoint_stats" }
                }
            }
        }
    }
}
|
|
# Endpoint: list container instances that are still loading, i.e. that have
# not reported an endpoint_url yet (per the description below).
get_loading_instances {
    # The key is quoted so HOCON does not split it on the dot.
    "2.31" {
        # Fixed typo in the published description: "enpoint_url" -> "endpoint_url".
        description: "Get loading instances (endpoint_url not set yet)"

        # The request takes no parameters; extra keys are rejected.
        request {
            type: object
            additionalProperties: false
        }

        response {
            type: object
            properties {
                # One container_info entry per loading instance.
                instances {
                    type: array
                    items { "$ref": "#/definitions/container_info" }
                }
            }
        }
    }
}
|
|
# Endpoint: detailed information for one endpoint, looked up by its URL.
get_endpoint_details {
    # The key is quoted so HOCON does not split it on the dot.
    "2.31" {
        description: "Get endpoint details"

        request {
            type: object
            required: [endpoint_url]
            properties {
                endpoint_url { description: "Endpoint URL", type: string }
            }
        }

        # Response = serving_model_info plus endpoint-level aggregates and the
        # per-container statistics list (the trailing object is merged into the
        # referenced definition).
        response = ${_definitions.serving_model_info} {
            properties {
                uptime_sec { description: "Max of model instance uptime in seconds", type: integer }
                last_update {
                    description: "The latest time when one of the model instances was updated"
                    type: string
                    format: "date-time"
                }
                instances {
                    type: array
                    items { "$ref": "#/definitions/container_instance_stats" }
                }
            }
        }
    }
}
|
|
# Endpoint: time-series charts of one metric for an endpoint over a date range.
get_endpoint_metrics_history {
    # The key is quoted so HOCON does not split it on the dot.
    "2.31" {
        description: Get endpoint charts

        request {
            type: object
            required: [endpoint_url, from_date, to_date, interval]
            properties {
                endpoint_url {
                    description: Endpoint Url
                    type: string
                }
                from_date {
                    description: "Starting time (in seconds from epoch) for collecting statistics"
                    type: number
                }
                to_date {
                    description: "Ending time (in seconds from epoch) for collecting statistics"
                    type: number
                }
                interval {
                    description: "Time interval in seconds for a single statistics point. The minimal value is 1"
                    type: integer
                }
                # Optional; defaults to `requests` when omitted.
                metric_type {
                    description: The type of the metrics to return on the chart
                    type: string
                    default: requests
                    enum: [
                        requests
                        requests_min
                        latency_ms
                        cpu_count
                        gpu_count
                        cpu_util
                        gpu_util
                        ram_total
                        ram_used
                        ram_free
                        gpu_ram_total
                        gpu_ram_used
                        gpu_ram_free
                        network_rx
                        network_tx
                    ]
                }
                # Optional; defaults to true.
                instance_charts {
                    type: boolean
                    default: true
                    description: If set then return instance charts and total. Otherwise total only
                }
            }
        }

        response {
            type: object
            properties {
                computed_interval {
                    # Fixed typos in the published description:
                    # "inteval" -> "interval", "larger then" -> "larger than".
                    description: The interval that was actually used for the histogram. May be larger than the requested one
                    type: integer
                }
                # The description belongs on the `total` schema itself. Nesting it
                # under `properties` declared a bogus property named "description"
                # whose value was a plain string, where a property schema is expected.
                total: ${_definitions.metrics_history_series} {
                    description: The total histogram
                }
                # Object whose values are each a metrics_history_series.
                instances {
                    description: Instance charts
                    type: object
                    additionalProperties: ${_definitions.metrics_history_series}
                }
            }
        }
    }
}