clearml-server/apiserver/schema/services/serving.conf

_description: "Serving apis"
_definitions {
    include "_workers_common.conf"
    # A single reference entry expressed as a (type, value) pair.
    # Both fields are mandatory; `type` is restricted to the enum below.
    reference_item {
        type: object
        required: [type, value]
        properties {
            type {
                description: "The type of the reference item"
                type: string
                enum: [app_id, app_instance, model, task, url]
            }
            value {
                description: "The reference item value"
                type: string
            }
        }
    }
    # Array wrapper around reference_item; repeated item types are allowed.
    reference {
        description: "Array of reference items provided by the container instance. Can contain multiple reference items with the same type"
        type: array
        items: ${_definitions.reference_item}
    }
    # Base payload describing a serving container and the model it serves.
    # Only container_id, endpoint_name and model_name are mandatory;
    # every other field (including endpoint_url) is optional.
    serving_model_report {
        type: object
        required: [container_id, endpoint_name, model_name]
        properties {
            container_id {
                type: string
                description: "Container ID. Should uniquely identify a specific container instance"
            }
            endpoint_name {
                type: string
                description: "Endpoint name"
            }
            endpoint_url {
                type: string
                description: "Endpoint URL"
            }
            model_name {
                type: string
                description: "Model name"
            }
            model_source {
                type: string
                description: "Model source"
            }
            model_version {
                type: string
                description: "Model version"
            }
            preprocess_artifact {
                type: string
                description: "Preprocess Artifact"
            }
            input_type {
                type: string
                description: "Input type"
            }
            input_size {
                type: string
                description: "Input size"
            }
            # Optional list of typed references supplied by the container
            reference: ${_definitions.reference}
        }
    }
    # Per-endpoint statistics aggregated over the endpoint's model instances
    # (count, max uptime, total requests, average rate and latency).
    endpoint_stats {
        type: object
        properties {
            endpoint {
                type: string
                description: "Endpoint name"
            }
            model {
                type: string
                description: "Model name"
            }
            url {
                type: string
                description: "Model url"
            }
            instances {
                type: integer
                description: "The number of model serving instances"
            }
            uptime_sec {
                type: integer
                description: "Max of model instance uptime in seconds"
            }
            requests {
                type: integer
                description: "Total requests processed by model instances"
            }
            requests_min {
                type: number
                description: "Average of request rate of model instances per minute"
            }
            latency_ms {
                type: integer
                description: "Average of latency of model instances in ms"
            }
            last_update {
                type: string
                format: "date-time"
                description: "The latest time when one of the model instances was updated"
            }
        }
    }
    # Statistics for one container instance, plus the references it reported.
    container_instance_stats {
        type: object
        properties {
            id {
                type: string
                description: "Container ID"
            }
            uptime_sec {
                type: integer
                description: "Uptime in seconds"
            }
            requests {
                type: integer
                description: "Number of requests"
            }
            requests_min {
                type: number
                description: "Average requests per minute"
            }
            latency_ms {
                type: integer
                description: "Average request latency in ms"
            }
            last_update {
                type: string
                format: "date-time"
                description: "The latest time when the container instance sent update"
            }
            cpu_count {
                type: integer
                description: "CPU Count"
            }
            gpu_count {
                type: integer
                description: "GPU Count"
            }
            reference: ${_definitions.reference}
        }
    }
    # Descriptive (non-statistical) information about a served model.
    # Used as the base object that other schemas in this file extend.
    serving_model_info {
        type: object
        properties {
            endpoint {
                type: string
                description: "Endpoint name"
            }
            model {
                type: string
                description: "Model name"
            }
            url {
                type: string
                description: "Model url"
            }
            model_source {
                type: string
                description: "Model source"
            }
            model_version {
                type: string
                description: "Model version"
            }
            preprocess_artifact {
                type: string
                description: "Preprocess Artifact"
            }
            input_type {
                type: string
                description: "Input type"
            }
            input_size {
                type: string
                description: "Input size"
            }
        }
    }
    # serving_model_info merged with container-level fields
    # (id, uptime, last update time and age since registration).
    container_info: ${_definitions.serving_model_info} {
        properties {
            id {
                type: string
                description: "Container ID"
            }
            uptime_sec {
                type: integer
                description: "Model instance uptime in seconds"
            }
            last_update {
                type: string
                format: "date-time"
                description: "The latest time when the container instance sent update"
            }
            age_sec {
                type: integer
                description: "Amount of seconds since the container registration"
            }
        }
    }
    # One chart series: a title plus two parallel arrays, where values[i]
    # is the sample for the timestamp dates[i].
    metrics_history_series {
        type: object
        properties {
            title {
                type: string
                description: "The title of the series"
            }
            dates {
                type: array
                description: "List of timestamps (in seconds from epoch) in the ascending order. The timestamps are separated by the requested interval."
                items {type: integer}
            }
            values {
                type: array
                description: "List of values corresponding to the timestamps in the dates list."
                items {type: number}
            }
        }
    }
}
# Registers a serving container.
# The request is the serving_model_report payload plus an optional `timeout`.
register_container {
    "2.31" {
        description: "Register container"
        request: ${_definitions.serving_model_report} {
            properties {
                timeout {
                    description: "Registration timeout in seconds. If timeout seconds have passed since the service container last call to register or status_report, the container is automatically removed from the list of registered containers."
                    type: integer
                    default: 600
                }
            }
        }
        # Empty object: no response fields are defined
        response {
            type: object
            additionalProperties: false
        }
    }
}
# Unregisters a serving container identified by its container_id.
unregister_container {
    "2.31" {
        description: "Unregister container"
        request {
            type: object
            required: [container_id]
            properties {
                container_id {
                    type: string
                    description: "Container ID"
                }
            }
        }
        # Empty object: no response fields are defined
        response {
            type: object
            additionalProperties: false
        }
    }
}
# Periodic status report from a serving container.
# The request is the serving_model_report payload plus runtime counters
# and machine statistics.
container_status_report {
    "2.31" {
        description: "Container status report"
        request: ${_definitions.serving_model_report} {
            properties {
                uptime_sec {
                    type: integer
                    description: "Uptime in seconds"
                }
                requests_num {
                    type: integer
                    description: "Number of requests"
                }
                requests_min {
                    type: number
                    description: "Average requests per minute"
                }
                latency_ms {
                    type: integer
                    description: "Average request latency in ms"
                }
                # machine_stats is not declared in this file; it is expected
                # to come from the included _workers_common.conf
                machine_stats {
                    description: "The machine statistics"
                    "$ref": "#/definitions/machine_stats"
                }
            }
        }
        # Empty object: no response fields are defined
        response {
            type: object
            additionalProperties: false
        }
    }
}
# Lists all registered endpoints; takes no request parameters.
get_endpoints {
    "2.31" {
        description: "Get all the registered endpoints"
        request {
            type: object
            additionalProperties: false
        }
        response {
            type: object
            properties {
                # One endpoint_stats entry per endpoint
                endpoints {
                    type: array
                    items { "$ref": "#/definitions/endpoint_stats" }
                }
            }
        }
    }
}
# Lists container instances that are still loading, i.e. have not yet
# reported an endpoint_url. Takes no request parameters.
get_loading_instances {
    "2.31" {
        description: "Get loading instances (endpoint_url not set yet)"
        request {
            type: object
            additionalProperties: false
        }
        response {
            type: object
            properties {
                # One container_info entry per loading instance
                instances {
                    type: array
                    items { "$ref": "#/definitions/container_info" }
                }
            }
        }
    }
}
# Returns the details of the endpoint identified by endpoint_url.
# The response is serving_model_info extended with aggregate uptime,
# last update time and the per-instance statistics list.
get_endpoint_details {
    "2.31" {
        description: "Get endpoint details"
        request {
            type: object
            required: [endpoint_url]
            properties {
                endpoint_url {
                    type: string
                    description: "Endpoint URL"
                }
            }
        }
        response: ${_definitions.serving_model_info} {
            properties {
                uptime_sec {
                    type: integer
                    description: "Max of model instance uptime in seconds"
                }
                last_update {
                    type: string
                    format: "date-time"
                    description: "The latest time when one of the model instances was updated"
                }
                instances {
                    type: array
                    items {"$ref": "#/definitions/container_instance_stats"}
                }
            }
        }
    }
}
# Returns chart data (metric history) for one endpoint over a time range.
# endpoint_url, from_date, to_date and interval are mandatory; metric_type
# defaults to `requests` and instance_charts defaults to true.
get_endpoint_metrics_history {
    "2.31" {
        description: "Get endpoint charts"
        request {
            type: object
            required: [endpoint_url, from_date, to_date, interval]
            properties {
                endpoint_url {
                    description: "Endpoint Url"
                    type: string
                }
                from_date {
                    description: "Starting time (in seconds from epoch) for collecting statistics"
                    type: number
                }
                to_date {
                    description: "Ending time (in seconds from epoch) for collecting statistics"
                    type: number
                }
                interval {
                    description: "Time interval in seconds for a single statistics point. The minimal value is 1"
                    type: integer
                }
                metric_type {
                    description: "The type of the metrics to return on the chart"
                    type: string
                    default: requests
                    enum: [
                        requests
                        requests_min
                        latency_ms
                        cpu_count
                        gpu_count
                        cpu_util
                        gpu_util
                        ram_total
                        ram_used
                        ram_free
                        gpu_ram_total
                        gpu_ram_used
                        gpu_ram_free
                        network_rx
                        network_tx
                    ]
                }
                instance_charts {
                    type: boolean
                    default: true
                    description: "If set then return instance charts and total. Otherwise total only"
                }
            }
        }
        response {
            type: object
            properties {
                computed_interval {
                    description: "The interval that was actually used for the histogram. May be larger than the requested one"
                    type: integer
                }
                # The description is set on the `total` node itself: any key
                # placed under `properties` would be read as a field of the
                # series object rather than as documentation.
                total: ${_definitions.metrics_history_series} {
                    description: "The total histogram"
                }
                # Map of series objects; the key set is open (additionalProperties)
                instances {
                    description: "Instance charts"
                    type: object
                    additionalProperties: ${_definitions.metrics_history_series}
                }
            }
        }
    }
}