# Schema of the serving service API: shared object definitions followed by
# one entry per endpoint. Each endpoint is keyed by a quoted version string
# ("2.31" throughout this file) holding its description and request/response
# schemas.
_description: "Serving apis"
_definitions {
    # NOTE(review): presumably supplies the `machine_stats` definition that
    # container_status_report references via "$ref" -- confirm against the
    # included file.
    include "_workers_common.conf"

    # A single typed reference attached to a container instance.
    reference_item {
        type: object
        required = [type, value]
        properties {
            type {
                description: The type of the reference item
                type: string
                enum: [app_id, app_instance, model, task, url]
            }
            value {
                description: The reference item value
                type: string
            }
        }
    }
    reference {
        description: Array of reference items provided by the container instance. Can contain multiple reference items with the same type
        type: array
        items: ${_definitions.reference_item}
    }

    # Base payload shared by the register_container and
    # container_status_report requests.
    serving_model_report {
        type: object
        required: [container_id, endpoint_name, model_name]
        properties {
            container_id {
                type: string
                description: Container ID. Should uniquely identify a specific container instance
            }
            endpoint_name {
                type: string
                description: Endpoint name
            }
            endpoint_url {
                type: string
                description: Endpoint URL
            }
            model_name {
                type: string
                description: Model name
            }
            model_source {
                type: string
                description: Model source
            }
            model_version {
                type: string
                description: Model version
            }
            preprocess_artifact {
                type: string
                description: Preprocess Artifact
            }
            input_type {
                type: string
                description: Input type
            }
            input_size {
                type: string
                description: Input size
            }
            reference: ${_definitions.reference}
        }
    }

    # Per-endpoint statistics; item type of the get_endpoints response.
    endpoint_stats {
        type: object
        properties {
            endpoint {
                type: string
                description: Endpoint name
            }
            model {
                type: string
                description: Model name
            }
            url {
                type: string
                description: Model url
            }
            instances {
                type: integer
                description: The number of model serving instances
            }
            uptime_sec {
                type: integer
                description: Max of model instance uptime in seconds
            }
            requests {
                type: integer
                description: Total requests processed by model instances
            }
            requests_min {
                type: number
                description: Average of request rate of model instances per minute
            }
            latency_ms {
                type: integer
                description: Average of latency of model instances in ms
            }
            last_update {
                type: string
                format: "date-time"
                description: The latest time when one of the model instances was updated
            }
        }
    }

    # Per-container statistics; item type of get_endpoint_details.instances.
    container_instance_stats {
        type: object
        properties {
            id {
                type: string
                description: Container ID
            }
            uptime_sec {
                type: integer
                description: Uptime in seconds
            }
            requests {
                type: integer
                description: Number of requests
            }
            requests_min {
                type: number
                description: Average requests per minute
            }
            latency_ms {
                type: integer
                description: Average request latency in ms
            }
            last_update {
                type: string
                format: "date-time"
                description: The latest time when the container instance sent update
            }
            cpu_count {
                type: integer
                description: CPU Count
            }
            gpu_count {
                type: integer
                description: GPU Count
            }
            reference: ${_definitions.reference}
        }
    }

    # Model/endpoint description fields; extended below by container_info and
    # by the get_endpoint_details response.
    serving_model_info {
        type: object
        properties {
            endpoint {
                type: string
                description: Endpoint name
            }
            model {
                type: string
                description: Model name
            }
            url {
                type: string
                description: Model url
            }
            model_source {
                type: string
                description: Model source
            }
            model_version {
                type: string
                description: Model version
            }
            preprocess_artifact {
                type: string
                description: Preprocess Artifact
            }
            input_type {
                type: string
                description: Input type
            }
            input_size {
                type: string
                description: Input size
            }
        }
    }

    # serving_model_info merged with container-specific properties.
    # The substitution and the opening brace must stay on the same line so
    # that HOCON concatenates (merges) the two objects.
    container_info: ${_definitions.serving_model_info} {
        properties {
            id {
                type: string
                description: Container ID
            }
            uptime_sec {
                type: integer
                description: Model instance uptime in seconds
            }
            last_update {
                type: string
                format: "date-time"
                description: The latest time when the container instance sent update
            }
            age_sec {
                type: integer
                description: Amount of seconds since the container registration
            }
        }
    }

    # One chart series: parallel `dates` and `values` arrays plus a title.
    metrics_history_series {
        type: object
        properties {
            title {
                type: string
                description: "The title of the series"
            }
            dates {
                type: array
                description: "List of timestamps (in seconds from epoch) in the ascending order. The timestamps are separated by the requested interval."
                items {type: integer}
            }
            values {
                type: array
                description: "List of values corresponding to the timestamps in the dates list."
                items {type: number}
            }
        }
    }
}

register_container {
    "2.31" {
        description: Register container
        # serving_model_report merged with an optional registration timeout.
        request = ${_definitions.serving_model_report} {
            properties {
                timeout {
                    description: "Registration timeout in seconds. If timeout seconds have passed since the service container last call to register or status_report, the container is automatically removed from the list of registered containers."
                    type: integer
                    default: 600
                }
            }
        }
        response {
            type: object
            additionalProperties: false
        }
    }
}

unregister_container {
    "2.31" {
        description: Unregister container
        request {
            type: object
            required: [container_id]
            properties {
                container_id {
                    type: string
                    description: Container ID
                }
            }
        }
        response {
            type: object
            additionalProperties: false
        }
    }
}

container_status_report {
    "2.31" {
        description: Container status report
        # serving_model_report merged with the runtime counters below.
        # Note the request field is `requests_num`, while the stats
        # definitions above expose the counter as `requests`.
        request = ${_definitions.serving_model_report} {
            properties {
                uptime_sec {
                    type: integer
                    description: Uptime in seconds
                }
                requests_num {
                    type: integer
                    description: Number of requests
                }
                requests_min {
                    type: number
                    description: Average requests per minute
                }
                latency_ms {
                    type: integer
                    description: Average request latency in ms
                }
                machine_stats {
                    description: "The machine statistics"
                    "$ref": "#/definitions/machine_stats"
                }
            }
        }
        response {
            type: object
            additionalProperties: false
        }
    }
}

get_endpoints {
    "2.31" {
        description: Get all the registered endpoints
        request {
            type: object
            additionalProperties: false
        }
        response {
            type: object
            properties {
                endpoints {
                    type: array
                    items { "$ref": "#/definitions/endpoint_stats" }
                }
            }
        }
    }
}

get_loading_instances {
    "2.31" {
        description: "Get loading instances (endpoint_url not set yet)"
        request {
            type: object
            additionalProperties: false
        }
        response {
            type: object
            properties {
                instances {
                    type: array
                    items { "$ref": "#/definitions/container_info" }
                }
            }
        }
    }
}

get_endpoint_details {
    "2.31" {
        description: Get endpoint details
        request {
            type: object
            required: [endpoint_url]
            properties {
                endpoint_url {
                    type: string
                    description: Endpoint URL
                }
            }
        }
        # serving_model_info merged with aggregate and per-instance fields.
        response: ${_definitions.serving_model_info} {
            properties {
                uptime_sec {
                    type: integer
                    description: Max of model instance uptime in seconds
                }
                last_update {
                    type: string
                    format: "date-time"
                    description: The latest time when one of the model instances was updated
                }
                instances {
                    type: array
                    items {"$ref": "#/definitions/container_instance_stats"}
                }
            }
        }
    }
}

get_endpoint_metrics_history {
    "2.31" {
        description: Get endpoint charts
        request {
            type: object
            required: [endpoint_url, from_date, to_date, interval]
            properties {
                endpoint_url {
                    description: Endpoint Url
                    type: string
                }
                from_date {
                    description: "Starting time (in seconds from epoch) for collecting statistics"
                    type: number
                }
                to_date {
                    description: "Ending time (in seconds from epoch) for collecting statistics"
                    type: number
                }
                interval {
                    description: "Time interval in seconds for a single statistics point. The minimal value is 1"
                    type: integer
                }
                metric_type {
                    description: The type of the metrics to return on the chart
                    type: string
                    default: requests
                    # One value per line: array elements are separated by
                    # newlines (or commas) in HOCON.
                    enum: [
                        requests
                        requests_min
                        latency_ms
                        cpu_count
                        gpu_count
                        cpu_util
                        gpu_util
                        ram_total
                        ram_used
                        ram_free
                        gpu_ram_total
                        gpu_ram_used
                        gpu_ram_free
                        network_rx
                        network_tx
                    ]
                }
                instance_charts {
                    type: boolean
                    default: true
                    description: If set then return instance charts and total. Otherwise total only
                }
            }
        }
        response {
            type: object
            properties {
                computed_interval {
                    description: The interval that was actually used for the histogram. May be larger than the requested one
                    type: integer
                }
                # The description is attached to `total` itself. Placing it
                # under `properties` would declare a property called
                # "description" whose schema is a bare string.
                total: ${_definitions.metrics_history_series} {
                    description: The total histogram
                }
                # Series keyed by an arbitrary string (additionalProperties).
                # NOTE(review): the key is presumably the container instance
                # id -- confirm against the service implementation.
                instances {
                    description: Instance charts
                    type: object
                    additionalProperties: ${_definitions.metrics_history_series}
                }
            }
        }
    }
}