Enterprise create queue (#171)

* Fixed: typo in env example

* Added: create queues switch

* Added: force configuration file mount

* Changed: bump version

* Fixed: helm docs
This commit is contained in:
Valeriano Manassero 2023-02-20 13:52:36 +01:00 committed by GitHub
parent 013734c184
commit e1fcc5b466
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 28 additions and 8 deletions

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: clearml-agent
description: MLOps platform Task running agent
type: application
version: "3.5.0"
version: "3.6.0"
appVersion: "1.24"
kubeVersion: ">= 1.21.0-0 < 1.27.0-0"
home: https://clear.ml

View File

@ -1,6 +1,6 @@
# ClearML Kubernetes Agent
![Version: 3.5.0](https://img.shields.io/badge/Version-3.5.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square)
![Version: 3.6.0](https://img.shields.io/badge/Version-3.6.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square)
MLOps platform Task running agent
@ -75,9 +75,10 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0`
| clearml.clearmlConfig | string | `"sdk {\n}"` | ClearML configuration file |
| clearml.existingAgentk8sglueSecret | string | `""` | If this is set, chart will not generate a secret but will use what is defined here |
| clearml.existingClearmlConfigSecret | string | `""` | If this is set, chart will not generate a secret but will use what is defined here |
| enterpriseFeatures | object | `{"agentImageTagOverride":"1.24-57","applyVaultEnvVars":true,"enabled":false,"maxPods":10,"monitoredResources":{"maxResources":0,"maxResourcesFieldName":"resources|limits|nvidia.com/gpu","minResourcesFieldName":"resources|limits|nvidia.com/gpu"},"queues":null,"serviceAccountClusterAccess":false,"useOwnerToken":true}` | Enterprise features (work only with an Enterprise license) |
| enterpriseFeatures.agentImageTagOverride | string | `"1.24-57"` | Image tag override for enterprise version |
| enterpriseFeatures | object | `{"agentImageTagOverride":"1.24-58","applyVaultEnvVars":true,"createQueues":false,"enabled":false,"maxPods":10,"monitoredResources":{"maxResources":0,"maxResourcesFieldName":"resources|limits|nvidia.com/gpu","minResourcesFieldName":"resources|limits|nvidia.com/gpu"},"queues":null,"serviceAccountClusterAccess":false,"useOwnerToken":true}` | Enterprise features (work only with an Enterprise license) |
| enterpriseFeatures.agentImageTagOverride | string | `"1.24-58"` | Image tag override for enterprise version |
| enterpriseFeatures.applyVaultEnvVars | bool | `true` | push env vars from Clear.ML Vault to task pods |
| enterpriseFeatures.createQueues | bool | `false` | Create queues if they don't exist |
| enterpriseFeatures.enabled | bool | `false` | Enable/Disable Enterprise features |
| enterpriseFeatures.maxPods | int | `10` | maximum concurrent consume ClearML Task pod |
| enterpriseFeatures.monitoredResources | object | `{"maxResources":0,"maxResourcesFieldName":"resources|limits|nvidia.com/gpu","minResourcesFieldName":"resources|limits|nvidia.com/gpu"}` | GPU resource general counters |

View File

@ -72,6 +72,17 @@ Create secret to access docker registry
{{- end }}
{{- end }}
{{/*
Create a queues parameter
*/}}
{{- define "agentk8sglue.createQueues" -}}
{{- if .Values.enterpriseFeatures.createQueues }}
{{- printf "%d" 1}}
{{- else }}
{{- printf "%d" 0 }}
{{- end }}
{{- end }}
{{/*
Create a string composed by queue names
*/}}

View File

@ -113,6 +113,10 @@ spec:
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml \
--max-pods {{.Values.enterpriseFeatures.maxPods}}{{ if .Values.enterpriseFeatures.enabled }}{{ if .Values.enterpriseFeatures.useOwnerToken }} --use-owner-token{{ end }}{{ end }}"
{{- end }}
{{- if .Values.clearml.clearmlConfig }}
- name: CLEARML_CONFIG_FILE
value: /root/clearml.conf
{{- end }}
- name: CLEARML_K8S_GLUE_LIMIT_POD_LABEL
value: "ai.allegro.agent.serial=pod-{pod_number}"
- name: CLEARML_K8S_SECRETS_LIST_FILE
@ -168,14 +172,16 @@ spec:
{{- if .Values.enterpriseFeatures.enabled }}
- name: K8S_GLUE_QUEUE
value: {{ include "agentk8sglue.queues" . | quote }}
- name: CLEARML_K8S_GLUE_CREATE_QUEUE
value: {{ include "agentk8sglue.createQueues" . | quote }}
- name: CLEARML_K8S_GLUE_APPLY_VAULT_ENV_VARS
value: {{ .Values.enterpriseFeatures.applyVaultEnvVars | quote }}
- name: "CLEARML_K8S_GLUE_POD_MIN_RES_FIELD"
value: {{.Values.enterpriseFeatures.monitoredResources.minResourcesFieldName}}
value: {{ .Values.enterpriseFeatures.monitoredResources.minResourcesFieldName }}
- name: "CLEARML_K8S_GLUE_MAX_RESOURCES"
value: "{{.Values.enterpriseFeatures.monitoredResources.maxResources}}"
- name: "CLEARML_K8S_GLUE_POD_MAX_RES_FIELD"
value: {{.Values.enterpriseFeatures.monitoredResources.maxResourcesFieldName}}
value: {{ .Values.enterpriseFeatures.monitoredResources.maxResourcesFieldName }}
{{- else }}
- name: K8S_GLUE_QUEUE
value: {{ .Values.agentk8sglue.queue }}

View File

@ -163,7 +163,7 @@ agentk8sglue:
# - name: CURL_CA_BUNDLE
# value: ""
# - name: PYTHONWARNINGS
# value: "=\"ignore:Unverified HTTPS request\""
# value: "ignore:Unverified HTTPS request"
# -- resources declaration for pods spawned to consume ClearML Task (example in values.yaml comments)
resources: {}
# limits:
@ -215,7 +215,7 @@ enterpriseFeatures:
# -- Enable/Disable Enterprise features
enabled: false
# -- Image tag override for enterprise version
agentImageTagOverride: "1.24-57"
agentImageTagOverride: "1.24-58"
# -- service account access every namespace flag
serviceAccountClusterAccess: false
# -- push env vars from Clear.ML Vault to task pods
@ -232,6 +232,8 @@ enterpriseFeatures:
maxPods: 10
# -- Agent must use owner Token
useOwnerToken: true
# -- Create queues if they don't exist
createQueues: false
# -- ClearML queues and related template OVERRIDES used this agent will consume
queues:
# -- name of the queue will be used for this template