Agent chart annotations, labels and sa improvements (#122)

* Added: sa reference name in task pod

* Changed: version bump

* Added: annotations generator

* Added: annotations

* Aded: labels and annotations

* Added: annotations and labels

* Added: agent node-selector

* Fixed: annotations generation
This commit is contained in:
Valeriano Manassero 2023-01-04 12:01:24 +01:00 committed by GitHub
parent 7041c62f44
commit 622ec331ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 58 additions and 9 deletions

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: clearml-agent
description: MLOps platform
type: application
version: "3.0.0"
version: "3.1.0"
appVersion: "1.24"
kubeVersion: ">= 1.19.0-0 < 1.26.0-0"
home: https://clear.ml

View File

@ -1,6 +1,6 @@
# ClearML Kubernetes Agent
![Version: 3.0.0](https://img.shields.io/badge/Version-3.0.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square)
![Version: 3.1.0](https://img.shields.io/badge/Version-3.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square)
MLOps platform
@ -30,9 +30,11 @@ Kubernetes: `>= 1.19.0-0 < 1.26.0-0`
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| agentk8sglue | object | `{"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"queue":"default","replicaCount":1,"serviceExistingAccountName":"","volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
| agentk8sglue | object | `{"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"serviceExistingAccountName":"","volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
| agentk8sglue.annotations | object | `{}` | annotations setup for Agent pod (example in values.yaml comments) |
| agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url |
| agentk8sglue.basePodTemplate | object | `{"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task |
| agentk8sglue.basePodTemplate | object | `{"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task |
| agentk8sglue.basePodTemplate.annotations | object | `{}` | annotations setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
| agentk8sglue.basePodTemplate.env | list | `[]` | environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) |
| agentk8sglue.basePodTemplate.fileMounts | list | `[]` | file definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
| agentk8sglue.basePodTemplate.hostAliases | object | `{}` | hostAliases setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
@ -47,13 +49,14 @@ Kubernetes: `>= 1.19.0-0 < 1.26.0-0`
| agentk8sglue.basePodTemplate.volumes | list | `[]` | volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
| agentk8sglue.clearmlcheckCertificate | bool | `true` | Check certificates validity for evefry UrlReference below. |
| agentk8sglue.containerCustomBashScript | string | `""` | Custom Bash script for the Task Pods ran by Glue Agent |
| agentk8sglue.customBashScript | string | `""` | Custom Bash script for the Glue Agent |
| agentk8sglue.debugMode | bool | `false` | Enable Debugging logs for Agent pod |
| agentk8sglue.defaultContainerImage | string | `"ubuntu:18.04"` | default container image for ClearML Task pod |
| agentk8sglue.extraEnvs | list | `[]` | Extra Environment variables for Glue Agent |
| agentk8sglue.fileMounts | list | `[]` | file definition for Glue Agent (example in values.yaml comments) |
| agentk8sglue.fileServerUrlReference | string | `"https://files.clear.ml"` | Reference to File server url |
| agentk8sglue.image | object | `{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"}` | Glue Agent image configuration |
| agentk8sglue.labels | object | `{}` | labels setup for Agent pod (example in values.yaml comments) |
| agentk8sglue.nodeSelector | object | `{}` | nodeSelector setup for Agent pod (example in values.yaml comments) |
| agentk8sglue.queue | string | `"default"` | ClearML queue this agent will consume |
| agentk8sglue.replicaCount | int | `1` | Glue Agent number of pods |
| agentk8sglue.serviceExistingAccountName | string | `""` | if set, don't create a serviceAccountName but use defined existing one |

View File

@ -22,6 +22,18 @@ helm.sh/chart: {{ include "clearml.chart" . }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- if $.Values.agentk8sglue.labels }}
{{ toYaml $.Values.agentk8sglue.labels }}
{{- end }}
{{- end }}
{{/*
Common annotations
*/}}
{{- define "clearml.annotations" -}}
{{- if $.Values.agentk8sglue.annotations }}
{{ toYaml $.Values.agentk8sglue.annotations }}
{{- end }}
{{- end }}
{{/*

View File

@ -17,6 +17,13 @@ data:
labels:
{{- toYaml $.Values.agentk8sglue.basePodTemplate.labels | nindent 10 }}
{{- end}}
{{- if $value.templateOverrides.annotations }}
annotations:
{{- toYaml $value.templateOverrides.annotations | nindent 10 }}
{{- else if $.Values.agentk8sglue.basePodTemplate.annotations }}
annotations:
{{- toYaml $.Values.agentk8sglue.basePodTemplate.annotations | nindent 10 }}
{{- end}}
spec:
{{- if $.Values.imageCredentials.enabled }}
imagePullSecrets:
@ -65,6 +72,9 @@ data:
secret:
secretName: {{ include "clearml.name" $ }}-fm
{{- end }}
{{- if not $.Values.enterpriseFeatures.serviceAccountClusterAccess }}
serviceAccountName: {{ include "clearml.serviceAccountName" $ }}
{{- end }}
{{- if $value.templateOverrides.initContainers }}
initContainers:
{{- toYaml $value.templateOverrides.initContainers | nindent 10 }}
@ -177,6 +187,10 @@ data:
apiVersion: v1
metadata:
namespace: {{ .Release.Namespace }}
labels:
{{- toYaml $.Values.agentk8sglue.basePodTemplate.labels | nindent 8 }}
annotations:
{{- toYaml $.Values.agentk8sglue.basePodTemplate.annotations | nindent 8 }}
spec:
{{- if .Values.imageCredentials.enabled }}
imagePullSecrets:
@ -190,6 +204,7 @@ data:
volumes:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "clearml.serviceAccountName" $ }}
containers:
- resources:
{{- toYaml .Values.agentk8sglue.basePodTemplate.resources | nindent 10 }}

View File

@ -4,6 +4,8 @@ metadata:
name: {{ include "clearml.name" . }}
labels:
{{- include "clearml.labels" . | nindent 4 }}
annotations:
{{- include "clearml.annotations" . | nindent 4 }}
spec:
replicas: {{ .Values.agentk8sglue.replicaCount }}
selector:
@ -13,8 +15,9 @@ spec:
metadata:
annotations:
checksum/config: {{ printf "%s%s" .Values.clearml .Values.agentk8sglue | sha256sum }}
{{- include "clearml.annotations" . | nindent 8 }}
labels:
{{- include "agentk8sglue.selectorLabels" . | nindent 8 }}
{{- include "clearml.labels" . | nindent 8 }}
spec:
{{- if .Values.imageCredentials.enabled }}
imagePullSecrets:
@ -161,6 +164,10 @@ spec:
- name: K8S_GLUE_QUEUE
value: {{ .Values.agentk8sglue.queue }}
{{- end }}
{{- with .Values.agentk8sglue.basePodTemplate.nodeSelector}}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
volumes:
- name: {{ include "clearml.name" . }}-pt
configMap:

View File

@ -62,6 +62,12 @@ agentk8sglue:
# -- ClearML queue this agent will consume
queue: default
# -- Custom Bash script for the Glue Agent
# -- labels setup for Agent pod (example in values.yaml comments)
labels: {}
# schedulerName: scheduler
# -- annotations setup for Agent pod (example in values.yaml comments)
annotations: {}
# key1: value1
customBashScript: ""
# -- Custom Bash script for the Task Pods ran by Glue Agent
containerCustomBashScript: ""
@ -70,6 +76,9 @@ agentk8sglue:
# - name: PYTHONPATH
# value: "somepath"
# -- nodeSelector setup for Agent pod (example in values.yaml comments)
nodeSelector: {}
# fleet: agent-nodes
# -- volumes definition for Glue Agent (example in values.yaml comments)
volumes: []
# - name: "yourvolume"
@ -97,6 +106,12 @@ agentk8sglue:
# -- base template for pods spawned to consume ClearML Task
basePodTemplate:
# -- labels setup for pods spawned to consume ClearML Task (example in values.yaml comments)
labels: {}
# schedulerName: scheduler
# -- annotations setup for pods spawned to consume ClearML Task (example in values.yaml comments)
annotations: {}
# key1: value1
# -- initContainers definition for pods spawned to consume ClearML Task (example in values.yaml comments)
initContainers: []
# - name: volume-dirs-init-cntr
@ -106,9 +121,6 @@ agentk8sglue:
# - -c
# - >
# /bin/echo "this is an init";
# -- labels setup for pods spawned to consume ClearML Task (example in values.yaml comments)
labels: {}
# schedulerName: scheduler
# -- schedulerName setup for pods spawned to consume ClearML Task
schedulerName: ""
# -- volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments)