146 agentadd affinity config (#147)

* Added: affinity parameter

* Changed: bump version
This commit is contained in:
Valeriano Manassero 2023-02-02 12:20:06 +01:00 committed by GitHub
parent 12baef0d75
commit c7b3a28989
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 40 additions and 10 deletions

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: clearml-agent
description: MLOps platform Task running agent
type: application
version: "3.2.0"
version: "3.3.0"
appVersion: "1.24"
kubeVersion: ">= 1.21.0-0 < 1.27.0-0"
home: https://clear.ml
@ -21,6 +21,4 @@ keywords:
annotations:
artifacthub.io/changes: |
- kind: added
description: securityContext parameter for agent pod
- kind: added
description: support for kubernetes 1.26
description: affinity parameter

View File

@ -1,6 +1,6 @@
# ClearML Kubernetes Agent
![Version: 3.2.0](https://img.shields.io/badge/Version-3.2.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square)
![Version: 3.3.0](https://img.shields.io/badge/Version-3.3.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square)
MLOps platform Task running agent
@ -30,10 +30,12 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0`
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| agentk8sglue | object | `{"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"securityContext":{},"serviceExistingAccountName":"","volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
| agentk8sglue | object | `{"affinity":{},"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"securityContext":{},"serviceExistingAccountName":"","tolerations":[],"volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
| agentk8sglue.affinity | object | `{}` | affinity setup for Agent pod (example in values.yaml comments) |
| agentk8sglue.annotations | object | `{}` | annotations setup for Agent pod (example in values.yaml comments) |
| agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url |
| agentk8sglue.basePodTemplate | object | `{"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task |
| agentk8sglue.basePodTemplate | object | `{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task |
| agentk8sglue.basePodTemplate.affinity | object | `{}` | affinity setup for pods spawned to consume ClearML Task |
| agentk8sglue.basePodTemplate.annotations | object | `{}` | annotations setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
| agentk8sglue.basePodTemplate.env | list | `[]` | environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) |
| agentk8sglue.basePodTemplate.fileMounts | list | `[]` | file definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
@ -61,6 +63,7 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0`
| agentk8sglue.replicaCount | int | `1` | Glue Agent number of pods |
| agentk8sglue.securityContext | object | `{}` | Web Server pod security context |
| agentk8sglue.serviceExistingAccountName | string | `""` | if set, don't create a serviceAccountName but use defined existing one |
| agentk8sglue.tolerations | list | `[]` | tolerations setup for Agent pod (example in values.yaml comments) |
| agentk8sglue.volumeMounts | list | `[]` | volume mounts definition for Glue Agent (example in values.yaml comments) |
| agentk8sglue.volumes | list | `[]` | volumes definition for Glue Agent (example in values.yaml comments) |
| agentk8sglue.webServerUrlReference | string | `"https://app.clear.ml"` | Reference to Web server url |

View File

@ -172,6 +172,17 @@ data:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- end }}
{{- if $value.templateOverrides.affinity }}
{{- with $value.templateOverrides.affinity }}
affinity:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- else if $.Values.agentk8sglue.basePodTemplate.affinity }}
{{- with $.Values.agentk8sglue.basePodTemplate.affinity }}
affinity:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- end }}
{{- end }}
secrets.yaml: |
{{- range $key, $value := $.Values.enterpriseFeatures.queues }}
@ -250,6 +261,10 @@ data:
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.agentk8sglue.basePodTemplate.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
{{- if .Values.sessions.portModeEnabled }}
{{- range untilStep 1 ( ( add .Values.sessions.maxServices 1 ) | int ) 1 }}

View File

@ -177,6 +177,14 @@ spec:
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.agentk8sglue.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.agentk8sglue.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
volumes:
- name: {{ include "clearml.name" . }}-pt
configMap:

View File

@ -82,6 +82,10 @@ agentk8sglue:
# -- nodeSelector setup for Agent pod (example in values.yaml comments)
nodeSelector: {}
# fleet: agent-nodes
# -- tolerations setup for Agent pod (example in values.yaml comments)
tolerations: []
# -- affinity setup for Agent pod (example in values.yaml comments)
affinity: {}
# -- volumes definition for Glue Agent (example in values.yaml comments)
volumes: []
# - name: "yourvolume"
@ -162,14 +166,16 @@ agentk8sglue:
resources: {}
# limits:
# nvidia.com/gpu: 1
# -- nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments)
nodeSelector: {}
# fleet: gpu-nodes
# -- tolerations setup for pods spawned to consume ClearML Task (example in values.yaml comments)
tolerations: []
# - key: "nvidia.com/gpu"
# operator: Exists
# effect: "NoSchedule"
# -- nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments)
nodeSelector: {}
# fleet: gpu-nodes
# -- affinity setup for pods spawned to consume ClearML Task
affinity: {}
# -- securityContext setup for pods spawned to consume ClearML Task (example in values.yaml comments)
securityContext: {}
# runAsUser: 1001