mirror of
https://github.com/clearml/clearml-helm-charts
synced 2025-04-17 01:31:13 +00:00
146 agent add affinity config (#147)
* Added: affinity parameter
* Changed: bump version
This commit is contained in:
parent
12baef0d75
commit
c7b3a28989
@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: clearml-agent
|
||||
description: MLOps platform Task running agent
|
||||
type: application
|
||||
version: "3.2.0"
|
||||
version: "3.3.0"
|
||||
appVersion: "1.24"
|
||||
kubeVersion: ">= 1.21.0-0 < 1.27.0-0"
|
||||
home: https://clear.ml
|
||||
@ -21,6 +21,4 @@ keywords:
|
||||
annotations:
|
||||
artifacthub.io/changes: |
|
||||
- kind: added
|
||||
description: securityContext parameter for agent pod
|
||||
- kind: added
|
||||
description: support for kubernetes 1.26
|
||||
description: affinity parameter
|
||||
|
@ -1,6 +1,6 @@
|
||||
# ClearML Kubernetes Agent
|
||||
|
||||
  
|
||||
  
|
||||
|
||||
MLOps platform Task running agent
|
||||
|
||||
@ -30,10 +30,12 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0`
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| agentk8sglue | object | `{"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"securityContext":{},"serviceExistingAccountName":"","volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
|
||||
| agentk8sglue | object | `{"affinity":{},"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"securityContext":{},"serviceExistingAccountName":"","tolerations":[],"volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
|
||||
| agentk8sglue.affinity | object | `{}` | affinity setup for Agent pod (example in values.yaml comments) |
|
||||
| agentk8sglue.annotations | object | `{}` | annotations setup for Agent pod (example in values.yaml comments) |
|
||||
| agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url |
|
||||
| agentk8sglue.basePodTemplate | object | `{"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.basePodTemplate | object | `{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.basePodTemplate.affinity | object | `{}` | affinity setup for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.basePodTemplate.annotations | object | `{}` | annotations setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.env | list | `[]` | environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.fileMounts | list | `[]` | file definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
@ -61,6 +63,7 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0`
|
||||
| agentk8sglue.replicaCount | int | `1` | Glue Agent number of pods |
|
||||
| agentk8sglue.securityContext | object | `{}` | Agent pod security context |
|
||||
| agentk8sglue.serviceExistingAccountName | string | `""` | if set, don't create a serviceAccountName but use defined existing one |
|
||||
| agentk8sglue.tolerations | list | `[]` | tolerations setup for Agent pod (example in values.yaml comments) |
|
||||
| agentk8sglue.volumeMounts | list | `[]` | volume mounts definition for Glue Agent (example in values.yaml comments) |
|
||||
| agentk8sglue.volumes | list | `[]` | volumes definition for Glue Agent (example in values.yaml comments) |
|
||||
| agentk8sglue.webServerUrlReference | string | `"https://app.clear.ml"` | Reference to Web server url |
|
||||
|
@ -172,6 +172,17 @@ data:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if $value.templateOverrides.affinity }}
|
||||
{{- with $value.templateOverrides.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.affinity }}
|
||||
{{- with $.Values.agentk8sglue.basePodTemplate.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
secrets.yaml: |
|
||||
{{- range $key, $value := $.Values.enterpriseFeatures.queues }}
|
||||
@ -250,6 +261,10 @@ data:
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.basePodTemplate.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.sessions.portModeEnabled }}
|
||||
{{- range untilStep 1 ( ( add .Values.sessions.maxServices 1 ) | int ) 1 }}
|
||||
|
@ -177,6 +177,14 @@ spec:
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
volumes:
|
||||
- name: {{ include "clearml.name" . }}-pt
|
||||
configMap:
|
||||
|
@ -82,6 +82,10 @@ agentk8sglue:
|
||||
# -- nodeSelector setup for Agent pod (example in values.yaml comments)
|
||||
nodeSelector: {}
|
||||
# fleet: agent-nodes
|
||||
# -- tolerations setup for Agent pod (example in values.yaml comments)
|
||||
tolerations: []
|
||||
# -- affinity setup for Agent pod (example in values.yaml comments)
|
||||
affinity: {}
|
||||
# -- volumes definition for Glue Agent (example in values.yaml comments)
|
||||
volumes: []
|
||||
# - name: "yourvolume"
|
||||
@ -162,14 +166,16 @@ agentk8sglue:
|
||||
resources: {}
|
||||
# limits:
|
||||
# nvidia.com/gpu: 1
|
||||
# -- nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
nodeSelector: {}
|
||||
# fleet: gpu-nodes
|
||||
# -- tolerations setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
tolerations: []
|
||||
# - key: "nvidia.com/gpu"
|
||||
# operator: Exists
|
||||
# effect: "NoSchedule"
|
||||
# -- nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
nodeSelector: {}
|
||||
# fleet: gpu-nodes
|
||||
# -- affinity setup for pods spawned to consume ClearML Task
|
||||
affinity: {}
|
||||
# -- securityContext setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
securityContext: {}
|
||||
# runAsUser: 1001
|
||||
|
Loading…
Reference in New Issue
Block a user