mirror of
https://github.com/clearml/clearml-helm-charts
synced 2025-04-17 01:31:13 +00:00
Changed: agent split securityContexts
This commit is contained in:
parent
1c5c439556
commit
98b2878fc2
@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: clearml-agent
|
||||
description: MLOps platform Task running agent
|
||||
type: application
|
||||
version: "3.7.0"
|
||||
version: "4.0.0"
|
||||
appVersion: "1.24"
|
||||
kubeVersion: ">= 1.21.0-0 < 1.27.0-0"
|
||||
home: https://clear.ml
|
||||
@ -21,4 +21,4 @@ keywords:
|
||||
annotations:
|
||||
artifacthub.io/changes: |
|
||||
- kind: added
|
||||
description: support for existing rolebindings and clusterrolebindings
|
||||
description: podSecurityContext/containerSecurityContext split
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# ClearML Kubernetes Agent
|
||||
|
||||
  
|
||||
  
|
||||
|
||||
MLOps platform Task running agent
|
||||
|
||||
@ -17,6 +17,29 @@ MLOps platform Task running agent
|
||||
The **clearml-agent** is the Kubernetes agent for [ClearML](https://github.com/allegroai/clearml).
|
||||
It allows you to schedule distributed experiments on a Kubernetes cluster.
|
||||
|
||||
# Upgrading Chart
|
||||
|
||||
## Upgrades/ Values upgrades
|
||||
|
||||
Updating to latest version of this chart can be done in two steps:
|
||||
|
||||
```
|
||||
helm repo update
|
||||
helm upgrade clearml-agent allegroai/clearml-agent
|
||||
```
|
||||
|
||||
Changing values on existing installation can be done with:
|
||||
|
||||
```
|
||||
helm upgrade clearml-agent allegroai/clearml-agent --version <CURRENT CHART VERSION> -f custom_values.yaml
|
||||
```
|
||||
|
||||
### Major upgrade from 3.* to 4.*
|
||||
|
||||
Before issuing helm upgrade:
|
||||
|
||||
* if using securityContexts, check the new value format in values.yaml (`podSecurityContext` and `containerSecurityContext`)
|
||||
|
||||
## Source Code
|
||||
|
||||
* <https://github.com/allegroai/clearml-helm-charts>
|
||||
@ -30,30 +53,32 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0`
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| agentk8sglue | object | `{"additionalClusterRoleBindings":[],"additionalRoleBindings":[],"affinity":{},"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"priorityClassName":"","resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"securityContext":{},"serviceExistingAccountName":"","taskAsJob":false,"tolerations":[],"volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
|
||||
| agentk8sglue | object | `{"additionalClusterRoleBindings":[],"additionalRoleBindings":[],"affinity":{},"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"affinity":{},"annotations":{},"containerSecurityContext":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"podSecurityContext":{},"priorityClassName":"","resources":{},"schedulerName":"","tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","containerSecurityContext":{},"customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"podSecurityContext":{},"queue":"default","replicaCount":1,"serviceExistingAccountName":"","taskAsJob":false,"tolerations":[],"volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
|
||||
| agentk8sglue.additionalClusterRoleBindings | list | `[]` | additional existing ClusterRoleBindings |
|
||||
| agentk8sglue.additionalRoleBindings | list | `[]` | additional existing RoleBindings |
|
||||
| agentk8sglue.affinity | object | `{}` | affinity setup for Agent pod (example in values.yaml comments) |
|
||||
| agentk8sglue.annotations | object | `{}` | annotations setup for Agent pod (example in values.yaml comments) |
|
||||
| agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url |
|
||||
| agentk8sglue.basePodTemplate | object | `{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"priorityClassName":"","resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.basePodTemplate | object | `{"affinity":{},"annotations":{},"containerSecurityContext":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"podSecurityContext":{},"priorityClassName":"","resources":{},"schedulerName":"","tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.basePodTemplate.affinity | object | `{}` | affinity setup for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.basePodTemplate.annotations | object | `{}` | annotations setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.containerSecurityContext | object | `{}` | securityContext setup for containers spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.env | list | `[]` | environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.fileMounts | list | `[]` | file definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.hostAliases | list | `[]` | hostAliases setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.initContainers | list | `[]` | initContainers definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.labels | object | `{}` | labels setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.nodeSelector | object | `{}` | nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.podSecurityContext | object | `{}` | securityContext setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.priorityClassName | string | `""` | priorityClassName setup for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.basePodTemplate.resources | object | `{}` | resources declaration for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.schedulerName | string | `""` | schedulerName setup for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.basePodTemplate.securityContext | object | `{}` | securityContext setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.tolerations | list | `[]` | tolerations setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.volumeMounts | list | `[]` | volume mounts definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.volumes | list | `[]` | volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.clearmlcheckCertificate | bool | `true` | Check certificates validity for every UrlReference below. |
|
||||
| agentk8sglue.containerCustomBashScript | string | `""` | Custom Bash script for the Task Pods ran by Glue Agent |
|
||||
| agentk8sglue.containerSecurityContext | object | `{}` | container securityContext setup for Agent pod (example in values.yaml comments) |
|
||||
| agentk8sglue.debugMode | bool | `false` | Enable Debugging logs for Agent pod |
|
||||
| agentk8sglue.defaultContainerImage | string | `"ubuntu:18.04"` | default container image for ClearML Task pod |
|
||||
| agentk8sglue.extraEnvs | list | `[]` | Extra Environment variables for Glue Agent |
|
||||
@ -62,9 +87,9 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0`
|
||||
| agentk8sglue.image | object | `{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"}` | Glue Agent image configuration |
|
||||
| agentk8sglue.labels | object | `{}` | labels setup for Agent pod (example in values.yaml comments) |
|
||||
| agentk8sglue.nodeSelector | object | `{}` | nodeSelector setup for Agent pod (example in values.yaml comments) |
|
||||
| agentk8sglue.podSecurityContext | object | `{}` | pod securityContext setup for Agent pod (example in values.yaml comments) |
|
||||
| agentk8sglue.queue | string | `"default"` | ClearML queue this agent will consume |
|
||||
| agentk8sglue.replicaCount | int | `1` | Glue Agent number of pods |
|
||||
| agentk8sglue.securityContext | object | `{}` | Web Server pod security context |
|
||||
| agentk8sglue.serviceExistingAccountName | string | `""` | if set, don't create a serviceAccountName but use defined existing one |
|
||||
| agentk8sglue.taskAsJob | bool | `false` | ClearML spawn tasks as jobs instead of pods |
|
||||
| agentk8sglue.tolerations | list | `[]` | tolerations setup for Agent pod (example in values.yaml comments) |
|
||||
@ -106,27 +131,3 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0`
|
||||
| sessions.startingPort | int | `30000` | starting range of exposed NodePorts |
|
||||
| sessions.svcAnnotations | object | `{}` | specific annotations for session services |
|
||||
| sessions.svcType | string | `"NodePort"` | service type ("NodePort" or "ClusterIP" or "LoadBalancer") |
|
||||
|
||||
# Upgrading Chart
|
||||
|
||||
### From v1.x to v2.x
|
||||
|
||||
Chart 1.x was under the assumption that all mounted volumes would be PVC's. Version > 2.x allows for more flexibility and will inject the yaml from podTemplate.volumes and podtemplate.volumeMounts directly.
|
||||
|
||||
v1.x
|
||||
```
|
||||
volumes:
|
||||
- name: "yourvolume"
|
||||
path: "/yourpath"
|
||||
```
|
||||
|
||||
v2.x
|
||||
```
|
||||
volumes:
|
||||
- name: "yourvolume"
|
||||
persistentVolumeClaim:
|
||||
claimName: "yourvolume"
|
||||
volumeMounts:
|
||||
- name: "yourvolume"
|
||||
mountPath: "/yourpath"
|
||||
```
|
||||
|
||||
@ -14,32 +14,31 @@
|
||||
The **clearml-agent** is the Kubernetes agent for [ClearML](https://github.com/allegroai/clearml).
|
||||
It allows you to schedule distributed experiments on a Kubernetes cluster.
|
||||
|
||||
# Upgrading Chart
|
||||
|
||||
## Upgrades/ Values upgrades
|
||||
|
||||
Updating to latest version of this chart can be done in two steps:
|
||||
|
||||
```
|
||||
helm repo update
|
||||
helm upgrade clearml-agent allegroai/clearml-agent
|
||||
```
|
||||
|
||||
Changing values on existing installation can be done with:
|
||||
|
||||
```
|
||||
helm upgrade clearml-agent allegroai/clearml-agent --version <CURRENT CHART VERSION> -f custom_values.yaml
|
||||
```
|
||||
|
||||
### Major upgrade from 3.* to 4.*
|
||||
|
||||
Before issuing helm upgrade:
|
||||
|
||||
* if using securityContexts, check the new value format in values.yaml (`podSecurityContext` and `containerSecurityContext`)
|
||||
|
||||
{{ template "chart.sourcesSection" . }}
|
||||
|
||||
{{ template "chart.requirementsSection" . }}
|
||||
|
||||
{{ template "chart.valuesSection" . }}
|
||||
|
||||
# Upgrading Chart
|
||||
|
||||
### From v1.x to v2.x
|
||||
|
||||
Chart 1.x was under the assumption that all mounted volumes would be PVC's. Version > 2.x allows for more flexibility and will inject the yaml from podTemplate.volumes and podtemplate.volumeMounts directly.
|
||||
|
||||
v1.x
|
||||
```
|
||||
volumes:
|
||||
- name: "yourvolume"
|
||||
path: "/yourpath"
|
||||
```
|
||||
|
||||
v2.x
|
||||
```
|
||||
volumes:
|
||||
- name: "yourvolume"
|
||||
persistentVolumeClaim:
|
||||
claimName: "yourvolume"
|
||||
volumeMounts:
|
||||
- name: "yourvolume"
|
||||
mountPath: "/yourpath"
|
||||
```
|
||||
|
||||
@ -105,7 +105,7 @@ imagePullSecrets:
|
||||
schedulerName: {{ .value.templateOverrides.schedulerName | default (.main.Values.agentk8sglue.basePodTemplate.schedulerName) }}
|
||||
restartPolicy: Never
|
||||
securityContext:
|
||||
{{- .value.templateOverrides.securityContext | default .main.Values.agentk8sglue.basePodTemplate.securityContext | toYaml | nindent 2 }}
|
||||
{{- .value.templateOverrides.podSecurityContext | default .main.Values.agentk8sglue.basePodTemplate.podSecurityContext | toYaml | nindent 2 }}
|
||||
hostAliases:
|
||||
{{- .value.templateOverrides.hostAliases | default .main.Values.agentk8sglue.basePodTemplate.hostAliases | toYaml | nindent 2 }}
|
||||
volumes:
|
||||
@ -129,6 +129,8 @@ priorityClassName: {{ .value.templateOverrides.priorityClassName | default .main
|
||||
containers:
|
||||
- resources:
|
||||
{{- .value.templateOverrides.resources | default .main.Values.agentk8sglue.basePodTemplate.resources | toYaml | nindent 4 }}
|
||||
securityContext:
|
||||
{{- .value.templateOverrides.containerSecurityContext | default .main.Values.agentk8sglue.basePodTemplate.containerSecurityContext | toYaml | nindent 4 }}
|
||||
ports:
|
||||
- containerPort: 10022
|
||||
volumeMounts:
|
||||
|
||||
@ -42,10 +42,14 @@ data:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ include "clearmlAgent.serviceAccountName" $ }}
|
||||
securityContext:
|
||||
{{ toYaml .Values.agentk8sglue.basePodTemplate.podSecurityContext | nindent 8 }}
|
||||
priorityClassName: {{ .Values.agentk8sglue.basePodTemplate.priorityClassName }}
|
||||
containers:
|
||||
- resources:
|
||||
{{- toYaml .Values.agentk8sglue.basePodTemplate.resources | nindent 10 }}
|
||||
securityContext:
|
||||
{{ toYaml .Values.agentk8sglue.basePodTemplate.containerSecurityContext | nindent 10 }}
|
||||
ports:
|
||||
- containerPort: 10022
|
||||
{{- with .Values.agentk8sglue.basePodTemplate.volumeMounts }}
|
||||
|
||||
@ -28,176 +28,181 @@ spec:
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ include "clearmlAgent.serviceAccountName" . }}
|
||||
securityContext: {{ toYaml .Values.agentk8sglue.securityContext | nindent 8 }}
|
||||
securityContext:
|
||||
{{ toYaml .Values.agentk8sglue.podSecurityContext | nindent 8 }}
|
||||
initContainers:
|
||||
- name: init-k8s-glue
|
||||
{{- if .Values.enterpriseFeatures.enabled }}
|
||||
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.enterpriseFeatures.agentImageTagOverride }}"
|
||||
{{- else }}
|
||||
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
|
||||
{{- end }}
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- >
|
||||
set -x;
|
||||
while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.apiServerUrlReference}}/debug.ping" -o /dev/null) -ne 200 ] ; do
|
||||
echo "waiting for apiserver" ;
|
||||
sleep 5 ;
|
||||
done;
|
||||
while [[ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.fileServerUrlReference}}/" -o /dev/null) =~ 403|405 ]] ; do
|
||||
echo "waiting for fileserver" ;
|
||||
sleep 5 ;
|
||||
done;
|
||||
while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.webServerUrlReference}}/" -o /dev/null) -ne 200 ] ; do
|
||||
echo "waiting for webserver" ;
|
||||
sleep 5 ;
|
||||
done
|
||||
containers:
|
||||
- name: k8s-glue
|
||||
{{- if .Values.enterpriseFeatures.enabled }}
|
||||
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.enterpriseFeatures.agentImageTagOverride }}"
|
||||
{{- else }}
|
||||
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
|
||||
{{- end }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
- /bin/bash
|
||||
- -c
|
||||
- >
|
||||
export PATH=$PATH:$HOME/bin;
|
||||
source /root/.bashrc && /root/entrypoint.sh
|
||||
volumeMounts:
|
||||
- name: {{ include "clearmlAgent.name" . }}-pt
|
||||
mountPath: /root/template
|
||||
{{ if .Values.clearml.clearmlConfig }}
|
||||
- name: k8sagent-clearml-conf-volume
|
||||
mountPath: /root/clearml.conf
|
||||
subPath: clearml.conf
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
{{- if .Values.agentk8sglue.volumeMounts }}
|
||||
{{- toYaml .Values.agentk8sglue.volumeMounts | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- range .Values.agentk8sglue.fileMounts }}
|
||||
- name: filemounts
|
||||
mountPath: "{{ .folderPath }}/{{ .name }}"
|
||||
subPath: "{{ .name }}"
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
env:
|
||||
- name: CLEARML_API_HOST
|
||||
value: "{{.Values.agentk8sglue.apiServerUrlReference}}"
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: "{{.Values.agentk8sglue.webServerUrlReference}}"
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: "{{.Values.agentk8sglue.fileServerUrlReference}}"
|
||||
{{- if not .Values.agentk8sglue.clearmlcheckCertificate }}
|
||||
- name: CLEARML_API_HOST_VERIFY_CERT
|
||||
value: "false"
|
||||
{{- end }}
|
||||
{{- if .Values.sessions.portModeEnabled }}
|
||||
- name: K8S_GLUE_EXTRA_ARGS
|
||||
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml \
|
||||
--ports-mode --num-of-services {{ .Values.sessions.maxServices }} \
|
||||
--base-port {{ .Values.sessions.startingPort }} \
|
||||
--gateway-address {{ .Values.sessions.externalIP }}{{ if .Values.enterpriseFeatures.enabled }}{{ if .Values.enterpriseFeatures.useOwnerToken }} --use-owner-token{{ end }}{{ end }}"
|
||||
{{- if .Values.sessions.dynamicSvcs }}
|
||||
- name: CLEARML_K8S_GLUE_POD_POST_APPLY_CMD
|
||||
value: "kubectl -n {namespace} apply -f ~/template/services-{pod_number}.yaml ; kubectl -n {namespace} label svc clearml-session-{pod_number} service-for={pod_name}"
|
||||
- name: CLEARML_K8S_GLUE_POD_POST_DELETE_CMD
|
||||
value: "kubectl -n {namespace} delete svc -l service-for={pod_name}"
|
||||
{{- end }}
|
||||
{{- else}}
|
||||
- name: K8S_GLUE_EXTRA_ARGS
|
||||
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml \
|
||||
--max-pods {{.Values.enterpriseFeatures.maxPods}}{{ if .Values.enterpriseFeatures.enabled }}{{ if .Values.enterpriseFeatures.useOwnerToken }} --use-owner-token{{ end }}{{ end }}"
|
||||
{{- end }}
|
||||
{{- if .Values.clearml.clearmlConfig }}
|
||||
- name: CLEARML_CONFIG_FILE
|
||||
value: /root/clearml.conf
|
||||
{{- end }}
|
||||
- name: CLEARML_K8S_GLUE_LIMIT_POD_LABEL
|
||||
value: "ai.allegro.agent.serial=pod-{pod_number}"
|
||||
- name: CLEARML_K8S_SECRETS_LIST_FILE
|
||||
value: /root/template/secrets.yaml
|
||||
- name: K8S_DEFAULT_NAMESPACE
|
||||
value: "{{ .Release.Namespace }}"
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "clearmlAgent.name" . }}-ac
|
||||
key: agentk8sglue_key
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "clearmlAgent.name" . }}-ac
|
||||
key: agentk8sglue_secret
|
||||
- name: CLEARML_WORKER_ID
|
||||
value: {{ include "clearmlAgent.name" . }}
|
||||
- name: CLEARML_AGENT_UPDATE_REPO
|
||||
value: ""
|
||||
- name: FORCE_CLEARML_AGENT_REPO
|
||||
value: ""
|
||||
- name: CLEARML_DOCKER_IMAGE
|
||||
value: "{{.Values.agentk8sglue.defaultContainerImage}}"
|
||||
{{ if .Values.agentk8sglue.customBashScript }}
|
||||
- name: CLEARML_K8S_GLUE_EXTRA_BASH_SCRIPT
|
||||
value: "{{.Values.agentk8sglue.customBashScript}}"
|
||||
{{- end }}
|
||||
{{ if .Values.agentk8sglue.containerCustomBashScript }}
|
||||
- name: CLEARML_K8S_GLUE_POD_BASH_SCRIPT
|
||||
value: "{{.Values.agentk8sglue.containerCustomBashScript}}"
|
||||
{{- end }}
|
||||
{{- if .Values.agentk8sglue.debugMode }}
|
||||
- name: "CLEARML_K8S_GLUE_DEBUG"
|
||||
value: "1"
|
||||
{{- end }}
|
||||
{{- if .Values.agentk8sglue.extraEnvs }}
|
||||
{{ toYaml .Values.agentk8sglue.extraEnvs | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- if .Values.sessions.portModeEnabled }}
|
||||
{{- if .Values.sessions.setInteractiveQueuesTag }}
|
||||
- name: "CLEARML_K8S_GLUE_SET_QUEUE_SYSTEM_TAGS"
|
||||
value: "interactive"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.agentk8sglue.taskAsJob }}
|
||||
- name: "CLEARML_K8S_GLUE_KIND"
|
||||
value: "job"
|
||||
{{- else }}
|
||||
- name: "CLEARML_K8S_GLUE_KIND"
|
||||
value: "pod"
|
||||
{{- end }}
|
||||
- name: init-k8s-glue
|
||||
{{- if .Values.enterpriseFeatures.enabled }}
|
||||
- name: K8S_GLUE_QUEUE
|
||||
value: {{ include "agentk8sglue.queues" . | quote }}
|
||||
- name: CLEARML_K8S_GLUE_CREATE_QUEUE
|
||||
value: {{ include "agentk8sglue.createQueues" . | quote }}
|
||||
- name: CLEARML_K8S_GLUE_APPLY_VAULT_ENV_VARS
|
||||
value: {{ .Values.enterpriseFeatures.applyVaultEnvVars | quote }}
|
||||
- name: "CLEARML_K8S_GLUE_POD_MIN_RES_FIELD"
|
||||
value: {{ .Values.enterpriseFeatures.monitoredResources.minResourcesFieldName }}
|
||||
- name: "CLEARML_K8S_GLUE_MAX_RESOURCES"
|
||||
value: "{{.Values.enterpriseFeatures.monitoredResources.maxResources}}"
|
||||
- name: "CLEARML_K8S_GLUE_POD_MAX_RES_FIELD"
|
||||
value: {{ .Values.enterpriseFeatures.monitoredResources.maxResourcesFieldName }}
|
||||
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.enterpriseFeatures.agentImageTagOverride }}"
|
||||
{{- else }}
|
||||
- name: K8S_GLUE_QUEUE
|
||||
value: {{ .Values.agentk8sglue.queue }}
|
||||
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.nodeSelector}}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- >
|
||||
set -x;
|
||||
while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.apiServerUrlReference}}/debug.ping" -o /dev/null) -ne 200 ] ; do
|
||||
echo "waiting for apiserver" ;
|
||||
sleep 5 ;
|
||||
done;
|
||||
while [[ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.fileServerUrlReference}}/" -o /dev/null) =~ 403|405 ]] ; do
|
||||
echo "waiting for fileserver" ;
|
||||
sleep 5 ;
|
||||
done;
|
||||
while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.webServerUrlReference}}/" -o /dev/null) -ne 200 ] ; do
|
||||
echo "waiting for webserver" ;
|
||||
sleep 5 ;
|
||||
done
|
||||
securityContext:
|
||||
{{ toYaml .Values.agentk8sglue.containerSecurityContext | nindent 12 }}
|
||||
containers:
|
||||
- name: k8s-glue
|
||||
{{- if .Values.enterpriseFeatures.enabled }}
|
||||
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.enterpriseFeatures.agentImageTagOverride }}"
|
||||
{{- else }}
|
||||
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
|
||||
{{- end }}
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
- /bin/bash
|
||||
- -c
|
||||
- >
|
||||
export PATH=$PATH:$HOME/bin;
|
||||
source /root/.bashrc && /root/entrypoint.sh
|
||||
volumeMounts:
|
||||
- name: {{ include "clearmlAgent.name" . }}-pt
|
||||
mountPath: /root/template
|
||||
{{ if .Values.clearml.clearmlConfig }}
|
||||
- name: k8sagent-clearml-conf-volume
|
||||
mountPath: /root/clearml.conf
|
||||
subPath: clearml.conf
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
{{- if .Values.agentk8sglue.volumeMounts }}
|
||||
{{- toYaml .Values.agentk8sglue.volumeMounts | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- range .Values.agentk8sglue.fileMounts }}
|
||||
- name: filemounts
|
||||
mountPath: "{{ .folderPath }}/{{ .name }}"
|
||||
subPath: "{{ .name }}"
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
env:
|
||||
- name: CLEARML_API_HOST
|
||||
value: "{{.Values.agentk8sglue.apiServerUrlReference}}"
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: "{{.Values.agentk8sglue.webServerUrlReference}}"
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: "{{.Values.agentk8sglue.fileServerUrlReference}}"
|
||||
{{- if not .Values.agentk8sglue.clearmlcheckCertificate }}
|
||||
- name: CLEARML_API_HOST_VERIFY_CERT
|
||||
value: "false"
|
||||
{{- end }}
|
||||
{{- if .Values.sessions.portModeEnabled }}
|
||||
- name: K8S_GLUE_EXTRA_ARGS
|
||||
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml \
|
||||
--ports-mode --num-of-services {{ .Values.sessions.maxServices }} \
|
||||
--base-port {{ .Values.sessions.startingPort }} \
|
||||
--gateway-address {{ .Values.sessions.externalIP }}{{ if .Values.enterpriseFeatures.enabled }}{{ if .Values.enterpriseFeatures.useOwnerToken }} --use-owner-token{{ end }}{{ end }}"
|
||||
{{- if .Values.sessions.dynamicSvcs }}
|
||||
- name: CLEARML_K8S_GLUE_POD_POST_APPLY_CMD
|
||||
value: "kubectl -n {namespace} apply -f ~/template/services-{pod_number}.yaml ; kubectl -n {namespace} label svc clearml-session-{pod_number} service-for={pod_name}"
|
||||
- name: CLEARML_K8S_GLUE_POD_POST_DELETE_CMD
|
||||
value: "kubectl -n {namespace} delete svc -l service-for={pod_name}"
|
||||
{{- end }}
|
||||
{{- else}}
|
||||
- name: K8S_GLUE_EXTRA_ARGS
|
||||
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml \
|
||||
--max-pods {{.Values.enterpriseFeatures.maxPods}}{{ if .Values.enterpriseFeatures.enabled }}{{ if .Values.enterpriseFeatures.useOwnerToken }} --use-owner-token{{ end }}{{ end }}"
|
||||
{{- end }}
|
||||
{{- if .Values.clearml.clearmlConfig }}
|
||||
- name: CLEARML_CONFIG_FILE
|
||||
value: /root/clearml.conf
|
||||
{{- end }}
|
||||
- name: CLEARML_K8S_GLUE_LIMIT_POD_LABEL
|
||||
value: "ai.allegro.agent.serial=pod-{pod_number}"
|
||||
- name: CLEARML_K8S_SECRETS_LIST_FILE
|
||||
value: /root/template/secrets.yaml
|
||||
- name: K8S_DEFAULT_NAMESPACE
|
||||
value: "{{ .Release.Namespace }}"
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "clearmlAgent.name" . }}-ac
|
||||
key: agentk8sglue_key
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "clearmlAgent.name" . }}-ac
|
||||
key: agentk8sglue_secret
|
||||
- name: CLEARML_WORKER_ID
|
||||
value: {{ include "clearmlAgent.name" . }}
|
||||
- name: CLEARML_AGENT_UPDATE_REPO
|
||||
value: ""
|
||||
- name: FORCE_CLEARML_AGENT_REPO
|
||||
value: ""
|
||||
- name: CLEARML_DOCKER_IMAGE
|
||||
value: "{{.Values.agentk8sglue.defaultContainerImage}}"
|
||||
{{- if .Values.agentk8sglue.customBashScript }}
|
||||
- name: CLEARML_K8S_GLUE_EXTRA_BASH_SCRIPT
|
||||
value: "{{.Values.agentk8sglue.customBashScript}}"
|
||||
{{- end }}
|
||||
{{- if .Values.agentk8sglue.containerCustomBashScript }}
|
||||
- name: CLEARML_K8S_GLUE_POD_BASH_SCRIPT
|
||||
value: "{{.Values.agentk8sglue.containerCustomBashScript}}"
|
||||
{{- end }}
|
||||
{{- if .Values.agentk8sglue.debugMode }}
|
||||
- name: "CLEARML_K8S_GLUE_DEBUG"
|
||||
value: "1"
|
||||
{{- end }}
|
||||
{{- if .Values.sessions.portModeEnabled }}
|
||||
{{- if .Values.sessions.setInteractiveQueuesTag }}
|
||||
- name: "CLEARML_K8S_GLUE_SET_QUEUE_SYSTEM_TAGS"
|
||||
value: "interactive"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.agentk8sglue.taskAsJob }}
|
||||
- name: "CLEARML_K8S_GLUE_KIND"
|
||||
value: "job"
|
||||
{{- else }}
|
||||
- name: "CLEARML_K8S_GLUE_KIND"
|
||||
value: "pod"
|
||||
{{- end }}
|
||||
{{- if .Values.enterpriseFeatures.enabled }}
|
||||
- name: K8S_GLUE_QUEUE
|
||||
value: {{ include "agentk8sglue.queues" . | quote }}
|
||||
- name: CLEARML_K8S_GLUE_CREATE_QUEUE
|
||||
value: {{ include "agentk8sglue.createQueues" . | quote }}
|
||||
- name: CLEARML_K8S_GLUE_APPLY_VAULT_ENV_VARS
|
||||
value: {{ .Values.enterpriseFeatures.applyVaultEnvVars | quote }}
|
||||
- name: "CLEARML_K8S_GLUE_POD_MIN_RES_FIELD"
|
||||
value: {{ .Values.enterpriseFeatures.monitoredResources.minResourcesFieldName }}
|
||||
- name: "CLEARML_K8S_GLUE_MAX_RESOURCES"
|
||||
value: "{{.Values.enterpriseFeatures.monitoredResources.maxResources}}"
|
||||
- name: "CLEARML_K8S_GLUE_POD_MAX_RES_FIELD"
|
||||
value: {{ .Values.enterpriseFeatures.monitoredResources.maxResourcesFieldName }}
|
||||
{{- else }}
|
||||
- name: K8S_GLUE_QUEUE
|
||||
value: {{ .Values.agentk8sglue.queue }}
|
||||
{{- end }}
|
||||
{{- if .Values.agentk8sglue.extraEnvs }}
|
||||
{{ toYaml .Values.agentk8sglue.extraEnvs | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.nodeSelector}}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
securityContext:
|
||||
{{ toYaml .Values.agentk8sglue.containerSecurityContext | nindent 12 }}
|
||||
volumes:
|
||||
- name: {{ include "clearmlAgent.name" . }}-pt
|
||||
configMap:
|
||||
@ -210,11 +215,11 @@ spec:
|
||||
- key: clearml.conf
|
||||
path: clearml.conf
|
||||
{{ end }}
|
||||
{{- if .Values.agentk8sglue.volumes }}
|
||||
{{- toYaml .Values.agentk8sglue.volumes | nindent 8 }}
|
||||
{{- end }}
|
||||
{{ if .Values.agentk8sglue.fileMounts }}
|
||||
- name: filemounts
|
||||
secret:
|
||||
secretName: {{ include "clearmlAgent.name" . }}-afm
|
||||
{{ end }}
|
||||
{{- if .Values.agentk8sglue.volumes }}
|
||||
{{- toYaml .Values.agentk8sglue.volumes | nindent 8 }}
|
||||
{{- end }}
|
||||
|
||||
@ -77,8 +77,12 @@ agentk8sglue:
|
||||
extraEnvs: []
|
||||
# - name: PYTHONPATH
|
||||
# value: "somepath"
|
||||
# -- Web Server pod security context
|
||||
securityContext: {}
|
||||
# -- pod securityContext setup for Agent pod (example in values.yaml comments)
|
||||
podSecurityContext: {}
|
||||
# runAsUser: 1001
|
||||
# fsGroup: 1001
|
||||
# -- container securityContext setup for Agent pod (example in values.yaml comments)
|
||||
containerSecurityContext: {}
|
||||
# runAsUser: 1001
|
||||
# allowPrivilegeEscalation: false
|
||||
# -- additional existing ClusterRoleBindings
|
||||
@ -187,7 +191,11 @@ agentk8sglue:
|
||||
# -- affinity setup for pods spawned to consume ClearML Task
|
||||
affinity: {}
|
||||
# -- securityContext setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
securityContext: {}
|
||||
podSecurityContext: {}
|
||||
# runAsUser: 1001
|
||||
# fsGroup: 1001
|
||||
# -- securityContext setup for containers spawned to consume ClearML Task (example in values.yaml comments)
|
||||
containerSecurityContext: {}
|
||||
# runAsUser: 1001
|
||||
# allowPrivilegeEscalation: false
|
||||
# -- hostAliases setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user