Upgrade to app version 1.5.0 (#81)

* Changed: upgrade to 1.5.0

* Fixed: inject after ct check

* Fixed: list changed

* Fixed: typo
This commit is contained in:
Valeriano Manassero 2022-06-23 07:49:45 +02:00 committed by GitHub
parent 84a003b7bc
commit e22bd30764
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 65 additions and 671 deletions

View File

@ -34,16 +34,19 @@ jobs:
node_image: kindest/node:${{ matrix.k8s }}
- name: Set up chart-testing
uses: helm/chart-testing-action@v2.2.1
- name: Inject secrets
run: |
find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUEKEY/${{ secrets.agentk8sglueKey }}/g" {} \;
find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUESECRET/${{ secrets.agentk8sglueSecret }}/g" {} \;
- name: Run chart-testing (list-changed)
id: list-changed
run: |
changed=$(ct list-changed --chart-dirs=charts --target-branch=main)
if [[ -n "$changed" ]]; then
echo "::set-output name=changed::true"
echo "::set-output name=changed_charts::\"${changed//$'\n'/,}\""
fi
- name: Inject secrets
run: |
find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUEKEY/${{ secrets.agentk8sglueKey }}/g" {} \;
find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUESECRET/${{ secrets.agentk8sglueSecret }}/g" {} \;
if: steps.list-changed.outputs.changed == 'true'
- name: Run chart-testing (lint and install)
run: ct lint-and-install --chart-dirs=charts --target-branch=main --helm-extra-args="--timeout=15m" --debug=true
run: ct lint-and-install --chart-dirs=charts --target-branch=main --helm-extra-args="--timeout=15m" --charts=${{steps.list-changed.outputs.changed_charts}} --debug=true
if: steps.list-changed.outputs.changed == 'true'

View File

@ -2,8 +2,8 @@ apiVersion: v2
name: clearml
description: MLOps platform
type: application
version: "3.10.5"
appVersion: "1.4.0"
version: "4.0.0"
appVersion: "1.5.0"
home: https://clear.ml
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
sources:

View File

@ -1,6 +1,6 @@
# ClearML Ecosystem for Kubernetes
![Version: 3.10.5](https://img.shields.io/badge/Version-3.10.5-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.4.0](https://img.shields.io/badge/AppVersion-1.4.0-informational?style=flat-square)
![Version: 4.0.0](https://img.shields.io/badge/Version-4.0.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.5.0](https://img.shields.io/badge/AppVersion-1.5.0-informational?style=flat-square)
MLOps platform
@ -129,94 +129,6 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| agentGroups.agent-group-cpu.affinity | object | `{}` | |
| agentGroups.agent-group-cpu.agentVersion | string | `""` | |
| agentGroups.agent-group-cpu.awsAccessKeyId | string | `nil` | |
| agentGroups.agent-group-cpu.awsDefaultRegion | string | `nil` | |
| agentGroups.agent-group-cpu.awsSecretAccessKey | string | `nil` | |
| agentGroups.agent-group-cpu.azureStorageAccount | string | `nil` | |
| agentGroups.agent-group-cpu.azureStorageKey | string | `nil` | |
| agentGroups.agent-group-cpu.clearmlAccessKey | string | `nil` | |
| agentGroups.agent-group-cpu.clearmlConfig | string | `"sdk {\n}"` | |
| agentGroups.agent-group-cpu.clearmlGitPassword | string | `nil` | |
| agentGroups.agent-group-cpu.clearmlGitUser | string | `nil` | |
| agentGroups.agent-group-cpu.clearmlSecretKey | string | `nil` | |
| agentGroups.agent-group-cpu.enabled | bool | `false` | |
| agentGroups.agent-group-cpu.extraEnvs | list | `[]` | |
| agentGroups.agent-group-cpu.image.pullPolicy | string | `"IfNotPresent"` | |
| agentGroups.agent-group-cpu.image.repository | string | `"ubuntu"` | |
| agentGroups.agent-group-cpu.image.tag | string | `"18.04"` | |
| agentGroups.agent-group-cpu.name | string | `"agent-group-cpu"` | |
| agentGroups.agent-group-cpu.nodeSelector | object | `{}` | |
| agentGroups.agent-group-cpu.nvidiaGpusPerAgent | int | `0` | |
| agentGroups.agent-group-cpu.podAnnotations | object | `{}` | |
| agentGroups.agent-group-cpu.queues | string | `"default"` | |
| agentGroups.agent-group-cpu.replicaCount | int | `1` | |
| agentGroups.agent-group-cpu.tolerations | list | `[]` | |
| agentGroups.agent-group-cpu.updateStrategy | string | `"Recreate"` | |
| agentGroups.agent-group-gpu.affinity | object | `{}` | |
| agentGroups.agent-group-gpu.agentVersion | string | `""` | |
| agentGroups.agent-group-gpu.awsAccessKeyId | string | `nil` | |
| agentGroups.agent-group-gpu.awsDefaultRegion | string | `nil` | |
| agentGroups.agent-group-gpu.awsSecretAccessKey | string | `nil` | |
| agentGroups.agent-group-gpu.azureStorageAccount | string | `nil` | |
| agentGroups.agent-group-gpu.azureStorageKey | string | `nil` | |
| agentGroups.agent-group-gpu.clearmlAccessKey | string | `nil` | |
| agentGroups.agent-group-gpu.clearmlConfig | string | `"sdk {\n}"` | |
| agentGroups.agent-group-gpu.clearmlGitPassword | string | `nil` | |
| agentGroups.agent-group-gpu.clearmlGitUser | string | `nil` | |
| agentGroups.agent-group-gpu.clearmlSecretKey | string | `nil` | |
| agentGroups.agent-group-gpu.enabled | bool | `false` | |
| agentGroups.agent-group-gpu.image.pullPolicy | string | `"IfNotPresent"` | |
| agentGroups.agent-group-gpu.image.repository | string | `"nvidia/cuda"` | |
| agentGroups.agent-group-gpu.image.tag | string | `"11.0-base-ubuntu18.04"` | |
| agentGroups.agent-group-gpu.name | string | `"agent-group-gpu"` | |
| agentGroups.agent-group-gpu.nodeSelector | object | `{}` | |
| agentGroups.agent-group-gpu.nvidiaGpusPerAgent | int | `1` | |
| agentGroups.agent-group-gpu.podAnnotations | object | `{}` | |
| agentGroups.agent-group-gpu.queues | string | `"default"` | |
| agentGroups.agent-group-gpu.replicaCount | int | `0` | |
| agentGroups.agent-group-gpu.tolerations | list | `[]` | |
| agentGroups.agent-group-gpu.updateStrategy | string | `"Recreate"` | |
| agentk8sglue.defaultDockerImage | string | `"nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04"` | |
| agentk8sglue.enabled | bool | `true` | |
| agentk8sglue.id | string | `"k8s-agent"` | |
| agentk8sglue.image.repository | string | `"allegroai/clearml-agent-k8s"` | |
| agentk8sglue.image.tag | string | `"base-1.21"` | |
| agentk8sglue.maxPods | int | `10` | |
| agentk8sglue.podTemplate.env | list | `[]` | |
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | |
| agentk8sglue.podTemplate.resources | object | `{}` | |
| agentk8sglue.podTemplate.tolerations | list | `[]` | |
| agentk8sglue.podTemplate.volumes | list | `[]` | |
| agentk8sglue.queue | string | `"default"` | |
| agentk8sglue.serviceAccountName | string | `"default"` | |
| agentservices.affinity | object | `{}` | |
| agentservices.agentVersion | string | `""` | |
| agentservices.awsAccessKeyId | string | `nil` | |
| agentservices.awsDefaultRegion | string | `nil` | |
| agentservices.awsSecretAccessKey | string | `nil` | |
| agentservices.azureStorageAccount | string | `nil` | |
| agentservices.azureStorageKey | string | `nil` | |
| agentservices.clearmlFilesHost | string | `nil` | |
| agentservices.clearmlGitPassword | string | `nil` | |
| agentservices.clearmlGitUser | string | `nil` | |
| agentservices.clearmlHostIp | string | `nil` | |
| agentservices.clearmlWebHost | string | `nil` | |
| agentservices.clearmlWorkerId | string | `"clearml-services"` | |
| agentservices.enabled | bool | `false` | |
| agentservices.extraEnvs | list | `[]` | |
| agentservices.googleCredentials | string | `nil` | |
| agentservices.image.pullPolicy | string | `"IfNotPresent"` | |
| agentservices.image.repository | string | `"allegroai/clearml-agent-services"` | |
| agentservices.image.tag | string | `"latest"` | |
| agentservices.nodeSelector | object | `{}` | |
| agentservices.podAnnotations | object | `{}` | |
| agentservices.replicaCount | int | `1` | |
| agentservices.resources | object | `{}` | |
| agentservices.storage.data.class | string | `""` | |
| agentservices.storage.data.size | string | `"50Gi"` | |
| agentservices.tolerations | list | `[]` | |
| apiserver.additionalConfigs | object | `{}` | additional configurations that can be used by api server; check examples in values.yaml file |
| apiserver.affinity | object | `{}` | |
| apiserver.authCookiesMaxAge | int | `864000` | Amount of seconds the authorization cookie will last in user browser |
@ -224,7 +136,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| apiserver.extraEnvs | list | `[]` | |
| apiserver.image.pullPolicy | string | `"IfNotPresent"` | |
| apiserver.image.repository | string | `"allegroai/clearml"` | |
| apiserver.image.tag | string | `"1.4.0"` | |
| apiserver.image.tag | string | `"1.5.0"` | |
| apiserver.livenessDelay | int | `60` | |
| apiserver.nodeSelector | object | `{}` | |
| apiserver.podAnnotations | object | `{}` | |
@ -238,7 +150,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| apiserver.service.port | int | `8008` | |
| apiserver.service.type | string | `"NodePort"` | This will set to service's spec.type field |
| apiserver.tolerations | list | `[]` | |
| clearml.defaultCompany | string | `"d1bd92a3b039400cbafc60a7a5b1e52b"` | |
| clearml | object | `{"defaultCompany":"d1bd92a3b039400cbafc60a7a5b1e52b"}` | ClearML generic configurations |
| elasticsearch.clusterHealthCheckParams | string | `"wait_for_status=yellow&timeout=1s"` | |
| elasticsearch.clusterName | string | `"clearml-elastic"` | |
| elasticsearch.enabled | bool | `true` | |
@ -284,7 +196,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| fileserver.extraEnvs | list | `[]` | |
| fileserver.image.pullPolicy | string | `"IfNotPresent"` | |
| fileserver.image.repository | string | `"allegroai/clearml"` | |
| fileserver.image.tag | string | `"1.4.0"` | |
| fileserver.image.tag | string | `"1.5.0"` | |
| fileserver.nodeSelector | object | `{}` | |
| fileserver.podAnnotations | object | `{}` | |
| fileserver.replicaCount | int | `1` | |
@ -295,6 +207,13 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| fileserver.storage.data.class | string | `""` | |
| fileserver.storage.data.size | string | `"50Gi"` | |
| fileserver.tolerations | list | `[]` | |
| imageCredentials | object | `{"email":"someone@host.com","enabled":false,"existingSecret":"","password":"pwd","registry":"docker.io","username":"someone"}` | Private image registry configuration |
| imageCredentials.email | string | `"someone@host.com"` | Email |
| imageCredentials.enabled | bool | `false` | Use private authentication mode |
| imageCredentials.existingSecret | string | `""` | If this is set, chart will not generate a secret but will use what is defined here |
| imageCredentials.password | string | `"pwd"` | Registry password |
| imageCredentials.registry | string | `"docker.io"` | Registry name |
| imageCredentials.username | string | `"someone"` | Registry username |
| ingress.annotations | object | `{}` | |
| ingress.api.annotations | object | `{}` | |
| ingress.api.enabled | bool | `false` | |
@ -343,7 +262,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| webserver.extraEnvs | list | `[]` | |
| webserver.image.pullPolicy | string | `"IfNotPresent"` | |
| webserver.image.repository | string | `"allegroai/clearml"` | |
| webserver.image.tag | string | `"1.4.0"` | |
| webserver.image.tag | string | `"1.5.0"` | |
| webserver.nodeSelector | object | `{}` | |
| webserver.podAnnotations | object | `{}` | |
| webserver.replicaCount | int | `1` | |

View File

@ -1,57 +0,0 @@
{{- if .Values.agentk8sglue.enabled }}
apiVersion: v1
kind: ConfigMap
metadata:
name: k8sagent-pod-template
data:
template.yaml: |
apiVersion: v1
metadata:
namespace: {{ .Release.Namespace }}
spec:
serviceAccountName: {{ .Values.agentk8sglue.serviceAccountName }}
volumes:
{{- range .Values.agentk8sglue.podTemplate.volumes }}
- name: {{ .name }}
persistentVolumeClaim:
claimName: {{ .name }}
{{- end }}
containers:
- resources:
{{- toYaml .Values.agentk8sglue.podTemplate.resources | nindent 10 }}
ports:
- containerPort: 10022
volumeMounts:
{{- range .Values.agentk8sglue.podTemplate.volumes }}
- mountPath: {{ .path }}
name: {{ .name }}
{{- end }}
env:
- name: CLEARML_API_HOST
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
- name: CLEARML_WEB_HOST
value: "http://{{ include "clearml.fullname" . }}-webserver"
- name: CLEARML_FILES_HOST
value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}"
- name: CLEARML_API_ACCESS_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: apiserver_key
- name: CLEARML_API_SECRET_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: apiserver_secret
{{- if .Values.agentk8sglue.podTemplate.env }}
{{ toYaml .Values.agentk8sglue.podTemplate.env | nindent 8 }}
{{- end }}
{{- with .Values.agentk8sglue.podTemplate.nodeSelector}}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.agentk8sglue.podTemplate.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}

View File

@ -1,122 +0,0 @@
{{- range $key, $value := .Values.agentGroups }}
{{- with $value }}
{{- if .enabled }}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "clearml.fullname" $ }}-{{ .name }}-agent
labels:
{{- include "clearml.labels" $ | nindent 4 }}
spec:
replicas: {{ .replicaCount }}
strategy:
type: {{ .updateStrategy }}
selector:
matchLabels:
{{- include "clearml.selectorLabelsAgent" $ | nindent 6 }}
template:
metadata:
annotations:
checksum/secret: {{ include (print $.Template.BasePath "/secrets.yaml") $ | sha256sum }}
{{- with .podAnnotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "clearml.selectorLabelsAgent" $ | nindent 8 }}
spec:
volumes:
{{ if .clearmlConfig }}
- name: agent-clearml-conf-volume
secret:
secretName: {{ .name }}-conf
items:
- key: clearml.conf
path: clearml.conf
{{ end }}
initContainers:
- name: init-agent-{{ .name }}
image: "{{ .image.repository }}:{{ .image.tag | default $.Chart.AppVersion }}"
command:
- /bin/sh
- -c
- >
set -x;
while [ $(curl -sw '%{http_code}' "{{ include "clearml.serviceApi" $ }}/debug.ping" -o /dev/null) -ne 200 ] ; do
echo "waiting for apiserver" ;
sleep 5 ;
done
containers:
- name: {{ $.Chart.Name }}-{{ .name }}
image: "{{ .image.repository }}:{{ .image.tag }}"
imagePullPolicy: {{ .image.pullPolicy }}
securityContext:
privileged: true
resources:
limits:
nvidia.com/gpu:
{{ .nvidiaGpusPerAgent }}
env:
- name: CLEARML_API_HOST
value: {{ include "clearml.serviceApi" $ }}
- name: CLEARML_WEB_HOST
value: {{ include "clearml.serviceApp" $ }}
- name: CLEARML_FILES_HOST
value: {{ include "clearml.serviceFiles" $ }}
- name: CLEARML_AGENT_GIT_USER
value: {{ .clearmlGitUser}}
- name: CLEARML_AGENT_GIT_PASS
value: {{ .clearmlGitPassword}}
- name: AWS_ACCESS_KEY_ID
value: {{ .awsAccessKeyId}}
- name: AWS_SECRET_ACCESS_KEY
value: {{ .awsSecretAccessKey}}
- name: AWS_DEFAULT_REGION
value: {{ .awsDefaultRegion}}
- name: AZURE_STORAGE_ACCOUNT
value: {{ .azureStorageAccount}}
- name: AZURE_STORAGE_KEY
value: {{ .azureStorageKey}}
- name: CLEARML_API_ACCESS_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: tests_user_key
- name: CLEARML_API_SECRET_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: tests_user_secret
{{- if .extraEnvs }}
{{ toYaml .extraEnvs | nindent 10 }}
{{- end }}
command:
- /bin/sh
- -c
- "apt-get update ;
apt-get install -y curl python3-pip git;
python3 -m pip install -U pip ;
python3 -m pip install clearml-agent{{ .agentVersion}} ;
CLEARML_AGENT_K8S_HOST_MOUNT=/root/.clearml:/root/.clearml clearml-agent daemon --foreground --queue {{ .queues}}"
{{ if .clearmlConfig }}
volumeMounts:
- name: agent-clearml-conf-volume
mountPath: /root/clearml.conf
subPath: clearml.conf
readOnly: true
{{- end }}
{{- with .nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -1,64 +0,0 @@
{{- if .Values.agentk8sglue.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: "{{ include "clearml.fullname" . }}-k8sagent"
labels:
app: k8sagent
spec:
replicas: 1
selector:
matchLabels:
app: k8sagent
template:
metadata:
labels:
app: k8sagent
spec:
containers:
- name: k8s-glue-container
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
imagePullPolicy: IfNotPresent
command: ["/bin/bash", "-c", "export PATH=$PATH:$HOME/bin; source /root/.bashrc && /root/entrypoint.sh"]
volumeMounts:
- name: k8sagent-pod-template
mountPath: /root/template
env:
- name: CLEARML_API_HOST
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
- name: CLEARML_WEB_HOST
value: "http://{{ include "clearml.fullname" . }}-webserver"
- name: CLEARML_FILES_HOST
value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}"
- name: K8S_GLUE_MAX_PODS
value: "{{.Values.agentk8sglue.maxPods}}"
- name: K8S_GLUE_QUEUE
value: "{{.Values.agentk8sglue.queue}}"
- name: K8S_GLUE_EXTRA_ARGS
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml"
- name: K8S_DEFAULT_NAMESPACE
value: "{{ .Release.Namespace }}"
- name: CLEARML_API_ACCESS_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: apiserver_key
- name: CLEARML_API_SECRET_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: apiserver_secret
- name: CLEARML_WORKER_ID
value: "{{.Values.agentk8sglue.id}}"
- name: CLEARML_AGENT_UPDATE_REPO
value: ""
- name: FORCE_CLEARML_AGENT_REPO
value: ""
- name: CLEARML_DOCKER_IMAGE
value: "{{.Values.agentk8sglue.defaultDockerImage}}"
volumes:
- name: k8sagent-pod-template
configMap:
name: k8sagent-pod-template
{{- end }}

View File

@ -1,106 +0,0 @@
{{- if .Values.agentservices.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "clearml.fullname" . }}-agentservices
labels:
{{- include "clearml.labels" . | nindent 4 }}
spec:
replicas: {{ .Values.agentservices.replicaCount }}
selector:
matchLabels:
{{- include "clearml.selectorLabelsAgentServices" . | nindent 6 }}
template:
metadata:
annotations:
checksum/secret: {{ include (print $.Template.BasePath "/secrets.yaml") . | sha256sum }}
{{- with .Values.agentservices.podAnnotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "clearml.selectorLabelsAgentServices" . | nindent 8 }}
spec:
volumes:
- name: agentservices-data
persistentVolumeClaim:
claimName: {{ include "clearml.fullname" . }}-agentservices-data
initContainers:
- name: init-agentservices
image: "{{ .Values.agentservices.image.repository }}:{{ .Values.agentservices.image.tag | default .Chart.AppVersion }}"
command:
- /bin/sh
- -c
- >
set -x;
while [ $(curl -sw '%{http_code}' "{{ include "clearml.serviceApi" $ }}/debug.ping" -o /dev/null) -ne 200 ] ; do
echo "waiting for apiserver" ;
sleep 5 ;
done
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.agentservices.image.repository }}:{{ .Values.agentservices.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.agentservices.image.pullPolicy }}
env:
- name: CLEARML_HOST_IP
value: {{ .Values.agentservices.clearmlHostIp }}
- name: CLEARML_API_HOST
value: {{ include "clearml.serviceApi" $ }}
- name: CLEARML_WEB_HOST
value: {{ .Values.agentservices.clearmlWebHost }}
- name: CLEARML_FILES_HOST
value: {{ .Values.agentservices.clearmlFilesHost }}
- name: CLEARML_AGENT_GIT_USER
value: {{ .Values.agentservices.clearmlGitUser }}
- name: CLEARML_AGENT_GIT_PASS
value: {{ .Values.agentservices.clearmlGitPassword }}
- name: CLEARML_AGENT_UPDATE_VERSION
value: {{ .Values.agentservices.agentVersion }}
- name: CLEARML_AGENT_DEFAULT_BASE_DOCKER
value: {{ .Values.agentservices.defaultBaseDocker }}
- name: AWS_ACCESS_KEY_ID
value: {{ .Values.agentservices.awsAccessKeyId }}
- name: AWS_SECRET_ACCESS_KEY
value: {{ .Values.agentservices.awsSecretAccessKey }}
- name: AWS_DEFAULT_REGION
value: {{ .Values.agentservices.awsDefaultRegion }}
- name: AZURE_STORAGE_ACCOUNT
value: {{ .Values.agentservices.azureStorageAccount }}
- name: AZURE_STORAGE_KEY
value: {{ .Values.agentservices.azureStorageKey }}
- name: GOOGLE_APPLICATION_CREDENTIALS
value: {{ .Values.agentservices.googleCredentials }}
- name: CLEARML_WORKER_ID
value: {{ .Values.agentservices.clearmlWorkerId }}
- name: CLEARML_API_ACCESS_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: tests_user_key
- name: CLEARML_API_SECRET_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: tests_user_secret
{{- if .Values.agentservices.extraEnvs }}
{{ toYaml .Values.agentservices.extraEnvs | nindent 10 }}
{{- end }}
args:
- agentservices
volumeMounts:
- name: agentservices-data
mountPath: /root/.clearml
resources:
{{- toYaml .Values.agentservices.resources | nindent 12 }}
{{- with .Values.agentservices.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.agentservices.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.agentservices.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}

View File

@ -19,7 +19,14 @@ spec:
labels:
{{- include "clearml.selectorLabelsApiServer" . | nindent 8 }}
spec:
{{- include "clearml.imagePullSecrets" . | indent 6 }}
{{- /* Attach image pull secrets only when imageCredentials.enabled is true. */}}
{{- if .Values.imageCredentials.enabled }}
imagePullSecrets:
{{- if .Values.imageCredentials.existingSecret }}
{{- /* The value must be rendered through template delimiters; otherwise the literal text is emitted as the secret name. */}}
- name: {{ .Values.imageCredentials.existingSecret | quote }}
{{- else }}
{{- /* Fallback secret name used when no existing secret is supplied. */}}
- name: clearml-agent-registry-key
{{- end }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.apiserver.image.repository }}:{{ .Values.apiserver.image.tag | default .Chart.AppVersion }}"

View File

@ -22,7 +22,14 @@ spec:
- name: fileserver-data
persistentVolumeClaim:
claimName: {{ include "clearml.fullname" . }}-fileserver-data
{{- include "clearml.imagePullSecrets" . | indent 6 }}
{{- /* Attach image pull secrets only when imageCredentials.enabled is true. */}}
{{- if .Values.imageCredentials.enabled }}
imagePullSecrets:
{{- if .Values.imageCredentials.existingSecret }}
{{- /* The value must be rendered through template delimiters; otherwise the literal text is emitted as the secret name. */}}
- name: {{ .Values.imageCredentials.existingSecret | quote }}
{{- else }}
{{- /* Fallback secret name used when no existing secret is supplied. */}}
- name: clearml-agent-registry-key
{{- end }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.fileserver.image.repository }}:{{ .Values.fileserver.image.tag | default .Chart.AppVersion }}"

View File

@ -18,7 +18,14 @@ spec:
labels:
{{- include "clearml.selectorLabelsWebServer" . | nindent 8 }}
spec:
{{- include "clearml.imagePullSecrets" . | indent 6 }}
{{- /* Attach image pull secrets only when imageCredentials.enabled is true. */}}
{{- if .Values.imageCredentials.enabled }}
imagePullSecrets:
{{- if .Values.imageCredentials.existingSecret }}
{{- /* The value must be rendered through template delimiters; otherwise the literal text is emitted as the secret name. */}}
- name: {{ .Values.imageCredentials.existingSecret | quote }}
{{- else }}
{{- /* Fallback secret name used when no existing secret is supplied. */}}
- name: clearml-agent-registry-key
{{- end }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.webserver.image.repository }}:{{ .Values.webserver.image.tag | default .Chart.AppVersion }}"

View File

@ -1,17 +0,0 @@
{{- if .Values.agentservices.enabled }}
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: {{ include "clearml.fullname" . }}-agentservices-data
labels:
{{- include "clearml.labels" . | nindent 4 }}
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: {{ .Values.agentservices.storage.data.size | quote }}
{{- if .Values.agentservices.storage.data.class -}}
storageClassName: {{ .Values.agentservices.storage.data.class | quote }}
{{- end -}}
{{- end }}

View File

@ -1,25 +0,0 @@
{{- if .Values.agentk8sglue.enabled }}
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: k8sagent-pods-access
rules:
- apiGroups:
- ""
resources:
- pods
verbs: ["get", "list", "watch", "create", "patch", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: k8sagent-pods-access
subjects:
- kind: ServiceAccount
name: default
namespace: {{ .Release.Namespace }}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: k8sagent-pods-access
{{- end }}

View File

@ -1,13 +0,0 @@
{{- range $key, $value := .Values.agentGroups }}
{{- with $value }}
---
{{ if .clearmlConfig }}
apiVersion: v1
kind: Secret
metadata:
name: {{ .name }}-conf
data:
clearml.conf: {{ .clearmlConfig | b64enc }}
{{ end }}
{{- end }}
{{- end }}

View File

@ -1,6 +1,19 @@
# global:
# imagePullSecrets:
# - docker-cfg
# -- Private image registry configuration
imageCredentials:
# -- Use private authentication mode
enabled: false
# -- If this is set, chart will not generate a secret but will use what is defined here
existingSecret: ""
# -- Registry name
registry: docker.io
# -- Registry username
username: someone
# -- Registry password
password: pwd
# -- Email
email: someone@host.com
# -- ClearML generic configurations
clearml:
defaultCompany: "d1bd92a3b039400cbafc60a7a5b1e52b"
ingress:
@ -67,7 +80,7 @@ apiserver:
image:
repository: "allegroai/clearml"
pullPolicy: IfNotPresent
tag: "1.4.0"
tag: "1.5.0"
extraEnvs: []
@ -136,7 +149,7 @@ fileserver:
image:
repository: "allegroai/clearml"
pullPolicy: IfNotPresent
tag: "1.4.0"
tag: "1.5.0"
extraEnvs: []
@ -181,7 +194,7 @@ webserver:
image:
repository: "allegroai/clearml"
pullPolicy: IfNotPresent
tag: "1.4.0"
tag: "1.5.0"
podAnnotations: {}
@ -205,164 +218,6 @@ webserver:
additionalConfigs: {}
agentservices:
enabled: false
clearmlHostIp: null
agentVersion: ""
clearmlWebHost: null
clearmlFilesHost: null
clearmlGitUser: null
clearmlGitPassword: null
awsAccessKeyId: null
awsSecretAccessKey: null
awsDefaultRegion: null
azureStorageAccount: null
azureStorageKey: null
googleCredentials: null
clearmlWorkerId: "clearml-services"
replicaCount: 1
image:
repository: "allegroai/clearml-agent-services"
pullPolicy: IfNotPresent
tag: "latest"
extraEnvs: []
podAnnotations: {}
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
nodeSelector: {}
tolerations: []
affinity: {}
storage:
data:
class: ""
size: 50Gi
agentGroups:
agent-group-cpu:
enabled: false
name: agent-group-cpu
replicaCount: 1
updateStrategy: Recreate
nvidiaGpusPerAgent: 0
agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
queues: "default" # multiple queues can be specified separated by a space (e.g. "important_jobs default")
clearmlGitUser: null
clearmlGitPassword: null
clearmlAccessKey: null
clearmlSecretKey: null
awsAccessKeyId: null
awsSecretAccessKey: null
awsDefaultRegion: null
azureStorageAccount: null
azureStorageKey: null
clearmlConfig: |-
sdk {
}
image:
repository: "ubuntu"
pullPolicy: IfNotPresent
tag: "18.04"
extraEnvs: []
podAnnotations: {}
nodeSelector: {}
tolerations: []
affinity: {}
agent-group-gpu:
enabled: false
name: agent-group-gpu
replicaCount: 0
updateStrategy: Recreate
nvidiaGpusPerAgent: 1
agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
queues: "default" # multiple queues can be specified separated by a space (e.g. "important_jobs default")
clearmlGitUser: null
clearmlGitPassword: null
clearmlAccessKey: null
clearmlSecretKey: null
awsAccessKeyId: null
awsSecretAccessKey: null
awsDefaultRegion: null
azureStorageAccount: null
azureStorageKey: null
clearmlConfig: |-
sdk {
}
image:
repository: "nvidia/cuda"
pullPolicy: IfNotPresent
tag: "11.0-base-ubuntu18.04"
podAnnotations: {}
nodeSelector: {}
tolerations: []
affinity: {}
# This agent will spawn queued experiments in new pods, a good use case is to combine this with
# GPU autoscaling nodes.
# https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue
agentk8sglue:
enabled: true
image:
repository: "allegroai/clearml-agent-k8s"
tag: "base-1.21"
serviceAccountName: default
maxPods: 10
defaultDockerImage: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04 # default docker image that is spawned as new pod
queue: default
id: k8s-agent
podTemplate:
volumes: []
# - name: "yourvolume"
# path: "/yourpath"
env: []
# # to setup access to private repo, setup secret with git credentials:
# - name: CLEARML_AGENT_GIT_USER
# value: mygitusername
# - name: CLEARML_AGENT_GIT_PASS
# valueFrom:
# secretKeyRef:
# name: git-password
# key: git-password
resources: {}
# limits:
# nvidia.com/gpu: 1
tolerations: []
# - key: "nvidia.com/gpu"
# operator: Exists
# effect: "NoSchedule"
nodeSelector: {}
# fleet: gpu-nodes
externalServices:
# -- Existing ElasticSearch Hostname to use if elasticsearch.enabled is false
elasticsearchHost: ""