Compare commits

...

6 Commits

Author SHA1 Message Date
Valeriano Manassero
d7c3b9d5d9 Added: upgrade procedures (#61)
* Added: upgrade procedures

* Changed: template

* Changed: updated chart version
2022-04-04 10:32:51 +02:00
Valeriano Manassero
e16060f2ad Fix empty glue configs (#59)
* Added: use empty values without breaking glue agent

* Added: release namespace

* Changed: bump up version
2022-03-30 16:33:06 +02:00
Valeriano Manassero
27a666d2ae Clarml app 1.3.0 (#57)
* Changed: clarml app version

* Changed: chart version bump

* Added: comment on additional configs
2022-03-28 09:29:04 +02:00
Valeriano Manassero
d7bef0ff9d Add authentication example (#56)
* Added: auth enabled example in additionalConfigs

* Changed: bump up version

* Fixed: remove trailing spaces
2022-03-25 10:27:40 +01:00
Zied ANDOLSI
049e609ce0 add image pull secret + add ingress path (#55) 2022-03-16 18:04:56 +01:00
Niels ten Boom
fa3739b643 Improvements k8sagent (#54) 2022-03-01 17:48:33 +01:00
15 changed files with 184 additions and 41 deletions

View File

@@ -2,8 +2,8 @@ apiVersion: v2
name: clearml
description: MLOps platform
type: application
version: "3.5.1"
appVersion: "1.2.0"
version: "3.8.2"
appVersion: "1.3.0"
home: https://clear.ml
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
sources:

View File

@@ -1,6 +1,6 @@
# ClearML Ecosystem for Kubernetes
![Version: 3.5.1](https://img.shields.io/badge/Version-3.5.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.2.0](https://img.shields.io/badge/AppVersion-1.2.0-informational?style=flat-square)
![Version: 3.8.2](https://img.shields.io/badge/Version-3.8.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.3.0](https://img.shields.io/badge/AppVersion-1.3.0-informational?style=flat-square)
MLOps platform
@@ -31,9 +31,7 @@ For development/evaluation it's possible to use [kind](https://kind.sigs.k8s.io)
After installation, the following commands will create a complete ClearML installation:
```
mkdir -pm 777 /tmp/clearml-kind
cat <<EOF > /tmp/clearml-kind.yaml
cat <<EOF | kind create cluster --config=-
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
@@ -62,8 +60,6 @@ nodes:
containerPath: /var/local-path-provisioner
EOF
kind create cluster --config /tmp/clearml-kind.yaml
helm install clearml allegroai/clearml
```
@@ -89,6 +85,24 @@ This will create 3 ingress rules:
Just pointing the domain records to the IP where ingress controller is responding will complete the deployment process.
## Upgrades/ Values upgrades
Updating to latest version of this chart can be done in two steps:
```
helm repo update
helm upgrade clearml allegroai/clearml
```
Changing values on existing installation can be done with:
```
helm upgrade clearml allegroai/clearml --version <CURRENT CHART VERSION> -f custom_values.yaml
```
Please note: when updating values only, always set an explicit chart version to avoid an unintended chart upgrade.
Keeping version upgrades and values changes as separate procedures is a good practice to separate potential concerns.
## Additional Configuration for ClearML Server
You can also configure the **clearml-server** for:
@@ -163,16 +177,19 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| agentGroups.agent-group-gpu.replicaCount | int | `0` | |
| agentGroups.agent-group-gpu.tolerations | list | `[]` | |
| agentGroups.agent-group-gpu.updateStrategy | string | `"Recreate"` | |
| agentk8sglue.defaultDockerImage | string | `"nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu18.04"` | |
| agentk8sglue.defaultDockerImage | string | `"nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04"` | |
| agentk8sglue.enabled | bool | `false` | |
| agentk8sglue.id | string | `"k8s-agent"` | |
| agentk8sglue.image.repository | string | `"allegroai/clearml-agent-k8s"` | |
| agentk8sglue.image.tag | string | `"aws-latest-1.21"` | |
| agentk8sglue.maxPods | int | `10` | |
| agentk8sglue.podTemplate.env | list | `[]` | |
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | |
| agentk8sglue.podTemplate.resources | object | `{}` | |
| agentk8sglue.podTemplate.tolerations | object | `{}` | |
| agentk8sglue.podTemplate.tolerations | list | `[]` | |
| agentk8sglue.podTemplate.volumes | list | `[]` | |
| agentk8sglue.queue | string | `"aws-instances"` | |
| agentk8sglue.serviceAccountName | string | `"default"` | |
| agentservices.affinity | object | `{}` | |
| agentservices.agentVersion | string | `""` | |
| agentservices.awsAccessKeyId | string | `nil` | |
@@ -199,14 +216,14 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| agentservices.storage.data.class | string | `"standard"` | |
| agentservices.storage.data.size | string | `"50Gi"` | |
| agentservices.tolerations | list | `[]` | |
| apiserver.additionalConfigs | object | `{}` | |
| apiserver.additionalConfigs | object | `{}` | additional configurations that can be used by api server; check examples in values.yaml file |
| apiserver.affinity | object | `{}` | |
| apiserver.authCookiesMaxAge | int | `864000` | Amount of seconds the authorization cookie will last in user browser |
| apiserver.configDir | string | `"/opt/clearml/config"` | |
| apiserver.extraEnvs | list | `[]` | |
| apiserver.image.pullPolicy | string | `"IfNotPresent"` | |
| apiserver.image.repository | string | `"allegroai/clearml"` | |
| apiserver.image.tag | string | `"1.2.0"` | |
| apiserver.image.tag | string | `"1.3.0"` | |
| apiserver.livenessDelay | int | `60` | |
| apiserver.nodeSelector | object | `{}` | |
| apiserver.podAnnotations | object | `{}` | |
@@ -266,7 +283,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| fileserver.extraEnvs | list | `[]` | |
| fileserver.image.pullPolicy | string | `"IfNotPresent"` | |
| fileserver.image.repository | string | `"allegroai/clearml"` | |
| fileserver.image.tag | string | `"1.2.0"` | |
| fileserver.image.tag | string | `"1.3.0"` | |
| fileserver.nodeSelector | object | `{}` | |
| fileserver.podAnnotations | object | `{}` | |
| fileserver.replicaCount | int | `1` | |
@@ -279,14 +296,19 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| fileserver.tolerations | list | `[]` | |
| ingress.annotations | object | `{}` | |
| ingress.api.annotations | object | `{}` | |
| ingress.api.enabled | bool | `false` | |
| ingress.api.hostName | string | `"api.clearml.127-0-0-1.nip.io"` | |
| ingress.api.path | string | `"/"` | |
| ingress.api.tlsSecretName | string | `""` | |
| ingress.app.annotations | object | `{}` | |
| ingress.app.enabled | bool | `false` | |
| ingress.app.hostName | string | `"app.clearml.127-0-0-1.nip.io"` | |
| ingress.app.path | string | `"/"` | |
| ingress.app.tlsSecretName | string | `""` | |
| ingress.enabled | bool | `false` | |
| ingress.files.annotations | object | `{}` | |
| ingress.files.enabled | bool | `false` | |
| ingress.files.hostName | string | `"files.clearml.127-0-0-1.nip.io"` | |
| ingress.files.path | string | `"/"` | |
| ingress.files.tlsSecretName | string | `""` | |
| ingress.name | string | `"clearml-server-ingress"` | |
| mongodb.architecture | string | `"standalone"` | |
@@ -315,11 +337,12 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| secret.credentials.tests.accessKey | string | `"ENP39EQM4SLACGD5FXB7"` | Set for tests_user_key field |
| secret.credentials.tests.secretKey | string | `"lPcm0imbcBZ8mwgO7tpadutiS3gnJD05x9j7afwXPS35IKbpiQ"` | Set for tests_user_secret field |
| secret.httpSession | string | `"9Tw20RbhJ1bLBiHEOWXvhplKGUbTgLzAtwFN2oLQvWwS0uRpD5"` | Set for http_session field |
| webserver.additionalConfigs | object | `{}` | |
| webserver.affinity | object | `{}` | |
| webserver.extraEnvs | list | `[]` | |
| webserver.image.pullPolicy | string | `"IfNotPresent"` | |
| webserver.image.repository | string | `"allegroai/clearml"` | |
| webserver.image.tag | string | `"1.2.0"` | |
| webserver.image.tag | string | `"1.3.0"` | |
| webserver.nodeSelector | object | `{}` | |
| webserver.podAnnotations | object | `{}` | |
| webserver.replicaCount | int | `1` | |

View File

@@ -28,9 +28,7 @@ For development/evaluation it's possible to use [kind](https://kind.sigs.k8s.io)
After installation, the following commands will create a complete ClearML installation:
```
mkdir -pm 777 /tmp/clearml-kind
cat <<EOF > /tmp/clearml-kind.yaml
cat <<EOF | kind create cluster --config=-
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
@@ -59,8 +57,6 @@ nodes:
containerPath: /var/local-path-provisioner
EOF
kind create cluster --config /tmp/clearml-kind.yaml
helm install clearml allegroai/clearml
```
@@ -86,6 +82,24 @@ This will create 3 ingress rules:
Just pointing the domain records to the IP where ingress controller is responding will complete the deployment process.
## Upgrades/ Values upgrades
Updating to latest version of this chart can be done in two steps:
```
helm repo update
helm upgrade clearml allegroai/clearml
```
Changing values on existing installation can be done with:
```
helm upgrade clearml allegroai/clearml --version <CURRENT CHART VERSION> -f custom_values.yaml
```
Please note: when updating values only, always set an explicit chart version to avoid an unintended chart upgrade.
Keeping version upgrades and values changes as separate procedures is a good practice to separate potential concerns.
## Additional Configuration for ClearML Server
You can also configure the **clearml-server** for:

View File

@@ -140,3 +140,17 @@ Create the name of the Files service to use
{{- printf "%s%s%s%s" "http://" (include "clearml.fullname" .) "-fileserver:" (.Values.fileserver.service.port | toString) }}
{{- end }}
{{- end }}
{{/*
Render the pod-level "imagePullSecrets" list from .Values.global.imagePullSecrets.

Emits nothing when "global" or "global.imagePullSecrets" is unset or empty, so
a pod spec can include this template unconditionally (piped through "indent"
to the pod spec level).
Each entry of the values list is used as a secret name and is quoted so that a
number-looking name is still rendered as a YAML string.
*/}}
{{- define "clearml.imagePullSecrets" -}}
{{- if .Values.global }}
{{- if .Values.global.imagePullSecrets }}
imagePullSecrets:
{{- range .Values.global.imagePullSecrets }}
  - name: {{ . | quote }}
{{- end }}
{{- end -}}
{{- end -}}
{{- end -}}

View File

@@ -9,10 +9,24 @@ data:
metadata:
{{- /* Helm built-in objects are case-sensitive: the release namespace is .Release.Namespace; .Release.namespace renders empty. */}}
namespace: {{ .Release.Namespace }}
spec:
serviceAccountName: {{ .Values.agentk8sglue.serviceAccountName }}
volumes:
{{- range .Values.agentk8sglue.podTemplate.volumes }}
- name: {{ .name }}
persistentVolumeClaim:
claimName: {{ .name }}
{{- end }}
containers:
- resources:
{{- toYaml .Values.agentk8sglue.podTemplate.resources | nindent 10 }}
env:
ports:
- containerPort: 10022
volumeMounts:
{{- range .Values.agentk8sglue.podTemplate.volumes }}
- mountPath: {{ .path }}
name: {{ .name }}
{{- end }}
env:
- name: CLEARML_API_HOST
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
- name: CLEARML_WEB_HOST
@@ -29,9 +43,15 @@ data:
secretKeyRef:
name: clearml-conf
key: apiserver_secret
tolerations:
{{- toYaml .Values.agentk8sglue.podTemplate.tolerations | nindent 8 }}
nodeSelector:
{{- toYaml .Values.agentk8sglue.podTemplate.nodeSelector | nindent 8 }}
{{- if .Values.agentk8sglue.podTemplate.env }}
{{ toYaml .Values.agentk8sglue.podTemplate.env | nindent 8 }}
{{- end }}
{{- with .Values.agentk8sglue.podTemplate.nodeSelector}}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.agentk8sglue.podTemplate.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,13 @@
{{- /*
ConfigMap holding extra webserver configuration files.
Rendered only when .Values.webserver.additionalConfigs is non-empty; every
key of that map becomes one data entry whose value is emitted as a literal
block scalar.
*/ -}}
{{- $extraConfigs := .Values.webserver.additionalConfigs -}}
{{- if $extraConfigs -}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: "{{ include "clearml.fullname" . }}-webserver-configmap"
  labels:
    {{- include "clearml.labels" . | nindent 4 }}
data:
  {{- range $file, $body := $extraConfigs }}
  {{ $file }}: |
    {{- $body | nindent 4 }}
  {{- end }}
{{- end -}}

View File

@@ -36,6 +36,8 @@ spec:
value: "{{.Values.agentk8sglue.queue}}"
- name: K8S_GLUE_EXTRA_ARGS
value: "--template-yaml /root/template/template.yaml"
{{- /* Pass the release namespace to the glue agent. The built-in is .Release.Namespace (capital N); .Release.namespace renders an empty value. */}}
- name: K8S_DEFAULT_NAMESPACE
  value: "{{ .Release.Namespace }}"
- name: CLEARML_API_ACCESS_KEY
valueFrom:
secretKeyRef:

View File

@@ -19,6 +19,7 @@ spec:
labels:
{{- include "clearml.selectorLabelsApiServer" . | nindent 8 }}
spec:
{{- include "clearml.imagePullSecrets" . | indent 6 }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.apiserver.image.repository }}:{{ .Values.apiserver.image.tag | default .Chart.AppVersion }}"

View File

@@ -22,6 +22,7 @@ spec:
- name: fileserver-data
persistentVolumeClaim:
claimName: {{ include "clearml.fullname" . }}-fileserver-data
{{- include "clearml.imagePullSecrets" . | indent 6 }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.fileserver.image.repository }}:{{ .Values.fileserver.image.tag | default .Chart.AppVersion }}"

View File

@@ -18,6 +18,7 @@ spec:
labels:
{{- include "clearml.selectorLabelsWebServer" . | nindent 8 }}
spec:
{{- include "clearml.imagePullSecrets" . | indent 6 }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.webserver.image.repository }}:{{ .Values.webserver.image.tag | default .Chart.AppVersion }}"
@@ -38,6 +39,11 @@ spec:
- curl
- -X OPTIONS
- http://0.0.0.0:80/
{{- if .Values.webserver.additionalConfigs }}
volumeMounts:
- name: webserver-config
mountPath: /opt/clearml/config
{{- end }}
env:
- name: NGINX_APISERVER_ADDRESS
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
@@ -50,6 +56,12 @@ spec:
- webserver
resources:
{{- toYaml .Values.webserver.resources | nindent 12 }}
{{- if .Values.webserver.additionalConfigs }}
volumes:
- name: webserver-config
configMap:
name: "{{ include "clearml.fullname" . }}-webserver-configmap"
{{- end }}
{{- with .Values.webserver.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
@@ -61,4 +73,4 @@ spec:
{{- with .Values.webserver.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}

View File

@@ -1,4 +1,4 @@
{{- if .Values.ingress.enabled -}}
{{- if .Values.ingress.api.enabled -}}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
@@ -29,7 +29,7 @@ spec:
- host: {{ .Values.ingress.api.hostName }}
http:
paths:
- path: "/"
- path: {{ .Values.ingress.api.path }}
{{ if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
pathType: Prefix
backend:

View File

@@ -1,4 +1,4 @@
{{- if .Values.ingress.enabled -}}
{{- if .Values.ingress.app.enabled -}}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
@@ -28,7 +28,7 @@ spec:
- host: {{ .Values.ingress.app.hostName }}
http:
paths:
- path: "/"
- path: {{ .Values.ingress.app.path }}
{{ if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
pathType: Prefix
backend:

View File

@@ -1,4 +1,4 @@
{{- if .Values.ingress.enabled -}}
{{- if .Values.ingress.files.enabled -}}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
@@ -28,7 +28,7 @@ spec:
- host: {{ .Values.ingress.files.hostName }}
http:
paths:
- path: "/"
- path: {{ .Values.ingress.files.path }}
{{ if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
pathType: Prefix
backend:

View File

@@ -8,7 +8,7 @@ rules:
- ""
resources:
- pods
verbs: ["get", "list", "watch", "create", "patch"]
verbs: ["get", "list", "watch", "create", "patch", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding

57
charts/clearml/values.yaml Normal file → Executable file
View File

@@ -1,21 +1,29 @@
# global:
# imagePullSecrets:
# - docker-cfg
clearml:
defaultCompany: "d1bd92a3b039400cbafc60a7a5b1e52b"
ingress:
enabled: false
name: clearml-server-ingress
annotations: {}
app:
enabled: false
hostName: "app.clearml.127-0-0-1.nip.io"
tlsSecretName: ""
annotations: {}
path: "/"
api:
enabled: false
hostName: "api.clearml.127-0-0-1.nip.io"
tlsSecretName: ""
annotations: {}
path: "/"
files:
enabled: false
hostName: "files.clearml.127-0-0-1.nip.io"
tlsSecretName: ""
annotations: {}
path: "/"
secret:
# -- Set for http_session field
@@ -59,7 +67,7 @@ apiserver:
image:
repository: "allegroai/clearml"
pullPolicy: IfNotPresent
tag: "1.2.0"
tag: "1.3.0"
extraEnvs: []
@@ -83,6 +91,7 @@ apiserver:
affinity: {}
# -- additional configurations that can be used by api server; check examples in values.yaml file
additionalConfigs: {}
# services.conf: |
# tasks {
@@ -93,6 +102,25 @@ apiserver:
# watch_interval_sec: 900
# }
# }
# apiserver.conf: |
# auth {
# fixed_users {
# enabled: true
# pass_hashed: false
# users: [
# {
# username: "jane"
# password: "12345678"
# name: "Jane Doe"
# },
# {
# username: "john"
# password: "12345678"
# name: "John Doe"
# },
# ]
# }
# }
fileserver:
service:
@@ -108,7 +136,7 @@ fileserver:
image:
repository: "allegroai/clearml"
pullPolicy: IfNotPresent
tag: "1.2.0"
tag: "1.3.0"
extraEnvs: []
@@ -153,7 +181,7 @@ webserver:
image:
repository: "allegroai/clearml"
pullPolicy: IfNotPresent
tag: "1.2.0"
tag: "1.3.0"
podAnnotations: {}
@@ -175,6 +203,8 @@ webserver:
affinity: {}
additionalConfigs: {}
agentservices:
enabled: false
clearmlHostIp: null
@@ -302,15 +332,28 @@ agentk8sglue:
image:
repository: "allegroai/clearml-agent-k8s"
tag: "aws-latest-1.21"
serviceAccountName: default
maxPods: 10
defaultDockerImage: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu18.04 # default docker image that is spawned as new pod
queue: aws-instances # create this queue manually in the UI first for it to work
defaultDockerImage: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04 # default docker image that is spawned as new pod
queue: aws-instances # create this queue manually in the UI first for it to work
id: k8s-agent
podTemplate:
volumes: []
# - name: "yourvolume"
# path: "/yourpath"
env: []
# # to setup access to private repo, setup secret with git credentials:
# - name: CLEARML_AGENT_GIT_USER
# value: mygitusername
# - name: CLEARML_AGENT_GIT_PASS
# valueFrom:
# secretKeyRef:
# name: git-password
# key: git-password
resources: {}
# limits:
# nvidia.com/gpu: 1
tolerations: {}
tolerations: []
# - key: "nvidia.com/gpu"
# operator: Exists
# effect: "NoSchedule"