mirror of
https://github.com/clearml/clearml-helm-charts
synced 2025-04-17 01:31:13 +00:00
Compare commits
6 Commits
clearml-3.
...
clearml-3.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
06070a5c20 | ||
|
|
5972fd8e5f | ||
|
|
7a7bd930f8 | ||
|
|
25dfbd12d6 | ||
|
|
d7c3b9d5d9 | ||
|
|
e16060f2ad |
2
.github/helm-docs.sh
vendored
2
.github/helm-docs.sh
vendored
@@ -1,7 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
CHART_DIRS="$(git diff --find-renames --name-only "$(git rev-parse --abbrev-ref HEAD)" remotes/origin/main -- 'charts' | grep '[cC]hart.yaml' | sed -e 's#/[Cc]hart.yaml##g')"
|
||||
HELM_DOCS_VERSION="1.7.0"
|
||||
HELM_DOCS_VERSION="1.8.1"
|
||||
|
||||
curl --silent --show-error --fail --location --output /tmp/helm-docs.tar.gz https://github.com/norwoodj/helm-docs/releases/download/v"${HELM_DOCS_VERSION}"/helm-docs_"${HELM_DOCS_VERSION}"_Linux_x86_64.tar.gz
|
||||
tar -xf /tmp/helm-docs.tar.gz helm-docs
|
||||
|
||||
@@ -2,8 +2,8 @@ apiVersion: v2
|
||||
name: clearml
|
||||
description: MLOps platform
|
||||
type: application
|
||||
version: "3.8.0"
|
||||
appVersion: "1.3.0"
|
||||
version: "3.10.2"
|
||||
appVersion: "1.4.0"
|
||||
home: https://clear.ml
|
||||
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
|
||||
sources:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# ClearML Ecosystem for Kubernetes
|
||||
|
||||
  
|
||||
  
|
||||
|
||||
MLOps platform
|
||||
|
||||
@@ -10,7 +10,7 @@ MLOps platform
|
||||
|
||||
| Name | Email | Url |
|
||||
| ---- | ------ | --- |
|
||||
| valeriano-manassero | | https://github.com/valeriano-manassero |
|
||||
| valeriano-manassero | | <https://github.com/valeriano-manassero> |
|
||||
|
||||
## Introduction
|
||||
|
||||
@@ -31,9 +31,7 @@ For development/evaluation it's possible to use [kind](https://kind.sigs.k8s.io)
|
||||
After installation, following commands will create a complete ClearML insatllation:
|
||||
|
||||
```
|
||||
mkdir -pm 777 /tmp/clearml-kind
|
||||
|
||||
cat <<EOF > /tmp/clearml-kind.yaml
|
||||
cat <<EOF | kind create cluster --config=- ─╯
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
@@ -62,8 +60,6 @@ nodes:
|
||||
containerPath: /var/local-path-provisioner
|
||||
EOF
|
||||
|
||||
kind create cluster --config /tmp/clearml-kind.yaml
|
||||
|
||||
helm install clearml allegroai/clearml
|
||||
```
|
||||
|
||||
@@ -89,6 +85,24 @@ This will create 3 ingress rules:
|
||||
|
||||
Just pointing the domain records to the IP where ingress controller is responding will complete the deployment process.
|
||||
|
||||
## Upgrades/ Values upgrades
|
||||
|
||||
Updating to latest version of this chart can be done in two steps:
|
||||
|
||||
```
|
||||
helm repo update
|
||||
helm upgrade clearml allegroai/clearml
|
||||
```
|
||||
|
||||
Changing values on existing installation can be done with:
|
||||
|
||||
```
|
||||
helm upgrade clearml allegroai/clearml --version <CURRENT CHART VERSION> -f custom_values.yaml
|
||||
```
|
||||
|
||||
Please note: updating values only should always be done setting explicit chart version to avoid a possible chart update.
|
||||
Keeping separate updates procedures between version and values can be a good practice to seprate potential concerns.
|
||||
|
||||
## Additional Configuration for ClearML Server
|
||||
|
||||
You can also configure the **clearml-server** for:
|
||||
@@ -127,7 +141,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-cpu.clearmlGitPassword | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.clearmlGitUser | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.clearmlSecretKey | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.enabled | bool | `true` | |
|
||||
| agentGroups.agent-group-cpu.enabled | bool | `false` | |
|
||||
| agentGroups.agent-group-cpu.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| agentGroups.agent-group-cpu.image.repository | string | `"ubuntu"` | |
|
||||
| agentGroups.agent-group-cpu.image.tag | string | `"18.04"` | |
|
||||
@@ -151,7 +165,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-gpu.clearmlGitPassword | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.clearmlGitUser | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.clearmlSecretKey | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.enabled | bool | `true` | |
|
||||
| agentGroups.agent-group-gpu.enabled | bool | `false` | |
|
||||
| agentGroups.agent-group-gpu.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| agentGroups.agent-group-gpu.image.repository | string | `"nvidia/cuda"` | |
|
||||
| agentGroups.agent-group-gpu.image.tag | string | `"11.0-base-ubuntu18.04"` | |
|
||||
@@ -164,17 +178,17 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-gpu.tolerations | list | `[]` | |
|
||||
| agentGroups.agent-group-gpu.updateStrategy | string | `"Recreate"` | |
|
||||
| agentk8sglue.defaultDockerImage | string | `"nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04"` | |
|
||||
| agentk8sglue.enabled | bool | `false` | |
|
||||
| agentk8sglue.enabled | bool | `true` | |
|
||||
| agentk8sglue.id | string | `"k8s-agent"` | |
|
||||
| agentk8sglue.image.repository | string | `"allegroai/clearml-agent-k8s"` | |
|
||||
| agentk8sglue.image.tag | string | `"aws-latest-1.21"` | |
|
||||
| agentk8sglue.image.tag | string | `"latest"` | |
|
||||
| agentk8sglue.maxPods | int | `10` | |
|
||||
| agentk8sglue.podTemplate.env | list | `[]` | |
|
||||
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | |
|
||||
| agentk8sglue.podTemplate.resources | object | `{}` | |
|
||||
| agentk8sglue.podTemplate.tolerations | list | `[]` | |
|
||||
| agentk8sglue.podTemplate.volumes | list | `[]` | |
|
||||
| agentk8sglue.queue | string | `"aws-instances"` | |
|
||||
| agentk8sglue.queue | string | `"default"` | |
|
||||
| agentk8sglue.serviceAccountName | string | `"default"` | |
|
||||
| agentservices.affinity | object | `{}` | |
|
||||
| agentservices.agentVersion | string | `""` | |
|
||||
@@ -199,7 +213,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentservices.podAnnotations | object | `{}` | |
|
||||
| agentservices.replicaCount | int | `1` | |
|
||||
| agentservices.resources | object | `{}` | |
|
||||
| agentservices.storage.data.class | string | `"standard"` | |
|
||||
| agentservices.storage.data.class | string | `""` | |
|
||||
| agentservices.storage.data.size | string | `"50Gi"` | |
|
||||
| agentservices.tolerations | list | `[]` | |
|
||||
| apiserver.additionalConfigs | object | `{}` | additional configurations that can be used by api server; check examples in values.yaml file |
|
||||
@@ -209,7 +223,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| apiserver.extraEnvs | list | `[]` | |
|
||||
| apiserver.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| apiserver.image.repository | string | `"allegroai/clearml"` | |
|
||||
| apiserver.image.tag | string | `"1.3.0"` | |
|
||||
| apiserver.image.tag | string | `"1.4.0"` | |
|
||||
| apiserver.livenessDelay | int | `60` | |
|
||||
| apiserver.nodeSelector | object | `{}` | |
|
||||
| apiserver.podAnnotations | object | `{}` | |
|
||||
@@ -269,7 +283,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| fileserver.extraEnvs | list | `[]` | |
|
||||
| fileserver.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| fileserver.image.repository | string | `"allegroai/clearml"` | |
|
||||
| fileserver.image.tag | string | `"1.3.0"` | |
|
||||
| fileserver.image.tag | string | `"1.4.0"` | |
|
||||
| fileserver.nodeSelector | object | `{}` | |
|
||||
| fileserver.podAnnotations | object | `{}` | |
|
||||
| fileserver.replicaCount | int | `1` | |
|
||||
@@ -277,7 +291,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| fileserver.service.nodePort | int | `30081` | If service.type set to NodePort, this will be set to service's nodePort field. If service.type is set to others, this field will be ignored |
|
||||
| fileserver.service.port | int | `8081` | |
|
||||
| fileserver.service.type | string | `"NodePort"` | This will set to service's spec.type field |
|
||||
| fileserver.storage.data.class | string | `"standard"` | |
|
||||
| fileserver.storage.data.class | string | `""` | |
|
||||
| fileserver.storage.data.size | string | `"50Gi"` | |
|
||||
| fileserver.tolerations | list | `[]` | |
|
||||
| ingress.annotations | object | `{}` | |
|
||||
@@ -328,7 +342,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| webserver.extraEnvs | list | `[]` | |
|
||||
| webserver.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| webserver.image.repository | string | `"allegroai/clearml"` | |
|
||||
| webserver.image.tag | string | `"1.3.0"` | |
|
||||
| webserver.image.tag | string | `"1.4.0"` | |
|
||||
| webserver.nodeSelector | object | `{}` | |
|
||||
| webserver.podAnnotations | object | `{}` | |
|
||||
| webserver.replicaCount | int | `1` | |
|
||||
|
||||
@@ -28,9 +28,7 @@ For development/evaluation it's possible to use [kind](https://kind.sigs.k8s.io)
|
||||
After installation, following commands will create a complete ClearML insatllation:
|
||||
|
||||
```
|
||||
mkdir -pm 777 /tmp/clearml-kind
|
||||
|
||||
cat <<EOF > /tmp/clearml-kind.yaml
|
||||
cat <<EOF | kind create cluster --config=- ─╯
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
@@ -59,8 +57,6 @@ nodes:
|
||||
containerPath: /var/local-path-provisioner
|
||||
EOF
|
||||
|
||||
kind create cluster --config /tmp/clearml-kind.yaml
|
||||
|
||||
helm install clearml allegroai/clearml
|
||||
```
|
||||
|
||||
@@ -86,6 +82,24 @@ This will create 3 ingress rules:
|
||||
|
||||
Just pointing the domain records to the IP where ingress controller is responding will complete the deployment process.
|
||||
|
||||
## Upgrades/ Values upgrades
|
||||
|
||||
Updating to latest version of this chart can be done in two steps:
|
||||
|
||||
```
|
||||
helm repo update
|
||||
helm upgrade clearml allegroai/clearml
|
||||
```
|
||||
|
||||
Changing values on existing installation can be done with:
|
||||
|
||||
```
|
||||
helm upgrade clearml allegroai/clearml --version <CURRENT CHART VERSION> -f custom_values.yaml
|
||||
```
|
||||
|
||||
Please note: updating values only should always be done setting explicit chart version to avoid a possible chart update.
|
||||
Keeping separate updates procedures between version and values can be a good practice to seprate potential concerns.
|
||||
|
||||
## Additional Configuration for ClearML Server
|
||||
|
||||
You can also configure the **clearml-server** for:
|
||||
|
||||
@@ -7,7 +7,7 @@ data:
|
||||
template.yaml: |
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
namespace: {{ .Release.namespace }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
spec:
|
||||
serviceAccountName: {{ .Values.agentk8sglue.serviceAccountName }}
|
||||
volumes:
|
||||
@@ -43,10 +43,15 @@ data:
|
||||
secretKeyRef:
|
||||
name: clearml-conf
|
||||
key: apiserver_secret
|
||||
{{- toYaml .Values.agentk8sglue.podTemplate.env | nindent 8 }}
|
||||
tolerations:
|
||||
{{- toYaml .Values.agentk8sglue.podTemplate.tolerations | nindent 8 }}
|
||||
{{- if .Values.agentk8sglue.podTemplate.env }}
|
||||
{{ toYaml .Values.agentk8sglue.podTemplate.env | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.podTemplate.nodeSelector}}
|
||||
nodeSelector:
|
||||
{{- toYaml .Values.agentk8sglue.podTemplate.nodeSelector | nindent 8 }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.podTemplate.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
|
||||
@@ -35,7 +35,9 @@ spec:
|
||||
- name: K8S_GLUE_QUEUE
|
||||
value: "{{.Values.agentk8sglue.queue}}"
|
||||
- name: K8S_GLUE_EXTRA_ARGS
|
||||
value: "--template-yaml /root/template/template.yaml"
|
||||
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml"
|
||||
- name: K8S_DEFAULT_NAMESPACE
|
||||
value: "{{ .Release.Namespace }}"
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
|
||||
@@ -11,5 +11,7 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.agentservices.storage.data.size | quote }}
|
||||
{{- if .Values.agentservices.storage.data.class -}}
|
||||
storageClassName: {{ .Values.agentservices.storage.data.class | quote }}
|
||||
{{- end -}}
|
||||
{{- end }}
|
||||
|
||||
@@ -10,4 +10,7 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.fileserver.storage.data.size | quote }}
|
||||
storageClassName: {{ .Values.fileserver.storage.data.class | quote }}
|
||||
{{- if .Values.fileserver.storage.data.class -}}
|
||||
storageClassName: {{ .Values.fileserver.storage.data.class | quote }}
|
||||
{{- end -}}
|
||||
|
||||
@@ -17,7 +17,7 @@ metadata:
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: default
|
||||
namespace: {{ .Release.namespace }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
|
||||
@@ -67,7 +67,7 @@ apiserver:
|
||||
image:
|
||||
repository: "allegroai/clearml"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "1.3.0"
|
||||
tag: "1.4.0"
|
||||
|
||||
extraEnvs: []
|
||||
|
||||
@@ -136,7 +136,7 @@ fileserver:
|
||||
image:
|
||||
repository: "allegroai/clearml"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "1.3.0"
|
||||
tag: "1.4.0"
|
||||
|
||||
extraEnvs: []
|
||||
|
||||
@@ -162,7 +162,7 @@ fileserver:
|
||||
|
||||
storage:
|
||||
data:
|
||||
class: "standard"
|
||||
class: ""
|
||||
size: 50Gi
|
||||
|
||||
webserver:
|
||||
@@ -181,7 +181,7 @@ webserver:
|
||||
image:
|
||||
repository: "allegroai/clearml"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "1.3.0"
|
||||
tag: "1.4.0"
|
||||
|
||||
podAnnotations: {}
|
||||
|
||||
@@ -252,12 +252,12 @@ agentservices:
|
||||
|
||||
storage:
|
||||
data:
|
||||
class: "standard"
|
||||
class: ""
|
||||
size: 50Gi
|
||||
|
||||
agentGroups:
|
||||
agent-group-cpu:
|
||||
enabled: true
|
||||
enabled: false
|
||||
name: agent-group-cpu
|
||||
replicaCount: 1
|
||||
updateStrategy: Recreate
|
||||
@@ -291,7 +291,7 @@ agentGroups:
|
||||
affinity: {}
|
||||
|
||||
agent-group-gpu:
|
||||
enabled: true
|
||||
enabled: false
|
||||
name: agent-group-gpu
|
||||
replicaCount: 0
|
||||
updateStrategy: Recreate
|
||||
@@ -328,14 +328,14 @@ agentGroups:
|
||||
# GPU autoscaling nodes.
|
||||
# https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue
|
||||
agentk8sglue:
|
||||
enabled: false
|
||||
enabled: true
|
||||
image:
|
||||
repository: "allegroai/clearml-agent-k8s"
|
||||
tag: "aws-latest-1.21"
|
||||
tag: "latest"
|
||||
serviceAccountName: default
|
||||
maxPods: 10
|
||||
defaultDockerImage: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04 # default docker image that is spawned as new pod
|
||||
queue: aws-instances # create this queue manually in the UI first for it to work
|
||||
queue: default
|
||||
id: k8s-agent
|
||||
podTemplate:
|
||||
volumes: []
|
||||
|
||||
Reference in New Issue
Block a user