mirror of
https://github.com/clearml/clearml-helm-charts
synced 2025-04-17 01:31:13 +00:00
Compare commits
6 Commits
clearml-2.
...
clearml-2.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
aa761dd450 | ||
|
|
7ff2f94d1a | ||
|
|
618a269c97 | ||
|
|
3f215d2d90 | ||
|
|
03223fc1c1 | ||
|
|
898089b7fb |
@@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: clearml
|
||||
description: MLOps platform
|
||||
type: application
|
||||
version: "2.1.0"
|
||||
version: "2.4.0"
|
||||
appVersion: "1.1.1"
|
||||
home: https://clear.ml
|
||||
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# ClearML Ecosystem for Kubernetes
|
||||
|
||||
  
|
||||
  
|
||||
|
||||
MLOps platform
|
||||
|
||||
@@ -121,6 +121,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-cpu.clearmlGitPassword | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.clearmlGitUser | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.clearmlSecretKey | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.enabled | bool | `true` | |
|
||||
| agentGroups.agent-group-cpu.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| agentGroups.agent-group-cpu.image.repository | string | `"ubuntu"` | |
|
||||
| agentGroups.agent-group-cpu.image.tag | string | `"18.04"` | |
|
||||
@@ -131,6 +132,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-cpu.queues | string | `"default"` | |
|
||||
| agentGroups.agent-group-cpu.replicaCount | int | `1` | |
|
||||
| agentGroups.agent-group-cpu.tolerations | list | `[]` | |
|
||||
| agentGroups.agent-group-cpu.updateStrategy | string | `"Recreate"` | |
|
||||
| agentGroups.agent-group-gpu.affinity | object | `{}` | |
|
||||
| agentGroups.agent-group-gpu.agentVersion | string | `""` | |
|
||||
| agentGroups.agent-group-gpu.awsAccessKeyId | string | `nil` | |
|
||||
@@ -143,6 +145,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-gpu.clearmlGitPassword | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.clearmlGitUser | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.clearmlSecretKey | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.enabled | bool | `true` | |
|
||||
| agentGroups.agent-group-gpu.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| agentGroups.agent-group-gpu.image.repository | string | `"nvidia/cuda"` | |
|
||||
| agentGroups.agent-group-gpu.image.tag | string | `"11.0-base-ubuntu18.04"` | |
|
||||
@@ -153,6 +156,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-gpu.queues | string | `"default"` | |
|
||||
| agentGroups.agent-group-gpu.replicaCount | int | `0` | |
|
||||
| agentGroups.agent-group-gpu.tolerations | list | `[]` | |
|
||||
| agentGroups.agent-group-gpu.updateStrategy | string | `"Recreate"` | |
|
||||
| agentservices.affinity | object | `{}` | |
|
||||
| agentservices.agentVersion | string | `""` | |
|
||||
| agentservices.awsAccessKeyId | string | `nil` | |
|
||||
@@ -178,6 +182,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentservices.storage.data.class | string | `"standard"` | |
|
||||
| agentservices.storage.data.size | string | `"50Gi"` | |
|
||||
| agentservices.tolerations | list | `[]` | |
|
||||
| apiserver.additionalConfigs | object | `{}` | |
|
||||
| apiserver.affinity | object | `{}` | |
|
||||
| apiserver.configDir | string | `"/opt/clearml/config"` | |
|
||||
| apiserver.extraEnvs | list | `[]` | |
|
||||
@@ -195,9 +200,6 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| apiserver.resources | object | `{}` | |
|
||||
| apiserver.service.port | int | `8008` | |
|
||||
| apiserver.service.type | string | `"NodePort"` | |
|
||||
| apiserver.storage.config.class | string | `"standard"` | |
|
||||
| apiserver.storage.config.size | string | `"1Gi"` | |
|
||||
| apiserver.storage.enableConfigVolume | bool | `false` | |
|
||||
| apiserver.tolerations | list | `[]` | |
|
||||
| clearml.defaultCompany | string | `"d1bd92a3b039400cbafc60a7a5b1e52b"` | |
|
||||
| elasticsearch.clusterHealthCheckParams | string | `"wait_for_status=yellow&timeout=1s"` | |
|
||||
|
||||
@@ -95,3 +95,48 @@ Create the name of the service account to use
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the App service to use
|
||||
*/}}
|
||||
{{- define "clearml.serviceApp" -}}
|
||||
{{- if .Values.ingress.enabled }}
|
||||
{{- if .Values.ingress.tls.secretName }}
|
||||
{{- printf "%s%s%s" "https://" .Values.ingress.hostPrefixApp .Values.ingress.host }}
|
||||
{{- else }}
|
||||
{{- printf "%s%s%s" "http://" .Values.ingress.hostPrefixApp .Values.ingress.host }}
|
||||
{{- end }}
|
||||
{{- else }}
|
||||
{{- printf "%s%s%s%s" "http://" (include "clearml.fullname" .) "-webserver:" (.Values.webserver.service.port | toString) }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the Api service to use
|
||||
*/}}
|
||||
{{- define "clearml.serviceApi" -}}
|
||||
{{- if .Values.ingress.enabled }}
|
||||
{{- if .Values.ingress.tls.secretName }}
|
||||
{{- printf "%s%s%s" "https://" .Values.ingress.hostPrefixApi .Values.ingress.host }}
|
||||
{{- else }}
|
||||
{{- printf "%s%s%s" "http://" .Values.ingress.hostPrefixApi .Values.ingress.host }}
|
||||
{{- end }}
|
||||
{{- else }}
|
||||
{{- printf "%s%s%s%s" "http://" (include "clearml.fullname" .) "-apiserver:" (.Values.apiserver.service.port | toString) }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the Files service to use
|
||||
*/}}
|
||||
{{- define "clearml.serviceFiles" -}}
|
||||
{{- if .Values.ingress.enabled }}
|
||||
{{- if .Values.ingress.tls.secretName }}
|
||||
{{- printf "%s%s%s" "https://" .Values.ingress.hostPrefixFiles .Values.ingress.host }}
|
||||
{{- else }}
|
||||
{{- printf "%s%s%s" "http://" .Values.ingress.hostPrefixFiles .Values.ingress.host }}
|
||||
{{- end }}
|
||||
{{- else }}
|
||||
{{- printf "%s%s%s%s" "http://" (include "clearml.fullname" .) "-fileserver:" (.Values.fileserver.service.port | toString) }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
13
charts/clearml/templates/configmap-apiserver.yaml
Normal file
13
charts/clearml/templates/configmap-apiserver.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
{{- if .Values.apiserver.additionalConfigs -}}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: "{{ include "clearml.fullname" . }}-apiserver-configmap"
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
data:
|
||||
{{- range $key, $val := .Values.apiserver.additionalConfigs }}
|
||||
{{ $key }}: |
|
||||
{{- $val | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- end -}}
|
||||
@@ -1,5 +1,6 @@
|
||||
{{- range $key, $value := .Values.agentGroups }}
|
||||
{{- with $value }}
|
||||
{{- if .enabled }}
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
@@ -9,6 +10,8 @@ metadata:
|
||||
{{- include "clearml.labels" $ | nindent 4 }}
|
||||
spec:
|
||||
replicas: {{ .replicaCount }}
|
||||
strategy:
|
||||
type: {{ .updateStrategy }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "clearml.selectorLabelsAgent" $ | nindent 6 }}
|
||||
@@ -38,7 +41,7 @@ spec:
|
||||
- -c
|
||||
- >
|
||||
set -x;
|
||||
while [ $(curl -sw '%{http_code}' "http://{{ include "clearml.fullname" $ }}-apiserver:{{ $.Values.apiserver.service.port }}/debug.ping" -o /dev/null) -ne 200 ] ; do
|
||||
while [ $(curl -sw '%{http_code}' "{{ include "clearml.serviceApi" $ }}/debug.ping" -o /dev/null) -ne 200 ] ; do
|
||||
echo "waiting for apiserver" ;
|
||||
sleep 5 ;
|
||||
done
|
||||
@@ -54,11 +57,11 @@ spec:
|
||||
{{ .nvidiaGpusPerAgent }}
|
||||
env:
|
||||
- name: CLEARML_API_HOST
|
||||
value: 'http://{{ include "clearml.fullname" $ }}-apiserver:{{ $.Values.apiserver.service.port }}'
|
||||
value: {{ include "clearml.serviceApi" $ }}
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: 'http://{{ include "clearml.fullname" $ }}-webserver:{{ $.Values.webserver.service.port }}'
|
||||
value: {{ include "clearml.serviceApp" $ }}
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: 'http://{{ include "clearml.fullname" $ }}-fileserver:{{ $.Values.fileserver.service.port }}'
|
||||
value: {{ include "clearml.serviceFiles" $ }}
|
||||
- name: CLEARML_AGENT_GIT_USER
|
||||
value: {{ .clearmlGitUser}}
|
||||
- name: CLEARML_AGENT_GIT_PASS
|
||||
@@ -91,6 +94,13 @@ spec:
|
||||
python3 -m pip install -U pip ;
|
||||
python3 -m pip install clearml-agent{{ .agentVersion}} ;
|
||||
CLEARML_AGENT_K8S_HOST_MOUNT=/root/.clearml:/root/.clearml clearml-agent daemon --queue {{ .queues}}"
|
||||
{{ if .clearmlConfig }}
|
||||
volumeMounts:
|
||||
- name: agent-clearml-conf-volume
|
||||
mountPath: /root/clearml.conf
|
||||
subPath: clearml.conf
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
{{- with .nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
@@ -105,3 +115,4 @@ spec:
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
@@ -30,7 +30,7 @@ spec:
|
||||
- -c
|
||||
- >
|
||||
set -x;
|
||||
while [ $(curl -sw '%{http_code}' "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}/debug.ping" -o /dev/null) -ne 200 ] ; do
|
||||
while [ $(curl -sw '%{http_code}' "{{ include "clearml.serviceApi" $ }}/debug.ping" -o /dev/null) -ne 200 ] ; do
|
||||
echo "waiting for apiserver" ;
|
||||
sleep 5 ;
|
||||
done
|
||||
@@ -42,7 +42,7 @@ spec:
|
||||
- name: CLEARML_HOST_IP
|
||||
value: {{ .Values.agentservices.clearmlHostIp }}
|
||||
- name: CLEARML_API_HOST
|
||||
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
|
||||
value: {{ include "clearml.serviceApi" $ }}
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: {{ .Values.agentservices.clearmlWebHost }}
|
||||
- name: CLEARML_FILES_HOST
|
||||
|
||||
@@ -18,12 +18,6 @@ spec:
|
||||
labels:
|
||||
{{- include "clearml.selectorLabelsApiServer" . | nindent 8 }}
|
||||
spec:
|
||||
{{- if .Values.apiserver.storage.enableConfigVolume }}
|
||||
volumes:
|
||||
- name: apiserver-config
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ include "clearml.fullname" . }}-apiserver-config
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
image: "{{ .Values.apiserver.image.repository }}:{{ .Values.apiserver.image.tag | default .Chart.AppVersion }}"
|
||||
@@ -101,13 +95,19 @@ spec:
|
||||
httpGet:
|
||||
path: /debug.ping
|
||||
port: 8008
|
||||
{{- if .Values.apiserver.storage.enableConfigVolume }}
|
||||
{{- if .Values.apiserver.additionalConfigs }}
|
||||
volumeMounts:
|
||||
- name: apiserver-config
|
||||
mountPath: /opt/clearml/config
|
||||
{{- end }}
|
||||
resources:
|
||||
{{- toYaml .Values.apiserver.resources | nindent 12 }}
|
||||
{{- if .Values.apiserver.additionalConfigs }}
|
||||
volumes:
|
||||
- name: apiserver-config
|
||||
configMap:
|
||||
name: "{{ include "clearml.fullname" . }}-apiserver-configmap"
|
||||
{{- end }}
|
||||
{{- with .Values.apiserver.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
|
||||
35
charts/clearml/templates/ingress-api.yaml
Normal file
35
charts/clearml/templates/ingress-api.yaml
Normal file
@@ -0,0 +1,35 @@
|
||||
{{- if .Values.ingress.enabled -}}
|
||||
{{- $fullName := include "clearml.fullname" . -}}
|
||||
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
{{- else -}}
|
||||
apiVersion: extensions/v1beta1
|
||||
{{- end }}
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ include "clearml.fullname" . }}-api
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if .Values.ingress.tls.secretName }}
|
||||
tls:
|
||||
- hosts:
|
||||
- "{{ .Values.ingress.hostPrefixApi }}{{ .Values.ingress.host }}"
|
||||
secretName: {{ .Values.ingress.tls.secretName }}
|
||||
{{- end }}
|
||||
rules:
|
||||
- host: "{{ .Values.ingress.hostPrefixApi }}{{ .Values.ingress.host }}"
|
||||
http:
|
||||
paths:
|
||||
- path: "/"
|
||||
pathType: Prefix
|
||||
backend:
|
||||
serviceName: {{ include "clearml.fullname" . }}-apiserver
|
||||
servicePort: {{ .Values.apiserver.service.port }}
|
||||
{{- end }}
|
||||
35
charts/clearml/templates/ingress-app.yaml
Normal file
35
charts/clearml/templates/ingress-app.yaml
Normal file
@@ -0,0 +1,35 @@
|
||||
{{- if .Values.ingress.enabled -}}
|
||||
{{- $fullName := include "clearml.fullname" . -}}
|
||||
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
{{- else -}}
|
||||
apiVersion: extensions/v1beta1
|
||||
{{- end }}
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ include "clearml.fullname" . }}-app
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if .Values.ingress.tls.secretName }}
|
||||
tls:
|
||||
- hosts:
|
||||
- "{{ .Values.ingress.hostPrefixApp }}{{ .Values.ingress.host }}"
|
||||
secretName: {{ .Values.ingress.tls.secretName }}
|
||||
{{- end }}
|
||||
rules:
|
||||
- host: "{{ .Values.ingress.hostPrefixApp }}{{ .Values.ingress.host }}"
|
||||
http:
|
||||
paths:
|
||||
- path: "/"
|
||||
pathType: Prefix
|
||||
backend:
|
||||
serviceName: {{ include "clearml.fullname" . }}-webserver
|
||||
servicePort: {{ .Values.webserver.service.port }}
|
||||
{{- end }}
|
||||
@@ -1,13 +1,15 @@
|
||||
{{- if .Values.ingress.enabled -}}
|
||||
{{- $fullName := include "clearml.fullname" . -}}
|
||||
{{- if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
{{- else -}}
|
||||
apiVersion: extensions/v1beta1
|
||||
{{- end }}
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ $fullName }}
|
||||
name: {{ include "clearml.fullname" . }}-files
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
@@ -18,28 +20,10 @@ spec:
|
||||
{{- if .Values.ingress.tls.secretName }}
|
||||
tls:
|
||||
- hosts:
|
||||
- "{{ .Values.ingress.hostPrefixAp }}{{ .Values.ingress.host }}"
|
||||
- "{{ .Values.ingress.hostPrefixFiles }}{{ .Values.ingress.host }}"
|
||||
- "{{ .Values.ingress.hostPrefixApi }}{{ .Values.ingress.host }}"
|
||||
secretName: {{ .Values.ingress.tls.secretName }}
|
||||
{{- end }}
|
||||
rules:
|
||||
- host: "{{ .Values.ingress.hostPrefixApp }}{{ .Values.ingress.host }}"
|
||||
http:
|
||||
paths:
|
||||
- path: "/"
|
||||
pathType: Prefix
|
||||
backend:
|
||||
serviceName: {{ include "clearml.fullname" . }}-webserver
|
||||
servicePort: {{ .Values.webserver.service.port }}
|
||||
- host: "{{ .Values.ingress.hostPrefixApi }}{{ .Values.ingress.host }}"
|
||||
http:
|
||||
paths:
|
||||
- path: "/"
|
||||
pathType: Prefix
|
||||
backend:
|
||||
serviceName: {{ include "clearml.fullname" . }}-apiserver
|
||||
servicePort: {{ .Values.apiserver.service.port }}
|
||||
- host: "{{ .Values.ingress.hostPrefixFiles }}{{ .Values.ingress.host }}"
|
||||
http:
|
||||
paths:
|
||||
@@ -1,15 +0,0 @@
|
||||
{{- if .Values.apiserver.storage.enableConfigVolume }}
|
||||
kind: PersistentVolumeClaim
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: {{ include "clearml.fullname" . }}-apiserver-config
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.apiserver.storage.config.size | quote }}
|
||||
storageClassName: {{ .Values.apiserver.storage.config.class | quote }}
|
||||
{{- end }}
|
||||
@@ -53,12 +53,16 @@ apiserver:
|
||||
|
||||
affinity: {}
|
||||
|
||||
# Optional: used in pvc-apiserver containing optional server configuration files
|
||||
storage:
|
||||
enableConfigVolume: false
|
||||
config:
|
||||
class: "standard"
|
||||
size: 1Gi
|
||||
additionalConfigs: {}
|
||||
# services.conf: |
|
||||
# tasks {
|
||||
# non_responsive_tasks_watchdog {
|
||||
# # In-progress tasks that haven't been updated for at least 'value' seconds will be stopped by the watchdog
|
||||
# threshold_sec: 21000
|
||||
# # Watchdog will sleep for this number of seconds after each cycle
|
||||
# watch_interval_sec: 900
|
||||
# }
|
||||
# }
|
||||
|
||||
fileserver:
|
||||
service:
|
||||
@@ -184,8 +188,10 @@ agentservices:
|
||||
|
||||
agentGroups:
|
||||
agent-group-cpu:
|
||||
enabled: true
|
||||
name: agent-group-cpu
|
||||
replicaCount: 1
|
||||
updateStrategy: Recreate
|
||||
nvidiaGpusPerAgent: 0
|
||||
agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
|
||||
queues: "default" # multiple queues can be specified separated by a space (e.g. "important_jobs default")
|
||||
@@ -216,8 +222,10 @@ agentGroups:
|
||||
affinity: {}
|
||||
|
||||
agent-group-gpu:
|
||||
enabled: true
|
||||
name: agent-group-gpu
|
||||
replicaCount: 0
|
||||
updateStrategy: Recreate
|
||||
nvidiaGpusPerAgent: 1
|
||||
agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
|
||||
queues: "default" # multiple queues can be specified separated by a space (e.g. "important_jobs default")
|
||||
|
||||
Reference in New Issue
Block a user