imported chart

This commit is contained in:
Valeriano Manassero 2021-07-07 08:18:37 +02:00
parent 15138b8983
commit 60079c3a61
24 changed files with 1280 additions and 0 deletions

View File

@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

12
charts/clearml/Chart.lock Normal file
View File

@ -0,0 +1,12 @@
dependencies:
- name: redis
repository: https://charts.bitnami.com/bitnami
version: 10.9.0
- name: mongodb
repository: https://charts.bitnami.com/bitnami
version: 10.3.4
- name: elasticsearch
repository: https://helm.elastic.co
version: 7.10.1
digest: sha256:aefd3992b2ab085161e4cca35c6f73dd33f8d19272a9405b5ee4e8c2a0e79bba
generated: "2021-01-05T14:26:33.629164+01:00"

28
charts/clearml/Chart.yaml Normal file
View File

@ -0,0 +1,28 @@
# Helm chart metadata for the ClearML chart.
apiVersion: v2
type: application
name: clearml
description: MLOps platform
version: "2.0.0-alpha1"
appVersion: "1.0.2"
home: https://allegro.ai
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
keywords:
  - clearml
  - machine learning
  - mlops
sources:
  - https://github.com/allegroai/clearml-helm-charts
  - https://github.com/allegroai/clearml
# Sub-charts; `condition` names the values key that toggles each one.
dependencies:
  - name: redis
    repository: https://charts.bitnami.com/bitnami
    version: "~10.9.0"
    condition: redis.enabled
  - name: mongodb
    repository: https://charts.bitnami.com/bitnami
    version: "~10.3.2"
    condition: mongodb.enabled
  - name: elasticsearch
    repository: https://helm.elastic.co
    version: "~7.10.1"
    condition: elasticsearch.enabled

183
charts/clearml/README.md Normal file
View File

@ -0,0 +1,183 @@
# clearml
![Version: 2.0.0-alpha1](https://img.shields.io/badge/Version-2.0.0--alpha1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.0.2](https://img.shields.io/badge/AppVersion-1.0.2-informational?style=flat-square)
MLOps platform
**Homepage:** <https://allegro.ai>
## Source Code
* <https://github.com/allegroai/clearml-helm-charts>
* <https://github.com/allegroai/clearml>
## Requirements
| Repository | Name | Version |
|------------|------|---------|
| https://charts.bitnami.com/bitnami | mongodb | ~10.3.2 |
| https://charts.bitnami.com/bitnami | redis | ~10.9.0 |
| https://helm.elastic.co | elasticsearch | ~7.10.1 |
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| agentGroups.agent-group0.affinity | object | `{}` | |
| agentGroups.agent-group0.agentVersion | string | `""` | |
| agentGroups.agent-group0.awsAccessKeyId | string | `nil` | |
| agentGroups.agent-group0.awsDefaultRegion | string | `nil` | |
| agentGroups.agent-group0.awsSecretAccessKey | string | `nil` | |
| agentGroups.agent-group0.azureStorageAccount | string | `nil` | |
| agentGroups.agent-group0.azureStorageKey | string | `nil` | |
| agentGroups.agent-group0.clearmlAccessKey | string | `nil` | |
| agentGroups.agent-group0.clearmlConfig | string | `"sdk {\n}"` | |
| agentGroups.agent-group0.clearmlGitPassword | string | `nil` | |
| agentGroups.agent-group0.clearmlGitUser | string | `nil` | |
| agentGroups.agent-group0.clearmlSecretKey | string | `nil` | |
| agentGroups.agent-group0.image.pullPolicy | string | `"IfNotPresent"` | |
| agentGroups.agent-group0.image.repository | string | `"nvidia/cuda"` | |
| agentGroups.agent-group0.image.tag | string | `"11.0-base-ubuntu18.04"` | |
| agentGroups.agent-group0.name | string | `"agent-group0"` | |
| agentGroups.agent-group0.nodeSelector | object | `{}` | |
| agentGroups.agent-group0.nvidiaGpusPerAgent | int | `1` | |
| agentGroups.agent-group0.podAnnotations | object | `{}` | |
| agentGroups.agent-group0.queues | string | `"default"` | |
| agentGroups.agent-group0.replicaCount | int | `0` | |
| agentGroups.agent-group0.tolerations | list | `[]` | |
| agentservices.affinity | object | `{}` | |
| agentservices.agentVersion | string | `""` | |
| agentservices.awsAccessKeyId | string | `nil` | |
| agentservices.awsDefaultRegion | string | `nil` | |
| agentservices.awsSecretAccessKey | string | `nil` | |
| agentservices.azureStorageAccount | string | `nil` | |
| agentservices.azureStorageKey | string | `nil` | |
| agentservices.clearmlFilesHost | string | `nil` | |
| agentservices.clearmlGitPassword | string | `nil` | |
| agentservices.clearmlGitUser | string | `nil` | |
| agentservices.clearmlHostIp | string | `nil` | |
| agentservices.clearmlWebHost | string | `nil` | |
| agentservices.clearmlWorkerId | string | `"clearml-services"` | |
| agentservices.extraEnvs | list | `[]` | |
| agentservices.googleCredentials | string | `nil` | |
| agentservices.image.pullPolicy | string | `"IfNotPresent"` | |
| agentservices.image.repository | string | `"allegroai/clearml-agent-services"` | |
| agentservices.image.tag | string | `"latest"` | |
| agentservices.nodeSelector | object | `{}` | |
| agentservices.podAnnotations | object | `{}` | |
| agentservices.replicaCount | int | `1` | |
| agentservices.resources | object | `{}` | |
| agentservices.storage.data.class | string | `"standard"` | |
| agentservices.storage.data.size | string | `"50Gi"` | |
| agentservices.tolerations | list | `[]` | |
| apiserver.affinity | object | `{}` | |
| apiserver.configDir | string | `"/opt/clearml/config"` | |
| apiserver.extraEnvs | list | `[]` | |
| apiserver.image.pullPolicy | string | `"IfNotPresent"` | |
| apiserver.image.repository | string | `"allegroai/clearml"` | |
| apiserver.image.tag | string | `"1.0.2"` | |
| apiserver.livenessDelay | int | `60` | |
| apiserver.nodeSelector | object | `{}` | |
| apiserver.podAnnotations | object | `{}` | |
| apiserver.prepopulateArtifactsPath | string | `"/mnt/fileserver"` | |
| apiserver.prepopulateEnabled | string | `"true"` | |
| apiserver.prepopulateZipFiles | string | `"/opt/clearml/db-pre-populate"` | |
| apiserver.readinessDelay | int | `60` | |
| apiserver.replicaCount | int | `1` | |
| apiserver.resources | object | `{}` | |
| apiserver.service.port | int | `8008` | |
| apiserver.service.type | string | `"NodePort"` | |
| apiserver.storage.config.class | string | `"standard"` | |
| apiserver.storage.config.size | string | `"1Gi"` | |
| apiserver.storage.enableConfigVolume | bool | `false` | |
| apiserver.tolerations | list | `[]` | |
| clearml.defaultCompany | string | `"d1bd92a3b039400cbafc60a7a5b1e52b"` | |
| elasticsearch.clusterHealthCheckParams | string | `"wait_for_status=yellow&timeout=1s"` | |
| elasticsearch.clusterName | string | `"clearml-elastic"` | |
| elasticsearch.enabled | bool | `true` | |
| elasticsearch.esConfig."elasticsearch.yml" | string | `"xpack.security.enabled: false\n"` | |
| elasticsearch.esJavaOpts | string | `"-Xmx2g -Xms2g"` | |
| elasticsearch.extraEnvs[0].name | string | `"bootstrap.memory_lock"` | |
| elasticsearch.extraEnvs[0].value | string | `"false"` | |
| elasticsearch.extraEnvs[1].name | string | `"cluster.routing.allocation.node_initial_primaries_recoveries"` | |
| elasticsearch.extraEnvs[1].value | string | `"500"` | |
| elasticsearch.extraEnvs[2].name | string | `"cluster.routing.allocation.disk.watermark.low"` | |
| elasticsearch.extraEnvs[2].value | string | `"500mb"` | |
| elasticsearch.extraEnvs[3].name | string | `"cluster.routing.allocation.disk.watermark.high"` | |
| elasticsearch.extraEnvs[3].value | string | `"500mb"` | |
| elasticsearch.extraEnvs[4].name | string | `"cluster.routing.allocation.disk.watermark.flood_stage"` | |
| elasticsearch.extraEnvs[4].value | string | `"500mb"` | |
| elasticsearch.extraEnvs[5].name | string | `"http.compression_level"` | |
| elasticsearch.extraEnvs[5].value | string | `"7"` | |
| elasticsearch.extraEnvs[6].name | string | `"reindex.remote.whitelist"` | |
| elasticsearch.extraEnvs[6].value | string | `"*.*"` | |
| elasticsearch.extraEnvs[7].name | string | `"xpack.monitoring.enabled"` | |
| elasticsearch.extraEnvs[7].value | string | `"false"` | |
| elasticsearch.extraEnvs[8].name | string | `"xpack.security.enabled"` | |
| elasticsearch.extraEnvs[8].value | string | `"false"` | |
| elasticsearch.httpPort | int | `9200` | |
| elasticsearch.minimumMasterNodes | int | `1` | |
| elasticsearch.persistence.enabled | bool | `true` | |
| elasticsearch.replicas | int | `1` | |
| elasticsearch.resources.limits.memory | string | `"4Gi"` | |
| elasticsearch.resources.requests.memory | string | `"4Gi"` | |
| elasticsearch.roles.data | string | `"true"` | |
| elasticsearch.roles.ingest | string | `"true"` | |
| elasticsearch.roles.master | string | `"true"` | |
| elasticsearch.roles.remote_cluster_client | string | `"true"` | |
| elasticsearch.volumeClaimTemplate.accessModes[0] | string | `"ReadWriteOnce"` | |
| elasticsearch.volumeClaimTemplate.resources.requests.storage | string | `"50Gi"` | |
| fileserver.affinity | object | `{}` | |
| fileserver.extraEnvs | list | `[]` | |
| fileserver.image.pullPolicy | string | `"IfNotPresent"` | |
| fileserver.image.repository | string | `"allegroai/clearml"` | |
| fileserver.image.tag | string | `"1.0.2"` | |
| fileserver.nodeSelector | object | `{}` | |
| fileserver.podAnnotations | object | `{}` | |
| fileserver.replicaCount | int | `1` | |
| fileserver.resources | object | `{}` | |
| fileserver.service.port | int | `8081` | |
| fileserver.service.type | string | `"NodePort"` | |
| fileserver.storage.data.class | string | `"standard"` | |
| fileserver.storage.data.size | string | `"50Gi"` | |
| fileserver.tolerations | list | `[]` | |
| ingress.annotations | object | `{}` | |
| ingress.enabled | bool | `false` | |
| ingress.host | string | `""` | |
| ingress.name | string | `"clearml-server-ingress"` | |
| ingress.tls.secretName | string | `""` | |
| mongodb.architecture | string | `"standalone"` | |
| mongodb.auth.enabled | bool | `false` | |
| mongodb.enabled | bool | `true` | |
| mongodb.persistence.accessModes[0] | string | `"ReadWriteOnce"` | |
| mongodb.persistence.enabled | bool | `true` | |
| mongodb.persistence.size | string | `"50Gi"` | |
| mongodb.replicaCount | int | `1` | |
| mongodb.service.name | string | `"{{ .Release.Name }}-mongodb"` | |
| mongodb.service.port | int | `27017` | |
| mongodb.service.portName | string | `"mongo-service"` | |
| mongodb.service.type | string | `"ClusterIP"` | |
| redis.cluster.enabled | bool | `false` | |
| redis.databaseNumber | int | `0` | |
| redis.enabled | bool | `true` | |
| redis.master.name | string | `"{{ .Release.Name }}-redis-master"` | |
| redis.master.persistence.accessModes[0] | string | `"ReadWriteOnce"` | |
| redis.master.persistence.enabled | bool | `true` | |
| redis.master.persistence.size | string | `"5Gi"` | |
| redis.master.port | int | `6379` | |
| redis.usePassword | bool | `false` | |
| webserver.affinity | object | `{}` | |
| webserver.extraEnvs | list | `[]` | |
| webserver.image.pullPolicy | string | `"IfNotPresent"` | |
| webserver.image.repository | string | `"allegroai/clearml"` | |
| webserver.image.tag | string | `"1.0.2"` | |
| webserver.nodeSelector | object | `{}` | |
| webserver.podAnnotations | object | `{}` | |
| webserver.replicaCount | int | `1` | |
| webserver.resources | object | `{}` | |
| webserver.service.port | int | `80` | |
| webserver.service.type | string | `"NodePort"` | |
| webserver.tolerations | list | `[]` | |
----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.5.0](https://github.com/norwoodj/helm-docs/releases/v1.5.0)

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,22 @@
{{- /*
Post-install notes: prints how to reach the ClearML web UI.
The ingress branch mirrors the hosts rendered by the chart's Ingress template
(app./api./files. prefixes on .Values.ingress.host); the other branches query
the "<fullname>-webserver" Service and the webserver pods by their labels.
*/ -}}
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- $scheme := ternary "https" "http" (not (empty .Values.ingress.tls.secretName)) }}
  Web UI:      {{ $scheme }}://app.{{ .Values.ingress.host }}
  API server:  {{ $scheme }}://api.{{ .Values.ingress.host }}
  File server: {{ $scheme }}://files.{{ .Values.ingress.host }}
{{- else if contains "NodePort" .Values.webserver.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "clearml.fullname" . }}-webserver)
  export NODE_IP=$(kubectl get nodes -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.webserver.service.type }}
     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
           You can watch the status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "clearml.fullname" . }}-webserver'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "clearml.fullname" . }}-webserver --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.webserver.service.port }}
{{- else if contains "ClusterIP" .Values.webserver.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "clearml.name" . }},app.kubernetes.io/instance={{ .Release.Name }}-webserver" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}

View File

@ -0,0 +1,97 @@
{{/*
Short chart name: .Values.nameOverride when set, otherwise .Chart.Name.
The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "clearml.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Fully qualified app name, used as the prefix of every resource name.
Resolution order:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name
     (or nameOverride);
  3. "<release name>-<chart name or nameOverride>".
The result is cut to 63 characters (limit of some Kubernetes name fields, per
the DNS naming spec) and a trailing "-" is removed.
*/}}
{{- define "clearml.fullname" -}}
{{- $full := .Values.fullnameOverride }}
{{- if not $full }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- $full = .Release.Name }}
{{- if not (contains $name .Release.Name) }}
{{- $full = printf "%s-%s" .Release.Name $name }}
{{- end }}
{{- end }}
{{- $full | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Value for the helm.sh/chart label: "<chart name>-<chart version>" with "+"
replaced by "_", cut to 63 characters, trailing "-" removed.
*/}}
{{- define "clearml.chart" -}}
{{- $id := print .Chart.Name "-" .Chart.Version }}
{{- $id | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels: chart id, the base selector labels, the app version (only when
the chart declares one) and the managing service.
*/}}
{{- define "clearml.labels" -}}
helm.sh/chart: {{ include "clearml.chart" . }}
{{ include "clearml.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Base selector labels: chart name plus the bare release name as instance.
*/}}
{{- define "clearml.selectorLabels" -}}
{{- $instance := .Release.Name -}}
app.kubernetes.io/name: {{ include "clearml.name" . }}
app.kubernetes.io/instance: {{ $instance }}
{{- end }}
{{/*
Selector labels (apiserver): chart name plus "<release name>-apiserver" as
instance.
*/}}
{{- define "clearml.selectorLabelsApiServer" -}}
app.kubernetes.io/name: {{ include "clearml.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}-apiserver
{{- end }}
{{/*
Selector labels (fileserver): chart name plus "<release name>-fileserver" as
instance.
*/}}
{{- define "clearml.selectorLabelsFileServer" -}}
app.kubernetes.io/name: {{ include "clearml.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}-fileserver
{{- end }}
{{/*
Selector labels (webserver): chart name plus "<release name>-webserver" as
instance.
*/}}
{{- define "clearml.selectorLabelsWebServer" -}}
app.kubernetes.io/name: {{ include "clearml.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}-webserver
{{- end }}
{{/*
Selector labels (agentservices): chart name plus
"<release name>-agentservices" as instance.
*/}}
{{- define "clearml.selectorLabelsAgentServices" -}}
app.kubernetes.io/name: {{ include "clearml.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}-agentservices
{{- end }}
{{/*
Selector labels (agent): chart name plus "<release name>-agent" as instance.
*/}}
{{- define "clearml.selectorLabelsAgent" -}}
app.kubernetes.io/name: {{ include "clearml.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}-agent
{{- end }}
{{/*
Create the name of the service account to use.
When serviceAccount.create is true the name defaults to the chart fullname;
otherwise it defaults to "default". An explicit serviceAccount.name always wins.
A missing .Values.serviceAccount section is treated as an empty map so that
including this helper does not fail with a nil-pointer render error.
*/}}
{{- define "clearml.serviceAccountName" -}}
{{- $sa := .Values.serviceAccount | default dict }}
{{- if $sa.create }}
{{- default (include "clearml.fullname" .) $sa.name }}
{{- else }}
{{- default "default" $sa.name }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,107 @@
{{- /*
Agent Deployments: one Deployment per entry of .Values.agentGroups.
Inside the loop "." is the agent group and "$" is the chart root context.
Each agent waits (init container) for the apiserver /debug.ping endpoint to
answer 200, then installs clearml-agent with pip and runs
"clearml-agent daemon" against the group's queues.
NOTE(review): every group uses the same selector labels
(clearml.selectorLabelsAgent); with more than one group the Deployments'
selectors overlap - confirm whether a per-group label is wanted.
*/ -}}
{{- range $key, $value := .Values.agentGroups }}
{{- with $value }}
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "clearml.fullname" $ }}-{{ .name }}-agent
  labels:
    {{- include "clearml.labels" $ | nindent 4 }}
spec:
  replicas: {{ .replicaCount }}
  selector:
    matchLabels:
      {{- include "clearml.selectorLabelsAgent" $ | nindent 6 }}
  template:
    metadata:
      {{- with .podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "clearml.selectorLabelsAgent" $ | nindent 8 }}
    spec:
      # The volume key is only rendered when the group carries a clearml.conf,
      # so an unset config no longer yields a null "volumes:" entry.
      {{- if .clearmlConfig }}
      volumes:
        - name: agent-clearml-conf-volume
          secret:
            secretName: {{ .name }}-conf
            items:
              - key: clearml.conf
                path: clearml.conf
      {{- end }}
      initContainers:
        # NOTE(review): relies on curl being available in the agent image - confirm.
        - name: init-agent-{{ .name }}
          image: "{{ .image.repository }}:{{ .image.tag | default $.Chart.AppVersion }}"
          command:
            - /bin/sh
            - -c
            - >
              set -x;
              while [ $(curl -sw '%{http_code}' "http://{{ include "clearml.fullname" $ }}-apiserver:{{ $.Values.apiserver.service.port }}/debug.ping" -o /dev/null) -ne 200 ] ; do
              echo "waiting for apiserver" ;
              sleep 5 ;
              done
      containers:
        - name: {{ $.Chart.Name }}-{{ .name }}
          image: "{{ .image.repository }}:{{ .image.tag }}"
          imagePullPolicy: {{ .image.pullPolicy }}
          securityContext:
            privileged: true
          resources:
            limits:
              nvidia.com/gpu: {{ .nvidiaGpusPerAgent }}
          env:
            - name: CLEARML_API_HOST
              value: 'http://{{ include "clearml.fullname" $ }}-apiserver:{{ $.Values.apiserver.service.port }}'
            - name: CLEARML_WEB_HOST
              value: 'http://{{ include "clearml.fullname" $ }}-webserver:{{ $.Values.webserver.service.port }}'
            - name: CLEARML_FILES_HOST
              value: 'http://{{ include "clearml.fullname" $ }}-fileserver:{{ $.Values.fileserver.service.port }}'
            # User-supplied values are quoted so that number- or boolean-looking
            # strings stay strings in the rendered manifest; unset values still
            # render as an empty value.
            - name: CLEARML_AGENT_GIT_USER
              value: {{ .clearmlGitUser | quote }}
            - name: CLEARML_AGENT_GIT_PASS
              value: {{ .clearmlGitPassword | quote }}
            - name: AWS_ACCESS_KEY_ID
              value: {{ .awsAccessKeyId | quote }}
            - name: AWS_SECRET_ACCESS_KEY
              value: {{ .awsSecretAccessKey | quote }}
            - name: AWS_DEFAULT_REGION
              value: {{ .awsDefaultRegion | quote }}
            - name: AZURE_STORAGE_ACCOUNT
              value: {{ .azureStorageAccount | quote }}
            - name: AZURE_STORAGE_KEY
              value: {{ .azureStorageKey | quote }}
            - name: CLEARML_API_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: clearml-conf
                  key: tests_user_key
            - name: CLEARML_API_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: clearml-conf
                  key: tests_user_secret
          command:
            - /bin/sh
            - -c
            - "apt-get update ;
              apt-get install -y curl python3-pip git;
              python3 -m pip install -U pip ;
              python3 -m pip install clearml-agent{{ .agentVersion}} ;
              CLEARML_AGENT_K8S_HOST_MOUNT=/root/.clearml:/root/.clearml clearml-agent daemon --queue {{ .queues}}"
          # Mount the group's clearml.conf; previously the secret volume was
          # declared but never mounted into the container.
          # NOTE(review): /root/clearml.conf assumes the agent runs as root - confirm.
          {{- if .clearmlConfig }}
          volumeMounts:
            - name: agent-clearml-conf-volume
              mountPath: /root/clearml.conf
              subPath: clearml.conf
              readOnly: true
          {{- end }}
      {{- with .nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,100 @@
# Deployment of the ClearML agent-services worker.
# An init container polls the apiserver /debug.ping endpoint until it answers
# 200; the main container then runs with the "agentservices" argument and keeps
# its state on the "<fullname>-agentservices-data" PVC mounted at /root/.clearml.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "clearml.fullname" . }}-agentservices
  labels:
    {{- include "clearml.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.agentservices.replicaCount }}
  selector:
    matchLabels:
      {{- include "clearml.selectorLabelsAgentServices" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.agentservices.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "clearml.selectorLabelsAgentServices" . | nindent 8 }}
    spec:
      volumes:
        - name: agentservices-data
          persistentVolumeClaim:
            claimName: {{ include "clearml.fullname" . }}-agentservices-data
      initContainers:
        - name: init-agentservices
          image: "{{ .Values.agentservices.image.repository }}:{{ .Values.agentservices.image.tag | default .Chart.AppVersion }}"
          command:
            - /bin/sh
            - -c
            - >
              set -x;
              while [ $(curl -sw '%{http_code}' "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}/debug.ping" -o /dev/null) -ne 200 ] ; do
              echo "waiting for apiserver" ;
              sleep 5 ;
              done
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.agentservices.image.repository }}:{{ .Values.agentservices.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.agentservices.image.pullPolicy }}
          env:
            # User-supplied values are quoted so that number- or boolean-looking
            # strings stay strings in the rendered manifest; unset values still
            # render as an empty value.
            - name: CLEARML_HOST_IP
              value: {{ .Values.agentservices.clearmlHostIp | quote }}
            - name: CLEARML_API_HOST
              value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
            - name: CLEARML_WEB_HOST
              value: {{ .Values.agentservices.clearmlWebHost | quote }}
            - name: CLEARML_FILES_HOST
              value: {{ .Values.agentservices.clearmlFilesHost | quote }}
            - name: CLEARML_AGENT_GIT_USER
              value: {{ .Values.agentservices.clearmlGitUser | quote }}
            - name: CLEARML_AGENT_GIT_PASS
              value: {{ .Values.agentservices.clearmlGitPassword | quote }}
            - name: CLEARML_AGENT_UPDATE_VERSION
              value: {{ .Values.agentservices.agentVersion | quote }}
            - name: CLEARML_AGENT_DEFAULT_BASE_DOCKER
              value: {{ .Values.agentservices.defaultBaseDocker | quote }}
            - name: AWS_ACCESS_KEY_ID
              value: {{ .Values.agentservices.awsAccessKeyId | quote }}
            - name: AWS_SECRET_ACCESS_KEY
              value: {{ .Values.agentservices.awsSecretAccessKey | quote }}
            - name: AWS_DEFAULT_REGION
              value: {{ .Values.agentservices.awsDefaultRegion | quote }}
            - name: AZURE_STORAGE_ACCOUNT
              value: {{ .Values.agentservices.azureStorageAccount | quote }}
            - name: AZURE_STORAGE_KEY
              value: {{ .Values.agentservices.azureStorageKey | quote }}
            - name: GOOGLE_APPLICATION_CREDENTIALS
              value: {{ .Values.agentservices.googleCredentials | quote }}
            - name: CLEARML_WORKER_ID
              value: {{ .Values.agentservices.clearmlWorkerId | quote }}
            - name: CLEARML_API_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: clearml-conf
                  key: tests_user_key
            - name: CLEARML_API_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: clearml-conf
                  key: tests_user_secret
          args:
            - agentservices
          volumeMounts:
            - name: agentservices-data
              mountPath: /root/.clearml
          resources:
            {{- toYaml .Values.agentservices.resources | nindent 12 }}
      {{- with .Values.agentservices.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.agentservices.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.agentservices.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@ -0,0 +1,122 @@
{{- /*
Deployment of the ClearML API server.
The container listens on port 8008 (named "http"), is probed on /debug.ping and
is wired to Elasticsearch, MongoDB and Redis through environment variables.
Credentials are read from the "clearml-conf" Secret.
The config directory comes from .Values.apiserver.configDir and falls back to
/opt/clearml/config, the path that was previously hard-coded.
*/ -}}
{{- $configDir := .Values.apiserver.configDir | default "/opt/clearml/config" }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "clearml.fullname" . }}-apiserver
  labels:
    {{- include "clearml.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.apiserver.replicaCount }}
  selector:
    matchLabels:
      {{- include "clearml.selectorLabelsApiServer" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.apiserver.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "clearml.selectorLabelsApiServer" . | nindent 8 }}
    spec:
      {{- if .Values.apiserver.storage.enableConfigVolume }}
      volumes:
        - name: apiserver-config
          persistentVolumeClaim:
            claimName: {{ include "clearml.fullname" . }}-apiserver-config
      {{- end }}
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.apiserver.image.repository }}:{{ .Values.apiserver.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.apiserver.image.pullPolicy }}
          ports:
            - name: http
              containerPort: 8008
              protocol: TCP
          env:
            - name: CLEARML_ELASTIC_SERVICE_HOST
              value: "{{ .Values.elasticsearch.clusterName }}-master"
            - name: CLEARML_ELASTIC_SERVICE_PORT
              value: "{{ .Values.elasticsearch.httpPort }}"
            # The mongodb/redis names are themselves templates, so they are
            # rendered with tpl.
            - name: CLEARML_MONGODB_SERVICE_HOST
              value: "{{ tpl .Values.mongodb.service.name . }}"
            - name: CLEARML_MONGODB_SERVICE_PORT
              value: "{{ .Values.mongodb.service.port }}"
            - name: CLEARML_REDIS_SERVICE_HOST
              value: "{{ tpl .Values.redis.master.name . }}"
            - name: CLEARML_REDIS_SERVICE_PORT
              value: "{{ .Values.redis.master.port }}"
            - name: CLEARML__APISERVER__PRE_POPULATE__ENABLED
              value: "{{ .Values.apiserver.prepopulateEnabled }}"
            - name: CLEARML__APISERVER__PRE_POPULATE__ZIP_FILES
              value: "{{ .Values.apiserver.prepopulateZipFiles }}"
            - name: CLEARML_SERVER_DEPLOYMENT_TYPE
              value: "helm-cloud"
            - name: CLEARML_CONFIG_DIR
              value: {{ $configDir | quote }}
            # Quoted: an all-digit or exponent-looking company id would
            # otherwise be parsed as a number and rejected as an env value.
            - name: CLEARML__APISERVER__DEFAULT_COMPANY
              value: {{ .Values.clearml.defaultCompany | quote }}
            - name: CLEARML__SECURE__HTTP__SESSION_SECRET__APISERVER
              valueFrom:
                secretKeyRef:
                  name: clearml-conf
                  key: http_session
            - name: CLEARML__SECURE__AUTH__TOKEN_SECRET
              valueFrom:
                secretKeyRef:
                  name: clearml-conf
                  key: auth_token
            - name: CLEARML__SECURE__CREDENTIALS__APISERVER__USER_KEY
              valueFrom:
                secretKeyRef:
                  name: clearml-conf
                  key: apiserver_key
            - name: CLEARML__SECURE__CREDENTIALS__APISERVER__USER_SECRET
              valueFrom:
                secretKeyRef:
                  name: clearml-conf
                  key: apiserver_secret
            - name: CLEARML__SECURE__CREDENTIALS__TESTS__USER_KEY
              valueFrom:
                secretKeyRef:
                  name: clearml-conf
                  key: tests_user_key
            - name: CLEARML__SECURE__CREDENTIALS__TESTS__USER_SECRET
              valueFrom:
                secretKeyRef:
                  name: clearml-conf
                  key: tests_user_secret
            {{- with .Values.apiserver.extraEnvs }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          args:
            - apiserver
          livenessProbe:
            initialDelaySeconds: {{ .Values.apiserver.livenessDelay }}
            httpGet:
              path: /debug.ping
              port: 8008
          readinessProbe:
            initialDelaySeconds: {{ .Values.apiserver.readinessDelay }}
            failureThreshold: 8
            httpGet:
              path: /debug.ping
              port: 8008
          {{- if .Values.apiserver.storage.enableConfigVolume }}
          volumeMounts:
            - name: apiserver-config
              mountPath: {{ $configDir | quote }}
          {{- end }}
          resources:
            {{- toYaml .Values.apiserver.resources | nindent 12 }}
      {{- with .Values.apiserver.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.apiserver.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.apiserver.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@ -0,0 +1,69 @@
# Deployment of the ClearML file server.
# The container listens on port 8081 (named "http") and stores files on the
# "<fullname>-fileserver-data" PVC mounted at /mnt/fileserver.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "clearml.fullname" . }}-fileserver
  labels:
    {{- include "clearml.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.fileserver.replicaCount }}
  selector:
    matchLabels:
      {{- include "clearml.selectorLabelsFileServer" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.fileserver.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "clearml.selectorLabelsFileServer" . | nindent 8 }}
    spec:
      volumes:
        - name: fileserver-data
          persistentVolumeClaim:
            claimName: {{ include "clearml.fullname" . }}-fileserver-data
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.fileserver.image.repository }}:{{ .Values.fileserver.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.fileserver.image.pullPolicy }}
          ports:
            - name: http
              containerPort: 8081
              protocol: TCP
          env:
            - name: CLEARML_CONFIG_DIR
              value: /opt/clearml/config
            {{- with .Values.fileserver.extraEnvs }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          args:
            - fileserver
          # Exec probes run without a shell, so every list item is one argv
          # entry. "-X" and "OPTIONS" are separate items; as a single
          # "-X OPTIONS" item curl would take " OPTIONS" (leading space) as the
          # request method.
          livenessProbe:
            exec:
              command:
                - curl
                - -X
                - OPTIONS
                - http://localhost:8081/
          readinessProbe:
            exec:
              command:
                - curl
                - -X
                - OPTIONS
                - http://localhost:8081/
          volumeMounts:
            - name: fileserver-data
              mountPath: /mnt/fileserver
          resources:
            {{- toYaml .Values.fileserver.resources | nindent 12 }}
      {{- with .Values.fileserver.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.fileserver.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.fileserver.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@ -0,0 +1,64 @@
# Deployment of the ClearML web server.
# The container listens on port 80 (named "http") and is told where the
# apiserver and fileserver Services live through NGINX_* environment variables.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "clearml.fullname" . }}-webserver
  labels:
    {{- include "clearml.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.webserver.replicaCount }}
  selector:
    matchLabels:
      {{- include "clearml.selectorLabelsWebServer" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.webserver.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "clearml.selectorLabelsWebServer" . | nindent 8 }}
    spec:
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.webserver.image.repository }}:{{ .Values.webserver.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.webserver.image.pullPolicy }}
          ports:
            - name: http
              containerPort: 80
              protocol: TCP
          # Exec probes run without a shell, so every list item is one argv
          # entry. "-X" and "OPTIONS" are separate items; as a single
          # "-X OPTIONS" item curl would take " OPTIONS" (leading space) as the
          # request method.
          livenessProbe:
            exec:
              command:
                - curl
                - -X
                - OPTIONS
                - http://0.0.0.0:80/
          readinessProbe:
            exec:
              command:
                - curl
                - -X
                - OPTIONS
                - http://0.0.0.0:80/
          env:
            - name: NGINX_APISERVER_ADDRESS
              value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
            - name: NGINX_FILESERVER_ADDRESS
              value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}"
            {{- with .Values.webserver.extraEnvs }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          args:
            - webserver
          resources:
            {{- toYaml .Values.webserver.resources | nindent 12 }}
      {{- with .Values.webserver.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.webserver.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.webserver.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@ -0,0 +1,48 @@
{{- /*
Ingress exposing the three ClearML services under sub-domains of
.Values.ingress.host: app. -> webserver, api. -> apiserver, files. -> fileserver.
Rendered only when .Values.ingress.enabled is true.
The API version follows the cluster version: networking.k8s.io/v1 from 1.19
(the v1beta1 Ingress APIs are removed in Kubernetes 1.22),
networking.k8s.io/v1beta1 from 1.14, extensions/v1beta1 below that.
The v1 schema needs pathType and the service/port backend form;
ImplementationSpecific leaves the "/*" path interpretation to the controller,
as with the beta APIs.
*/ -}}
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "clearml.fullname" . -}}
{{- $kubeVersion := .Capabilities.KubeVersion.GitVersion -}}
{{- $useV1 := semverCompare ">=1.19-0" $kubeVersion -}}
{{- $web := dict "sub" "app" "svc" "webserver" "port" .Values.webserver.service.port -}}
{{- $api := dict "sub" "api" "svc" "apiserver" "port" .Values.apiserver.service.port -}}
{{- $files := dict "sub" "files" "svc" "fileserver" "port" .Values.fileserver.service.port -}}
{{- if $useV1 }}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" $kubeVersion }}
apiVersion: networking.k8s.io/v1beta1
{{- else }}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
  name: {{ $fullName }}
  labels:
    {{- include "clearml.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- if .Values.ingress.tls.secretName }}
  tls:
    - hosts:
        - "app.{{ .Values.ingress.host }}"
        - "files.{{ .Values.ingress.host }}"
        - "api.{{ .Values.ingress.host }}"
      secretName: {{ .Values.ingress.tls.secretName }}
  {{- end }}
  rules:
    {{- range $route := list $web $api $files }}
    - host: "{{ $route.sub }}.{{ $.Values.ingress.host }}"
      http:
        paths:
          - path: "/*"
            {{- if $useV1 }}
            pathType: ImplementationSpecific
            backend:
              service:
                name: {{ $fullName }}-{{ $route.svc }}
                port:
                  number: {{ $route.port }}
            {{- else }}
            backend:
              serviceName: {{ $fullName }}-{{ $route.svc }}
              servicePort: {{ $route.port }}
            {{- end }}
    {{- end }}
{{- end }}

View File

@ -0,0 +1,13 @@
# Persistent storage for the agent-services worker.
# Size and storage class come from .Values.agentservices.storage.data.
{{- $storage := .Values.agentservices.storage.data }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ printf "%s-agentservices-data" (include "clearml.fullname" .) }}
  labels:
    {{- include "clearml.labels" . | nindent 4 }}
spec:
  storageClassName: {{ quote $storage.class }}
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: {{ quote $storage.size }}

View File

@ -0,0 +1,15 @@
{{- /*
Optional persistent storage for the apiserver configuration directory.
Rendered only when .Values.apiserver.storage.enableConfigVolume is true;
size and storage class come from .Values.apiserver.storage.config.
*/ -}}
{{- if .Values.apiserver.storage.enableConfigVolume }}
{{- $storage := .Values.apiserver.storage.config }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ printf "%s-apiserver-config" (include "clearml.fullname" .) }}
  labels:
    {{- include "clearml.labels" . | nindent 4 }}
spec:
  storageClassName: {{ quote $storage.class }}
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: {{ quote $storage.size }}
{{- end }}

View File

@ -0,0 +1,13 @@
# Persistent storage for files kept by the file server.
# Size and storage class come from .Values.fileserver.storage.data.
{{- $storage := .Values.fileserver.storage.data }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ printf "%s-fileserver-data" (include "clearml.fullname" .) }}
  labels:
    {{- include "clearml.labels" . | nindent 4 }}
spec:
  storageClassName: {{ quote $storage.class }}
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: {{ quote $storage.size }}

View File

@ -0,0 +1,13 @@
{{- /*
One Secret per agent group that defines clearmlConfig, holding the
base64-encoded clearml.conf under the name "<group name>-conf" (the name the
agent Deployment's secret volume refers to).
Groups without clearmlConfig render nothing, so no empty YAML document is
emitted for them. Inside the loop "." is the agent group and "$" is the chart
root context.
*/ -}}
{{- range $key, $value := .Values.agentGroups }}
{{- with $value }}
{{- if .clearmlConfig }}
---
apiVersion: v1
kind: Secret
metadata:
  name: {{ .name }}-conf
  labels:
    {{- include "clearml.labels" $ | nindent 4 }}
type: Opaque
data:
  clearml.conf: {{ .clearmlConfig | b64enc | quote }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,11 @@
{{- /*
Secret "clearml-conf" holding the server credentials and tokens.

SECURITY: the fallback values below are fixed constants committed to the chart
and therefore identical for every installation. They are kept only so existing
releases keep working; override them for any non-throwaway deployment.

Each entry can be overridden with a plain-text (not base64) value under
.Values.clearml:
  apiserverKey, apiserverSecret, httpSession, authToken,
  testsUserKey, testsUserSecret
When an override is absent or empty, the original encoded default is used.
*/ -}}
{{- $overrides := .Values.clearml | default dict }}
apiVersion: v1
kind: Secret
metadata:
  name: clearml-conf
data:
  apiserver_key: {{ with get $overrides "apiserverKey" }}{{ . | toString | b64enc | quote }}{{ else }}"NTQ0MkYzNDQzTUpNT1JXWkEzWkg="{{ end }}
  apiserver_secret: {{ with get $overrides "apiserverSecret" }}{{ . | toString | b64enc | quote }}{{ else }}"QnhhcElSbzlaSU5pOHgyNUNSeHo4V2RtcjJwUWp6dVdWQjRQTkFTWnFDdFR5V2dXVlE="{{ end }}
  http_session: {{ with get $overrides "httpSession" }}{{ . | toString | b64enc | quote }}{{ else }}"OVR3MjBSYmhKMWJMQmlIRU9XWHZocGxLR1ViVGdMekF0d0ZOMm9MUXZXd1MwdVJwRDU="{{ end }}
  auth_token: {{ with get $overrides "authToken" }}{{ . | toString | b64enc | quote }}{{ else }}"MVNDZjBvdjNObTU0NFRkMm9aMGdYU3JzTng1WGhNV2RWbEt6MXRPZ2N4MTU4YkQ1UlY="{{ end }}
  tests_user_key: {{ with get $overrides "testsUserKey" }}{{ . | toString | b64enc | quote }}{{ else }}"RU5QMzlFUU00U0xBQ0dENUZYQjc="{{ end }}
  tests_user_secret: {{ with get $overrides "testsUserSecret" }}{{ . | toString | b64enc | quote }}{{ else }}"bFBjbTBpbWJjQlo4bXdnTzd0cGFkdXRpUzNnbkpEMDV4OWo3YWZ3WFBTMzVJS2JwaVE="{{ end }}

View File

@ -0,0 +1,15 @@
# Service "<fullname>-apiserver".
# Exposes .Values.apiserver.service.port (also used as targetPort) with the
# Service type taken from .Values.apiserver.service.type.
# The fixed nodePort 30008 is rendered only for NodePort/LoadBalancer types:
# Kubernetes rejects a Service that sets nodePort with any other type, so
# emitting it unconditionally made the "type" value effectively unchangeable.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "clearml.fullname" . }}-apiserver
  labels:
    {{- include "clearml.labels" . | nindent 4 }}
spec:
  type: {{ .Values.apiserver.service.type }}
  ports:
    - port: {{ .Values.apiserver.service.port }}
      targetPort: {{ .Values.apiserver.service.port }}
      {{- if has .Values.apiserver.service.type (list "NodePort" "LoadBalancer") }}
      nodePort: 30008
      {{- end }}
      protocol: TCP
  selector:
    {{- include "clearml.selectorLabelsApiServer" . | nindent 4 }}

View File

@ -0,0 +1,15 @@
# Service "<fullname>-fileserver".
# Exposes .Values.fileserver.service.port (also used as targetPort) with the
# Service type taken from .Values.fileserver.service.type.
# The fixed nodePort 30081 is rendered only for NodePort/LoadBalancer types:
# Kubernetes rejects a Service that sets nodePort with any other type, so
# emitting it unconditionally made the "type" value effectively unchangeable.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "clearml.fullname" . }}-fileserver
  labels:
    {{- include "clearml.labels" . | nindent 4 }}
spec:
  type: {{ .Values.fileserver.service.type }}
  ports:
    - port: {{ .Values.fileserver.service.port }}
      targetPort: {{ .Values.fileserver.service.port }}
      {{- if has .Values.fileserver.service.type (list "NodePort" "LoadBalancer") }}
      nodePort: 30081
      {{- end }}
      protocol: TCP
  selector:
    {{- include "clearml.selectorLabelsFileServer" . | nindent 4 }}

View File

@ -0,0 +1,15 @@
# Service "<fullname>-webserver".
# Exposes .Values.webserver.service.port (also used as targetPort) with the
# Service type taken from .Values.webserver.service.type.
# The fixed nodePort 30080 is rendered only for NodePort/LoadBalancer types:
# Kubernetes rejects a Service that sets nodePort with any other type, so
# emitting it unconditionally made the "type" value effectively unchangeable.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "clearml.fullname" . }}-webserver
  labels:
    {{- include "clearml.labels" . | nindent 4 }}
spec:
  type: {{ .Values.webserver.service.type }}
  ports:
    - port: {{ .Values.webserver.service.port }}
      targetPort: {{ .Values.webserver.service.port }}
      {{- if has .Values.webserver.service.type (list "NodePort" "LoadBalancer") }}
      nodePort: 30080
      {{- end }}
      protocol: TCP
  selector:
    {{- include "clearml.selectorLabelsWebServer" . | nindent 4 }}

295
charts/clearml/values.yaml Normal file
View File

@ -0,0 +1,295 @@
# Settings shared by the ClearML server components.
clearml:
  # NOTE(review): presumably the identifier of the default company/tenant;
  # confirm against the templates that consume it.
  defaultCompany: 'd1bd92a3b039400cbafc60a7a5b1e52b'
# Ingress settings; the Ingress is not created unless "enabled" is true.
ingress:
  enabled: false
  name: 'clearml-server-ingress'
  annotations: {}
  # Base domain. The ingress template prefixes it to build per-service hosts
  # such as "api.<host>" and "files.<host>".
  host: ''
  tls:
    secretName: ''
# API server component.
apiserver:
  # Kept as the string "true" (not a boolean).
  prepopulateEnabled: "true"
  prepopulateZipFiles: '/opt/clearml/db-pre-populate'
  prepopulateArtifactsPath: '/mnt/fileserver'
  configDir: '/opt/clearml/config'
  service:
    # Service type and port; the port is used for both "port" and "targetPort".
    type: NodePort
    port: 8008
  # NOTE(review): presumably the initial delay (seconds) of the liveness and
  # readiness probes; confirm in the deployment template.
  livenessDelay: 60
  readinessDelay: 60
  replicaCount: 1
  image:
    repository: 'allegroai/clearml'
    pullPolicy: IfNotPresent
    tag: '1.0.2'
  extraEnvs: []
  podAnnotations: {}
  resources: {}
    # No resource requests/limits are set by default, so that the chart can run
    # on small environments such as Minikube. To set them, replace the empty
    # braces after 'resources:' with a block like the following:
    # limits:
    #   cpu: 100m
    #   memory: 128Mi
    # requests:
    #   cpu: 100m
    #   memory: 128Mi
  nodeSelector: {}
  tolerations: []
  affinity: {}
  # Optional volume holding additional server configuration files.
  # The "<fullname>-apiserver-config" claim is created only when
  # enableConfigVolume is true, using the class and size below.
  storage:
    enableConfigVolume: false
    config:
      class: 'standard'
      size: '1Gi'
# File server component.
fileserver:
  service:
    # Service type and port; the port is used for both "port" and "targetPort".
    type: NodePort
    port: 8081
  replicaCount: 1
  image:
    repository: 'allegroai/clearml'
    pullPolicy: IfNotPresent
    tag: '1.0.2'
  extraEnvs: []
  podAnnotations: {}
  resources: {}
    # No resource requests/limits are set by default, so that the chart can run
    # on small environments such as Minikube. To set them, replace the empty
    # braces after 'resources:' with a block like the following:
    # limits:
    #   cpu: 100m
    #   memory: 128Mi
    # requests:
    #   cpu: 100m
    #   memory: 128Mi
  nodeSelector: {}
  tolerations: []
  affinity: {}
  # Storage class and size requested by the "<fullname>-fileserver-data" claim.
  storage:
    data:
      class: 'standard'
      size: '50Gi'
# Web server (UI) component.
webserver:
  extraEnvs: []
  service:
    # Service type and port; the port is used for both "port" and "targetPort".
    type: NodePort
    port: 80
  replicaCount: 1
  image:
    repository: 'allegroai/clearml'
    pullPolicy: IfNotPresent
    tag: '1.0.2'
  podAnnotations: {}
  resources: {}
    # No resource requests/limits are set by default, so that the chart can run
    # on small environments such as Minikube. To set them, replace the empty
    # braces after 'resources:' with a block like the following:
    # limits:
    #   cpu: 100m
    #   memory: 128Mi
    # requests:
    #   cpu: 100m
    #   memory: 128Mi
  nodeSelector: {}
  tolerations: []
  affinity: {}
# Agent-services component.
agentservices:
  # Connection and credential settings; all unset (null) by default.
  # NOTE(review): how each of these is passed to the container is defined in
  # the deployment template, which is not part of this file; verify there.
  clearmlHostIp: null
  agentVersion: ''
  clearmlWebHost: null
  clearmlFilesHost: null
  clearmlGitUser: null
  clearmlGitPassword: null
  awsAccessKeyId: null
  awsSecretAccessKey: null
  awsDefaultRegion: null
  azureStorageAccount: null
  azureStorageKey: null
  googleCredentials: null
  clearmlWorkerId: 'clearml-services'
  replicaCount: 1
  image:
    repository: 'allegroai/clearml-agent-services'
    pullPolicy: IfNotPresent
    # NOTE(review): "latest" is a floating tag; consider pinning a version for
    # reproducible deployments.
    tag: 'latest'
  extraEnvs: []
  podAnnotations: {}
  resources: {}
    # No resource requests/limits are set by default, so that the chart can run
    # on small environments such as Minikube. To set them, replace the empty
    # braces after 'resources:' with a block like the following:
    # limits:
    #   cpu: 100m
    #   memory: 128Mi
    # requests:
    #   cpu: 100m
    #   memory: 128Mi
  nodeSelector: {}
  tolerations: []
  affinity: {}
  # Storage class and size requested by the "<fullname>-agentservices-data" claim.
  storage:
    data:
      class: 'standard'
      size: '50Gi'
# Agent groups, keyed by an arbitrary identifier. Templates iterate over every
# entry of this map.
agentGroups:
  agent-group0:
    # Used to name the group's resources (e.g. the "<name>-conf" Secret).
    name: 'agent-group0'
    replicaCount: 0
    nvidiaGpusPerAgent: 1
    # If set, it *MUST* include a comparison operator (e.g. ">=0.16.1").
    agentVersion: ''
    # Multiple queues can be given, separated by a space
    # (e.g. "important_jobs default").
    queues: 'default'
    clearmlGitUser: null
    clearmlGitPassword: null
    clearmlAccessKey: null
    clearmlSecretKey: null
    awsAccessKeyId: null
    awsSecretAccessKey: null
    awsDefaultRegion: null
    azureStorageAccount: null
    azureStorageKey: null
    # Contents of clearml.conf for this group; when non-empty it is stored
    # base64-encoded in the "<name>-conf" Secret.
    clearmlConfig: |-
      sdk {
      }
    image:
      repository: 'nvidia/cuda'
      pullPolicy: IfNotPresent
      tag: '11.0-base-ubuntu18.04'
    podAnnotations: {}
    nodeSelector: {}
    tolerations: []
    affinity: {}
# Redis dependency (Bitnami chart). Available options:
# https://github.com/bitnami/charts/blob/master/bitnami/redis/values.yaml
redis:
  enabled: true
  usePassword: false
  databaseNumber: 0
  master:
    name: "{{ .Release.Name }}-redis-master"
    port: 6379
    persistence:
      enabled: true
      accessModes: ['ReadWriteOnce']
      size: '5Gi'
  cluster:
    enabled: false
# MongoDB dependency (Bitnami chart). Available options:
# https://github.com/bitnami/charts/blob/master/bitnami/mongodb/values.yaml
mongodb:
  enabled: true
  architecture: 'standalone'
  auth:
    enabled: false
  replicaCount: 1
  persistence:
    enabled: true
    accessModes: ['ReadWriteOnce']
    size: '50Gi'
  service:
    name: "{{ .Release.Name }}-mongodb"
    type: ClusterIP
    port: 27017
    portName: 'mongo-service'
# Elasticsearch dependency (Elastic chart). Available options:
# https://github.com/elastic/helm-charts/blob/7.10/elasticsearch/values.yaml
elasticsearch:
  enabled: true
  httpPort: 9200
  # Role flags are kept as strings, not booleans.
  roles:
    master: 'true'
    ingest: 'true'
    data: 'true'
    remote_cluster_client: 'true'
  replicas: 1
  # Readiness-probe workaround for a single-node cluster, whose status never
  # becomes green. Remove it when running with replicas > 1.
  clusterHealthCheckParams: 'wait_for_status=yellow&timeout=1s'
  minimumMasterNodes: 1
  clusterName: 'clearml-elastic'
  esJavaOpts: '-Xmx2g -Xms2g'
  extraEnvs:
    - name: bootstrap.memory_lock
      value: 'false'
    - name: cluster.routing.allocation.node_initial_primaries_recoveries
      value: '500'
    - name: cluster.routing.allocation.disk.watermark.low
      value: '500mb'
    - name: cluster.routing.allocation.disk.watermark.high
      value: '500mb'
    - name: cluster.routing.allocation.disk.watermark.flood_stage
      value: '500mb'
    - name: http.compression_level
      value: '7'
    - name: reindex.remote.whitelist
      value: '*.*'
    - name: xpack.monitoring.enabled
      value: 'false'
    - name: xpack.security.enabled
      value: 'false'
  resources:
    requests:
      memory: '4Gi'
    limits:
      memory: '4Gi'
  persistence:
    enabled: true
  volumeClaimTemplate:
    accessModes:
      - ReadWriteOnce
    resources:
      requests:
        storage: '50Gi'
  esConfig:
    elasticsearch.yml: |
      xpack.security.enabled: false