Compare commits

...

10 Commits

Author SHA1 Message Date
Valeriano Manassero
82ad17860d New ingress style (#27)
* new ingress style

* bump up version

* hostName fix

* helm-docs update
2021-09-16 08:51:07 +02:00
Valeriano Manassero
aa761dd450 Agent enable switch (#26)
* enable/disable switch

* bump up chart
2021-09-15 08:13:01 +02:00
Valeriano Manassero
7ff2f94d1a Apiserver configmap (#25)
* metadata name fix

* use toString

* use configmap for apiserver configs

* bump up version

* indentation fix

* fix trailing whitespaces
2021-09-14 15:43:10 +02:00
Valeriano Manassero
618a269c97 Fix service url generation (#21)
* service url generation functions

* use generation functions

* bump up version
2021-08-26 10:58:06 +02:00
Valeriano Manassero
3f215d2d90 Use many ingresses (#20)
* use many ingresses

* bump up version
2021-08-25 14:49:43 +02:00
Valeriano Manassero
03223fc1c1 Use Recreate as strategy (#19)
* use Recreate as strategybump up version

* fix strategy indentation and position

* updatesStrategy configurable

* updateStrategy parameter

* use 2.2.0 instead of patch release
2021-08-17 14:59:13 +02:00
Valeriano Manassero
898089b7fb Fix agent clearml conf (#18)
* fix agent mount clearml.conf

* bump up version
2021-08-14 12:00:23 +02:00
Valeriano Manassero
732bb970aa Configurable prefix ingress (#17)
* ingress configurable prefixes

* chart version bump up

* fix version number
2021-08-12 12:02:26 +03:00
Valeriano Manassero
91d45281fa bump up app version to 1.1.1 (#16) 2021-08-06 14:18:53 +02:00
Valeriano Manassero
28b6e9f4e4 Remove badges from root README since we have them in chart one (#14) 2021-07-27 16:18:02 +02:00
14 changed files with 229 additions and 105 deletions

View File

@@ -1,12 +1,8 @@
# ClearML Helm Charts Library for Kubernetes
Helm charts provided by [Allegro AI](https://clear.ml), ready to launch on Kubernetes using [Kubernetes Helm](https://github.com/helm/helm).
## Auto-Magical Experiment Manager & Version Control for AI
[![GitHub license](https://img.shields.io/badge/license-SSPL-green.svg)](https://img.shields.io/badge/license-SSPL-green.svg)
[![GitHub version](https://img.shields.io/github/release-pre/allegroai/clearml-server.svg)](https://img.shields.io/github/release-pre/allegroai/clearml-server.svg)
[![PyPI status](https://img.shields.io/badge/status-beta-yellow.svg)](https://img.shields.io/badge/status-beta-yellow.svg)
Helm charts provided by [Allegro AI](https://clear.ml), ready to launch on Kubernetes using [Kubernetes Helm](https://github.com/helm/helm).
## Introduction
@@ -84,4 +80,4 @@ Additionally, you can always find us at *clearml@allegro.ai*
**PRs are always welcomed** :heart: See more details in the ClearML [Guidelines for Contributing](https://github.com/allegroai/clearml-helm-charts/blob/main/CONTRIBUTING.md).
_May the force (and the goddess of learning rates) be with you!_
_May the force (and the goddess of learning rates) be with you!_

View File

@@ -2,8 +2,8 @@ apiVersion: v2
name: clearml
description: MLOps platform
type: application
version: "2.0.0"
appVersion: "1.1.0"
version: "3.0.0"
appVersion: "1.1.1"
home: https://clear.ml
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
sources:

View File

@@ -1,6 +1,6 @@
# ClearML Ecosystem for Kubernetes
![Version: 2.0.0](https://img.shields.io/badge/Version-2.0.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.1.0](https://img.shields.io/badge/AppVersion-1.1.0-informational?style=flat-square)
![Version: 3.0.0](https://img.shields.io/badge/Version-3.0.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.1.1](https://img.shields.io/badge/AppVersion-1.1.1-informational?style=flat-square)
MLOps platform
@@ -121,6 +121,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| agentGroups.agent-group-cpu.clearmlGitPassword | string | `nil` | |
| agentGroups.agent-group-cpu.clearmlGitUser | string | `nil` | |
| agentGroups.agent-group-cpu.clearmlSecretKey | string | `nil` | |
| agentGroups.agent-group-cpu.enabled | bool | `true` | |
| agentGroups.agent-group-cpu.image.pullPolicy | string | `"IfNotPresent"` | |
| agentGroups.agent-group-cpu.image.repository | string | `"ubuntu"` | |
| agentGroups.agent-group-cpu.image.tag | string | `"18.04"` | |
@@ -131,6 +132,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| agentGroups.agent-group-cpu.queues | string | `"default"` | |
| agentGroups.agent-group-cpu.replicaCount | int | `1` | |
| agentGroups.agent-group-cpu.tolerations | list | `[]` | |
| agentGroups.agent-group-cpu.updateStrategy | string | `"Recreate"` | |
| agentGroups.agent-group-gpu.affinity | object | `{}` | |
| agentGroups.agent-group-gpu.agentVersion | string | `""` | |
| agentGroups.agent-group-gpu.awsAccessKeyId | string | `nil` | |
@@ -143,6 +145,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| agentGroups.agent-group-gpu.clearmlGitPassword | string | `nil` | |
| agentGroups.agent-group-gpu.clearmlGitUser | string | `nil` | |
| agentGroups.agent-group-gpu.clearmlSecretKey | string | `nil` | |
| agentGroups.agent-group-gpu.enabled | bool | `true` | |
| agentGroups.agent-group-gpu.image.pullPolicy | string | `"IfNotPresent"` | |
| agentGroups.agent-group-gpu.image.repository | string | `"nvidia/cuda"` | |
| agentGroups.agent-group-gpu.image.tag | string | `"11.0-base-ubuntu18.04"` | |
@@ -153,6 +156,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| agentGroups.agent-group-gpu.queues | string | `"default"` | |
| agentGroups.agent-group-gpu.replicaCount | int | `0` | |
| agentGroups.agent-group-gpu.tolerations | list | `[]` | |
| agentGroups.agent-group-gpu.updateStrategy | string | `"Recreate"` | |
| agentservices.affinity | object | `{}` | |
| agentservices.agentVersion | string | `""` | |
| agentservices.awsAccessKeyId | string | `nil` | |
@@ -178,12 +182,13 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| agentservices.storage.data.class | string | `"standard"` | |
| agentservices.storage.data.size | string | `"50Gi"` | |
| agentservices.tolerations | list | `[]` | |
| apiserver.additionalConfigs | object | `{}` | |
| apiserver.affinity | object | `{}` | |
| apiserver.configDir | string | `"/opt/clearml/config"` | |
| apiserver.extraEnvs | list | `[]` | |
| apiserver.image.pullPolicy | string | `"IfNotPresent"` | |
| apiserver.image.repository | string | `"allegroai/clearml"` | |
| apiserver.image.tag | string | `"1.1.0"` | |
| apiserver.image.tag | string | `"1.1.1"` | |
| apiserver.livenessDelay | int | `60` | |
| apiserver.nodeSelector | object | `{}` | |
| apiserver.podAnnotations | object | `{}` | |
@@ -195,9 +200,6 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| apiserver.resources | object | `{}` | |
| apiserver.service.port | int | `8008` | |
| apiserver.service.type | string | `"NodePort"` | |
| apiserver.storage.config.class | string | `"standard"` | |
| apiserver.storage.config.size | string | `"1Gi"` | |
| apiserver.storage.enableConfigVolume | bool | `false` | |
| apiserver.tolerations | list | `[]` | |
| clearml.defaultCompany | string | `"d1bd92a3b039400cbafc60a7a5b1e52b"` | |
| elasticsearch.clusterHealthCheckParams | string | `"wait_for_status=yellow&timeout=1s"` | |
@@ -239,7 +241,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| fileserver.extraEnvs | list | `[]` | |
| fileserver.image.pullPolicy | string | `"IfNotPresent"` | |
| fileserver.image.repository | string | `"allegroai/clearml"` | |
| fileserver.image.tag | string | `"1.1.0"` | |
| fileserver.image.tag | string | `"1.1.1"` | |
| fileserver.nodeSelector | object | `{}` | |
| fileserver.podAnnotations | object | `{}` | |
| fileserver.replicaCount | int | `1` | |
@@ -250,10 +252,14 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| fileserver.storage.data.size | string | `"50Gi"` | |
| fileserver.tolerations | list | `[]` | |
| ingress.annotations | object | `{}` | |
| ingress.api.hostName | string | `"api.clearml.127-0-0-1.nip.io"` | |
| ingress.api.tlsSecretName | string | `""` | |
| ingress.app.hostName | string | `"app.clearml.127-0-0-1.nip.io"` | |
| ingress.app.tlsSecretName | string | `""` | |
| ingress.enabled | bool | `false` | |
| ingress.host | string | `""` | |
| ingress.files.hostName | string | `"files.clearml.127-0-0-1.nip.io"` | |
| ingress.files.tlsSecretName | string | `""` | |
| ingress.name | string | `"clearml-server-ingress"` | |
| ingress.tls.secretName | string | `""` | |
| mongodb.architecture | string | `"standalone"` | |
| mongodb.auth.enabled | bool | `false` | |
| mongodb.enabled | bool | `true` | |
@@ -278,7 +284,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| webserver.extraEnvs | list | `[]` | |
| webserver.image.pullPolicy | string | `"IfNotPresent"` | |
| webserver.image.repository | string | `"allegroai/clearml"` | |
| webserver.image.tag | string | `"1.1.0"` | |
| webserver.image.tag | string | `"1.1.1"` | |
| webserver.nodeSelector | object | `{}` | |
| webserver.podAnnotations | object | `{}` | |
| webserver.replicaCount | int | `1` | |

View File

@@ -95,3 +95,48 @@ Create the name of the service account to use
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
{{/*
Create the name of the App service to use
*/}}
{{- define "clearml.serviceApp" -}}
{{- if .Values.ingress.enabled }}
{{- if .Values.ingress.tls.secretName }}
{{- printf "%s%s%s" "https://" .Values.ingress.hostPrefixApp .Values.ingress.host }}
{{- else }}
{{- printf "%s%s%s" "http://" .Values.ingress.hostPrefixApp .Values.ingress.host }}
{{- end }}
{{- else }}
{{- printf "%s%s%s%s" "http://" (include "clearml.fullname" .) "-webserver:" (.Values.webserver.service.port | toString) }}
{{- end }}
{{- end }}
{{/*
Create the name of the Api service to use
*/}}
{{- define "clearml.serviceApi" -}}
{{- if .Values.ingress.enabled }}
{{- if .Values.ingress.tls.secretName }}
{{- printf "%s%s%s" "https://" .Values.ingress.hostPrefixApi .Values.ingress.host }}
{{- else }}
{{- printf "%s%s%s" "http://" .Values.ingress.hostPrefixApi .Values.ingress.host }}
{{- end }}
{{- else }}
{{- printf "%s%s%s%s" "http://" (include "clearml.fullname" .) "-apiserver:" (.Values.apiserver.service.port | toString) }}
{{- end }}
{{- end }}
{{/*
Create the name of the Files service to use
*/}}
{{- define "clearml.serviceFiles" -}}
{{- if .Values.ingress.enabled }}
{{- if .Values.ingress.tls.secretName }}
{{- printf "%s%s%s" "https://" .Values.ingress.hostPrefixFiles .Values.ingress.host }}
{{- else }}
{{- printf "%s%s%s" "http://" .Values.ingress.hostPrefixFiles .Values.ingress.host }}
{{- end }}
{{- else }}
{{- printf "%s%s%s%s" "http://" (include "clearml.fullname" .) "-fileserver:" (.Values.fileserver.service.port | toString) }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,13 @@
{{- if .Values.apiserver.additionalConfigs -}}
apiVersion: v1
kind: ConfigMap
metadata:
name: "{{ include "clearml.fullname" . }}-apiserver-configmap"
labels:
{{- include "clearml.labels" . | nindent 4 }}
data:
{{- range $key, $val := .Values.apiserver.additionalConfigs }}
{{ $key }}: |
{{- $val | nindent 4 }}
{{- end }}
{{- end -}}

View File

@@ -1,5 +1,6 @@
{{- range $key, $value := .Values.agentGroups }}
{{- with $value }}
{{- if .enabled }}
---
apiVersion: apps/v1
kind: Deployment
@@ -9,6 +10,8 @@ metadata:
{{- include "clearml.labels" $ | nindent 4 }}
spec:
replicas: {{ .replicaCount }}
strategy:
type: {{ .updateStrategy }}
selector:
matchLabels:
{{- include "clearml.selectorLabelsAgent" $ | nindent 6 }}
@@ -38,7 +41,7 @@ spec:
- -c
- >
set -x;
while [ $(curl -sw '%{http_code}' "http://{{ include "clearml.fullname" $ }}-apiserver:{{ $.Values.apiserver.service.port }}/debug.ping" -o /dev/null) -ne 200 ] ; do
while [ $(curl -sw '%{http_code}' "{{ include "clearml.serviceApi" $ }}/debug.ping" -o /dev/null) -ne 200 ] ; do
echo "waiting for apiserver" ;
sleep 5 ;
done
@@ -54,11 +57,11 @@ spec:
{{ .nvidiaGpusPerAgent }}
env:
- name: CLEARML_API_HOST
value: 'http://{{ include "clearml.fullname" $ }}-apiserver:{{ $.Values.apiserver.service.port }}'
value: {{ include "clearml.serviceApi" $ }}
- name: CLEARML_WEB_HOST
value: 'http://{{ include "clearml.fullname" $ }}-webserver:{{ $.Values.webserver.service.port }}'
value: {{ include "clearml.serviceApp" $ }}
- name: CLEARML_FILES_HOST
value: 'http://{{ include "clearml.fullname" $ }}-fileserver:{{ $.Values.fileserver.service.port }}'
value: {{ include "clearml.serviceFiles" $ }}
- name: CLEARML_AGENT_GIT_USER
value: {{ .clearmlGitUser}}
- name: CLEARML_AGENT_GIT_PASS
@@ -91,6 +94,13 @@ spec:
python3 -m pip install -U pip ;
python3 -m pip install clearml-agent{{ .agentVersion}} ;
CLEARML_AGENT_K8S_HOST_MOUNT=/root/.clearml:/root/.clearml clearml-agent daemon --queue {{ .queues}}"
{{ if .clearmlConfig }}
volumeMounts:
- name: agent-clearml-conf-volume
mountPath: /root/clearml.conf
subPath: clearml.conf
readOnly: true
{{- end }}
{{- with .nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
@@ -105,3 +115,4 @@ spec:
{{- end }}
{{- end }}
{{- end }}
{{- end }}

View File

@@ -30,7 +30,7 @@ spec:
- -c
- >
set -x;
while [ $(curl -sw '%{http_code}' "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}/debug.ping" -o /dev/null) -ne 200 ] ; do
while [ $(curl -sw '%{http_code}' "{{ include "clearml.serviceApi" $ }}/debug.ping" -o /dev/null) -ne 200 ] ; do
echo "waiting for apiserver" ;
sleep 5 ;
done
@@ -42,7 +42,7 @@ spec:
- name: CLEARML_HOST_IP
value: {{ .Values.agentservices.clearmlHostIp }}
- name: CLEARML_API_HOST
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
value: {{ include "clearml.serviceApi" $ }}
- name: CLEARML_WEB_HOST
value: {{ .Values.agentservices.clearmlWebHost }}
- name: CLEARML_FILES_HOST

View File

@@ -18,12 +18,6 @@ spec:
labels:
{{- include "clearml.selectorLabelsApiServer" . | nindent 8 }}
spec:
{{- if .Values.apiserver.storage.enableConfigVolume }}
volumes:
- name: apiserver-config
persistentVolumeClaim:
claimName: {{ include "clearml.fullname" . }}-apiserver-config
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.apiserver.image.repository }}:{{ .Values.apiserver.image.tag | default .Chart.AppVersion }}"
@@ -101,13 +95,19 @@ spec:
httpGet:
path: /debug.ping
port: 8008
{{- if .Values.apiserver.storage.enableConfigVolume }}
{{- if .Values.apiserver.additionalConfigs }}
volumeMounts:
- name: apiserver-config
mountPath: /opt/clearml/config
{{- end }}
resources:
{{- toYaml .Values.apiserver.resources | nindent 12 }}
{{- if .Values.apiserver.additionalConfigs }}
volumes:
- name: apiserver-config
configMap:
name: "{{ include "clearml.fullname" . }}-apiserver-configmap"
{{- end }}
{{- with .Values.apiserver.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}

View File

@@ -0,0 +1,34 @@
{{- if .Values.ingress.enabled -}}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
name: {{ include "clearml.fullname" . }}-api
labels:
{{- include "clearml.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if .Values.ingress.api.tlsSecretName }}
tls:
- hosts:
- {{ .Values.ingress.api.hostName }}
secretName: {{ .Values.ingress.api.tlsSecretName }}
{{- end }}
rules:
- host: {{ .Values.ingress.api.hostName }}
http:
paths:
- path: "/"
pathType: Prefix
backend:
serviceName: {{ include "clearml.fullname" . }}-apiserver
servicePort: {{ .Values.apiserver.service.port }}
{{- end }}

View File

@@ -0,0 +1,34 @@
{{- if .Values.ingress.enabled -}}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
name: {{ include "clearml.fullname" . }}-app
labels:
{{- include "clearml.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if .Values.ingress.app.tlsSecretName }}
tls:
- hosts:
- {{ .Values.ingress.app.hostName }}
secretName: {{ .Values.ingress.app.tlsSecretName }}
{{- end }}
rules:
- host: {{ .Values.ingress.app.hostName }}
http:
paths:
- path: "/"
pathType: Prefix
backend:
serviceName: {{ include "clearml.fullname" . }}-webserver
servicePort: {{ .Values.webserver.service.port }}
{{- end }}

View File

@@ -0,0 +1,34 @@
{{- if .Values.ingress.enabled -}}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
name: {{ include "clearml.fullname" . }}-files
labels:
{{- include "clearml.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if .Values.ingress.files.tlsSecretName }}
tls:
- hosts:
- {{ .Values.ingress.files.hostName }}
secretName: {{ .Values.ingress.files.tlsSecretName }}
{{- end }}
rules:
- host: {{ .Values.ingress.files.hostName }}
http:
paths:
- path: "/"
pathType: Prefix
backend:
serviceName: {{ include "clearml.fullname" . }}-fileserver
servicePort: {{ .Values.fileserver.service.port }}
{{- end }}

View File

@@ -1,48 +0,0 @@
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "clearml.fullname" . -}}
{{- if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
name: {{ $fullName }}
labels:
{{- include "clearml.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if .Values.ingress.tls.secretName }}
tls:
- hosts:
- "app.{{ .Values.ingress.host }}"
- "files.{{ .Values.ingress.host }}"
- "api.{{ .Values.ingress.host }}"
secretName: {{ .Values.ingress.tls.secretName }}
{{- end }}
rules:
- host: "app.{{ .Values.ingress.host }}"
http:
paths:
- path: "/*"
backend:
serviceName: {{ include "clearml.fullname" . }}-webserver
servicePort: {{ .Values.webserver.service.port }}
- host: "api.{{ .Values.ingress.host }}"
http:
paths:
- path: "/*"
backend:
serviceName: {{ include "clearml.fullname" . }}-apiserver
servicePort: {{ .Values.apiserver.service.port }}
- host: "files.{{ .Values.ingress.host }}"
http:
paths:
- path: "/*"
backend:
serviceName: {{ include "clearml.fullname" . }}-fileserver
servicePort: {{ .Values.fileserver.service.port }}
{{- end }}

View File

@@ -1,15 +0,0 @@
{{- if .Values.apiserver.storage.enableConfigVolume }}
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: {{ include "clearml.fullname" . }}-apiserver-config
labels:
{{- include "clearml.labels" . | nindent 4 }}
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: {{ .Values.apiserver.storage.config.size | quote }}
storageClassName: {{ .Values.apiserver.storage.config.class | quote }}
{{- end }}

View File

@@ -4,9 +4,15 @@ ingress:
enabled: false
name: clearml-server-ingress
annotations: {}
host: ""
tls:
secretName: ""
app:
hostName: "app.clearml.127-0-0-1.nip.io"
tlsSecretName: ""
api:
hostName: "api.clearml.127-0-0-1.nip.io"
tlsSecretName: ""
files:
hostName: "files.clearml.127-0-0-1.nip.io"
tlsSecretName: ""
apiserver:
prepopulateEnabled: "true"
@@ -26,7 +32,7 @@ apiserver:
image:
repository: "allegroai/clearml"
pullPolicy: IfNotPresent
tag: "1.1.0"
tag: "1.1.1"
extraEnvs: []
@@ -50,12 +56,16 @@ apiserver:
affinity: {}
# Optional: used in pvc-apiserver containing optional server configuration files
storage:
enableConfigVolume: false
config:
class: "standard"
size: 1Gi
additionalConfigs: {}
# services.conf: |
# tasks {
# non_responsive_tasks_watchdog {
# # In-progress tasks that haven't been updated for at least 'value' seconds will be stopped by the watchdog
# threshold_sec: 21000
# # Watchdog will sleep for this number of seconds after each cycle
# watch_interval_sec: 900
# }
# }
fileserver:
service:
@@ -67,7 +77,7 @@ fileserver:
image:
repository: "allegroai/clearml"
pullPolicy: IfNotPresent
tag: "1.1.0"
tag: "1.1.1"
extraEnvs: []
@@ -108,7 +118,7 @@ webserver:
image:
repository: "allegroai/clearml"
pullPolicy: IfNotPresent
tag: "1.1.0"
tag: "1.1.1"
podAnnotations: {}
@@ -181,8 +191,10 @@ agentservices:
agentGroups:
agent-group-cpu:
enabled: true
name: agent-group-cpu
replicaCount: 1
updateStrategy: Recreate
nvidiaGpusPerAgent: 0
agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
queues: "default" # multiple queues can be specified separated by a space (e.g. "important_jobs default")
@@ -213,8 +225,10 @@ agentGroups:
affinity: {}
agent-group-gpu:
enabled: true
name: agent-group-gpu
replicaCount: 0
updateStrategy: Recreate
nvidiaGpusPerAgent: 1
agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
queues: "default" # multiple queues can be specified separated by a space (e.g. "important_jobs default")