mirror of
https://github.com/clearml/clearml-helm-charts
synced 2025-04-17 01:31:13 +00:00
Compare commits
5 Commits
clearml-5.
...
clearml-ag
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c7b3a28989 | ||
|
|
12baef0d75 | ||
|
|
72916e171a | ||
|
|
126f313cdf | ||
|
|
9aa1997ebd |
@@ -1,10 +1,10 @@
|
||||
apiVersion: v2
|
||||
name: clearml-agent
|
||||
description: MLOps platform
|
||||
description: MLOps platform Task running agent
|
||||
type: application
|
||||
version: "3.1.4"
|
||||
version: "3.3.0"
|
||||
appVersion: "1.24"
|
||||
kubeVersion: ">= 1.19.0-0 < 1.26.0-0"
|
||||
kubeVersion: ">= 1.21.0-0 < 1.27.0-0"
|
||||
home: https://clear.ml
|
||||
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
|
||||
sources:
|
||||
@@ -17,3 +17,8 @@ keywords:
|
||||
- clearml
|
||||
- "machine learning"
|
||||
- mlops
|
||||
- "task agent"
|
||||
annotations:
|
||||
artifacthub.io/changes: |
|
||||
- kind: added
|
||||
description: affinity parameter
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# ClearML Kubernetes Agent
|
||||
|
||||
  
|
||||
  
|
||||
|
||||
MLOps platform
|
||||
MLOps platform Task running agent
|
||||
|
||||
**Homepage:** <https://clear.ml>
|
||||
|
||||
@@ -24,16 +24,18 @@ It allows you to schedule distributed experiments on a Kubernetes cluster.
|
||||
|
||||
## Requirements
|
||||
|
||||
Kubernetes: `>= 1.19.0-0 < 1.26.0-0`
|
||||
Kubernetes: `>= 1.21.0-0 < 1.27.0-0`
|
||||
|
||||
## Values
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| agentk8sglue | object | `{"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"serviceExistingAccountName":"","volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
|
||||
| agentk8sglue | object | `{"affinity":{},"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"securityContext":{},"serviceExistingAccountName":"","tolerations":[],"volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
|
||||
| agentk8sglue.affinity | object | `{}` | affinity setup for Agent pod (example in values.yaml comments) |
|
||||
| agentk8sglue.annotations | object | `{}` | annotations setup for Agent pod (example in values.yaml comments) |
|
||||
| agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url |
|
||||
| agentk8sglue.basePodTemplate | object | `{"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.basePodTemplate | object | `{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.basePodTemplate.affinity | object | `{}` | affinity setup for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.basePodTemplate.annotations | object | `{}` | annotations setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.env | list | `[]` | environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.fileMounts | list | `[]` | file definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
@@ -59,7 +61,9 @@ Kubernetes: `>= 1.19.0-0 < 1.26.0-0`
|
||||
| agentk8sglue.nodeSelector | object | `{}` | nodeSelector setup for Agent pod (example in values.yaml comments) |
|
||||
| agentk8sglue.queue | string | `"default"` | ClearML queue this agent will consume |
|
||||
| agentk8sglue.replicaCount | int | `1` | Glue Agent number of pods |
|
||||
| agentk8sglue.securityContext | object | `{}` | Web Server pod security context |
|
||||
| agentk8sglue.serviceExistingAccountName | string | `""` | if set, don't create a serviceAccountName but use defined existing one |
|
||||
| agentk8sglue.tolerations | list | `[]` | tolerations setup for Agent pod (example in values.yaml comments) |
|
||||
| agentk8sglue.volumeMounts | list | `[]` | volume mounts definition for Glue Agent (example in values.yaml comments) |
|
||||
| agentk8sglue.volumes | list | `[]` | volumes definition for Glue Agent (example in values.yaml comments) |
|
||||
| agentk8sglue.webServerUrlReference | string | `"https://app.clear.ml"` | Reference to Web server url |
|
||||
|
||||
@@ -172,6 +172,17 @@ data:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if $value.templateOverrides.affinity }}
|
||||
{{- with $value.templateOverrides.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.affinity }}
|
||||
{{- with $.Values.agentk8sglue.basePodTemplate.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
secrets.yaml: |
|
||||
{{- range $key, $value := $.Values.enterpriseFeatures.queues }}
|
||||
@@ -250,6 +261,10 @@ data:
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.basePodTemplate.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.sessions.portModeEnabled }}
|
||||
{{- range untilStep 1 ( ( add .Values.sessions.maxServices 1 ) | int ) 1 }}
|
||||
|
||||
@@ -28,6 +28,7 @@ spec:
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ include "clearml.serviceAccountName" . }}
|
||||
securityContext: {{ toYaml .Values.agentk8sglue.securityContext | nindent 8 }}
|
||||
initContainers:
|
||||
- name: init-k8s-glue
|
||||
{{- if .Values.enterpriseFeatures.enabled }}
|
||||
@@ -176,6 +177,14 @@ spec:
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
volumes:
|
||||
- name: {{ include "clearml.name" . }}-pt
|
||||
configMap:
|
||||
|
||||
@@ -73,12 +73,19 @@ agentk8sglue:
|
||||
containerCustomBashScript: ""
|
||||
# -- Extra Environment variables for Glue Agent
|
||||
extraEnvs: []
|
||||
# - name: PYTHONPATH
|
||||
# value: "somepath"
|
||||
|
||||
# - name: PYTHONPATH
|
||||
# value: "somepath"
|
||||
# -- Web Server pod security context
|
||||
securityContext: {}
|
||||
# runAsUser: 1001
|
||||
# fsGroup: 1001
|
||||
# -- nodeSelector setup for Agent pod (example in values.yaml comments)
|
||||
nodeSelector: {}
|
||||
# fleet: agent-nodes
|
||||
# -- tolerations setup for Agent pod (example in values.yaml comments)
|
||||
tolerations: []
|
||||
# -- affinity setup for Agent pod (example in values.yaml comments)
|
||||
affinity: {}
|
||||
# -- volumes definition for Glue Agent (example in values.yaml comments)
|
||||
volumes: []
|
||||
# - name: "yourvolume"
|
||||
@@ -159,17 +166,20 @@ agentk8sglue:
|
||||
resources: {}
|
||||
# limits:
|
||||
# nvidia.com/gpu: 1
|
||||
# -- nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
nodeSelector: {}
|
||||
# fleet: gpu-nodes
|
||||
# -- tolerations setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
tolerations: []
|
||||
# - key: "nvidia.com/gpu"
|
||||
# operator: Exists
|
||||
# effect: "NoSchedule"
|
||||
# -- nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
nodeSelector: {}
|
||||
# fleet: gpu-nodes
|
||||
# -- affinity setup for pods spawned to consume ClearML Task
|
||||
affinity: {}
|
||||
# -- securityContext setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
securityContext: {}
|
||||
# runAsUser: 1000
|
||||
# runAsUser: 1001
|
||||
# fsGroup: 1001
|
||||
# -- hostAliases setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
hostAliases: {}
|
||||
# - ip: "127.0.0.1"
|
||||
|
||||
@@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: clearml
|
||||
description: MLOps platform
|
||||
type: application
|
||||
version: "5.5.0"
|
||||
version: "5.5.1"
|
||||
appVersion: "1.9.2"
|
||||
kubeVersion: ">= 1.21.0-0 < 1.27.0-0"
|
||||
home: https://clear.ml
|
||||
@@ -32,5 +32,5 @@ dependencies:
|
||||
condition: elasticsearch.enabled
|
||||
annotations:
|
||||
artifacthub.io/changes: |
|
||||
- kind: added
|
||||
description: support for fileserver static PVC
|
||||
- kind: changed
|
||||
description: check redis and mongodb too before starting apiserver
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# ClearML Ecosystem for Kubernetes
|
||||
|
||||
  
|
||||
  
|
||||
|
||||
MLOps platform
|
||||
|
||||
|
||||
@@ -54,10 +54,24 @@ spec:
|
||||
- -c
|
||||
- >
|
||||
set -x;
|
||||
{{- if .Values.elasticsearch.enabled }}
|
||||
while [ $(curl -sw '%{http_code}' "http://{{ include "elasticsearch.servicename" . }}:{{ include "elasticsearch.serviceport" . }}/_cluster/health" -o /dev/null) -ne 200 ] ; do
|
||||
echo "waiting for elasticsearch" ;
|
||||
sleep 5 ;
|
||||
done
|
||||
done ;
|
||||
{{- end }}
|
||||
{{- if .Values.mongodb.enabled }}
|
||||
while [ $(curl --telnet-option BOGUS --connect-timeout 2 -s "telnet://{{ .Release.Name }}-mongodb:27017" -o /dev/null; echo $?) -ne 49 ] ; do
|
||||
echo "waiting for mongodb" ;
|
||||
sleep 5 ;
|
||||
done ;
|
||||
{{- end }}
|
||||
{{- if .Values.redis.enabled }}
|
||||
while [ $(curl --telnet-option BOGUS --connect-timeout 2 -s "telnet://{{ include "redis.servicename" . }}:{{ include "redis.serviceport" . }}" -o /dev/null; echo $?) -ne 49 ] ; do
|
||||
echo "waiting for redis" ;
|
||||
sleep 5 ;
|
||||
done ;
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: clearml-apiserver
|
||||
{{- if .Values.enterpriseFeatures.enabled }}
|
||||
|
||||
3
platform-specific-configs/openshift/README.md
Normal file
3
platform-specific-configs/openshift/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Openshift specific configuration
|
||||
|
||||
Use override files when deploying ClearML. Proposed files in this folder require setup of `<USER>` and `<FSUSER>` values to uids accepted by specific openshift configuration.
|
||||
@@ -0,0 +1,6 @@
|
||||
agentk8sglue:
|
||||
securityContext:
|
||||
runAsUser: 0
|
||||
basePodTemplate:
|
||||
securityContext:
|
||||
runAsUser: 0
|
||||
36
platform-specific-configs/openshift/values-clearml.yaml
Normal file
36
platform-specific-configs/openshift/values-clearml.yaml
Normal file
@@ -0,0 +1,36 @@
|
||||
apiserver:
|
||||
podSecurityContext:
|
||||
fsGroup: <FSUSER>
|
||||
runAsUser: <USER>
|
||||
runAsNonRoot: true
|
||||
fileserver:
|
||||
podSecurityContext:
|
||||
fsGroup: <FSUSER>
|
||||
runAsUser: <USER>
|
||||
runAsNonRoot: true
|
||||
webserver:
|
||||
podSecurityContext:
|
||||
fsGroup: <FSUSER>
|
||||
runAsUser: <USER>
|
||||
runAsNonRoot: true
|
||||
elasticsearch:
|
||||
securityContext:
|
||||
runAsUser: <USER>
|
||||
podSecurityContext:
|
||||
fsGroup: <FSUSER>
|
||||
runAsUser: <USER>
|
||||
sysctlInitContainer:
|
||||
enabled: false
|
||||
volumeClaimTemplate:
|
||||
redis:
|
||||
securityContext:
|
||||
fsGroup: <FSUSER>
|
||||
runAsUser: <USER>
|
||||
mongodb:
|
||||
podSecurityContext:
|
||||
enabled: true
|
||||
fsGroup: <FSUSER>
|
||||
containerSecurityContext:
|
||||
enabled: true
|
||||
runAsUser: <USER>
|
||||
runAsNonRoot: true
|
||||
3
platform-specific-configs/tanzu/README.md
Normal file
3
platform-specific-configs/tanzu/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Tanzu specific configuration
|
||||
|
||||
Before installing any ClearML chart, apply `rolebinding.yaml` file after setting needed `<NAMESPACE>` in it.
|
||||
@@ -2,7 +2,7 @@ kind: RoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: clearml-tanzu-rolebinding
|
||||
namespace: clearml
|
||||
namespace: <NAMESPACE>
|
||||
roleRef:
|
||||
kind: ClusterRole
|
||||
name: psp:vmware-system-privileged
|
||||
|
||||
Reference in New Issue
Block a user