mirror of
https://github.com/clearml/clearml-helm-charts
synced 2025-04-17 01:31:13 +00:00
Compare commits
3 Commits
clearml-3.
...
clearml-3.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9c15a8a348 | ||
|
|
cd7f22f7d8 | ||
|
|
078e394e24 |
@@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: clearml
|
||||
description: MLOps platform
|
||||
type: application
|
||||
version: "3.3.0"
|
||||
version: "3.4.1"
|
||||
appVersion: "1.1.1"
|
||||
home: https://clear.ml
|
||||
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# ClearML Ecosystem for Kubernetes
|
||||
|
||||
  
|
||||
  
|
||||
|
||||
MLOps platform
|
||||
|
||||
@@ -163,6 +163,16 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-gpu.replicaCount | int | `0` | |
|
||||
| agentGroups.agent-group-gpu.tolerations | list | `[]` | |
|
||||
| agentGroups.agent-group-gpu.updateStrategy | string | `"Recreate"` | |
|
||||
| agentk8sglue.defaultDockerImage | string | `"nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu18.04"` | |
|
||||
| agentk8sglue.enabled | bool | `false` | |
|
||||
| agentk8sglue.id | string | `"k8s-agent"` | |
|
||||
| agentk8sglue.image.repository | string | `"allegroai/clearml-agent-k8s"` | |
|
||||
| agentk8sglue.image.tag | string | `"aws-latest-1.21"` | |
|
||||
| agentk8sglue.maxPods | int | `10` | |
|
||||
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | |
|
||||
| agentk8sglue.podTemplate.resources | object | `{}` | |
|
||||
| agentk8sglue.podTemplate.tolerations | object | `{}` | |
|
||||
| agentk8sglue.queue | string | `"aws-instances"` | |
|
||||
| agentservices.affinity | object | `{}` | |
|
||||
| agentservices.agentVersion | string | `""` | |
|
||||
| agentservices.awsAccessKeyId | string | `nil` | |
|
||||
@@ -268,11 +278,14 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| fileserver.storage.data.size | string | `"50Gi"` | |
|
||||
| fileserver.tolerations | list | `[]` | |
|
||||
| ingress.annotations | object | `{}` | |
|
||||
| ingress.api.annotations | object | `{}` | |
|
||||
| ingress.api.hostName | string | `"api.clearml.127-0-0-1.nip.io"` | |
|
||||
| ingress.api.tlsSecretName | string | `""` | |
|
||||
| ingress.app.annotations | object | `{}` | |
|
||||
| ingress.app.hostName | string | `"app.clearml.127-0-0-1.nip.io"` | |
|
||||
| ingress.app.tlsSecretName | string | `""` | |
|
||||
| ingress.enabled | bool | `false` | |
|
||||
| ingress.files.annotations | object | `{}` | |
|
||||
| ingress.files.hostName | string | `"files.clearml.127-0-0-1.nip.io"` | |
|
||||
| ingress.files.tlsSecretName | string | `""` | |
|
||||
| ingress.name | string | `"clearml-server-ingress"` | |
|
||||
|
||||
7
charts/clearml/ci/README.md
Normal file
7
charts/clearml/ci/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
Place values files with different values in this directory to ensure these cases are tested by the CI as well.
|
||||
|
||||
https://github.com/helm/chart-testing/blob/main/doc/ct_install.md
|
||||
|
||||
```
|
||||
"Charts may have multiple custom values files matching the glob pattern '*-values.yaml' in a directory named 'ci' in the root of the chart's directory. The chart is installed and tested for each of these files. If no custom values file is present, the chart is installed and tested with defaults."
|
||||
```
|
||||
1
charts/clearml/ci/default-values.yaml
Normal file
1
charts/clearml/ci/default-values.yaml
Normal file
@@ -0,0 +1 @@
|
||||
# empty so default values.yaml gets tested
|
||||
2
charts/clearml/ci/k8sagent-values.yaml
Normal file
2
charts/clearml/ci/k8sagent-values.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
# CI override: turn on the k8s glue agent so the chart is also installed and
# tested with the agentk8sglue templates rendered (files in this directory
# matching '*-values.yaml' are each installed by chart-testing).
agentk8sglue:
  enabled: true
|
||||
37
charts/clearml/templates/configmap-agentk8s-template.yaml
Normal file
37
charts/clearml/templates/configmap-agentk8s-template.yaml
Normal file
@@ -0,0 +1,37 @@
|
||||
{{- if .Values.agentk8sglue.enabled }}
# Pod template for the k8s glue agent. The agent Deployment mounts this
# ConfigMap at /root/template and passes template.yaml through
# "--template-yaml". Only rendered when agentk8sglue.enabled is true.
#
# NOTE: Helm built-in objects are case-sensitive. The release namespace is
# .Release.Namespace; the lowercase .Release.namespace does not exist and
# renders as an empty value.
apiVersion: v1
kind: ConfigMap
metadata:
  name: k8sagent-pod-template
data:
  template.yaml: |
    apiVersion: v1
    metadata:
      namespace: {{ .Release.Namespace | quote }}
    spec:
      containers:
      - resources:
          {{- toYaml .Values.agentk8sglue.podTemplate.resources | nindent 10 }}
        env:
        - name: CLEARML_API_HOST
          value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
        - name: CLEARML_WEB_HOST
          value: "http://{{ include "clearml.fullname" . }}-webserver"
        - name: CLEARML_FILES_HOST
          value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}"
        - name: CLEARML_API_ACCESS_KEY
          valueFrom:
            secretKeyRef:
              name: clearml-conf
              key: apiserver_key
        - name: CLEARML_API_SECRET_KEY
          valueFrom:
            secretKeyRef:
              name: clearml-conf
              key: apiserver_secret
      tolerations:
        {{- toYaml .Values.agentk8sglue.podTemplate.tolerations | nindent 8 }}
      nodeSelector:
        {{- toYaml .Values.agentk8sglue.podTemplate.nodeSelector | nindent 8 }}
{{- end }}
|
||||
|
||||
62
charts/clearml/templates/deployment-agentk8s.yaml
Normal file
62
charts/clearml/templates/deployment-agentk8s.yaml
Normal file
@@ -0,0 +1,62 @@
|
||||
{{- if .Values.agentk8sglue.enabled }}
# Deployment of the k8s glue agent (single replica). Only rendered when
# agentk8sglue.enabled is true. The pod template ConfigMap is mounted at
# /root/template and handed to the agent through K8S_GLUE_EXTRA_ARGS.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: "{{ include "clearml.fullname" . }}-k8sagent"
  labels:
    app: k8sagent
spec:
  replicas: 1
  selector:
    matchLabels:
      app: k8sagent
  template:
    metadata:
      labels:
        app: k8sagent
    spec:
      volumes:
        - name: k8sagent-pod-template
          configMap:
            name: k8sagent-pod-template
      containers:
        - name: k8s-glue-container
          image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
          imagePullPolicy: Always
          command:
            - "/bin/bash"
            - "-c"
            - "export PATH=$PATH:$HOME/bin; source /root/.bashrc && /root/entrypoint.sh"
          volumeMounts:
            - name: k8sagent-pod-template
              mountPath: /root/template
          env:
            # In-cluster service endpoints of this release.
            - name: CLEARML_API_HOST
              value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
            - name: CLEARML_WEB_HOST
              value: "http://{{ include "clearml.fullname" . }}-webserver"
            - name: CLEARML_FILES_HOST
              value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}"
            # Glue settings taken from the agentk8sglue values.
            - name: K8S_GLUE_MAX_PODS
              value: "{{ .Values.agentk8sglue.maxPods }}"
            - name: K8S_GLUE_QUEUE
              value: "{{ .Values.agentk8sglue.queue }}"
            - name: K8S_GLUE_EXTRA_ARGS
              value: "--template-yaml /root/template/template.yaml"
            # API credentials are read from the clearml-conf secret.
            - name: CLEARML_API_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: clearml-conf
                  key: apiserver_key
            - name: CLEARML_API_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: clearml-conf
                  key: apiserver_secret
            - name: CLEARML_WORKER_ID
              value: "{{ .Values.agentk8sglue.id }}"
            - name: CLEARML_AGENT_UPDATE_REPO
              value: ""
            - name: FORCE_CLEARML_AGENT_REPO
              value: ""
            - name: CLEARML_DOCKER_IMAGE
              value: "{{ .Values.agentk8sglue.defaultDockerImage }}"
{{- end }}
|
||||
|
||||
@@ -11,10 +11,13 @@ metadata:
|
||||
name: {{ include "clearml.fullname" . }}-api
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- $annotations := .Values.ingress.annotations }}
|
||||
{{- if .Values.ingress.api.annotations }}
|
||||
{{- $annotations = mergeOverwrite $annotations .Values.ingress.api.annotations }}
|
||||
{{- end }}
|
||||
annotations:
|
||||
{{- toYaml $annotations | nindent 4 }}
|
||||
|
||||
spec:
|
||||
{{- if .Values.ingress.api.tlsSecretName }}
|
||||
tls:
|
||||
|
||||
@@ -11,10 +11,12 @@ metadata:
|
||||
name: {{ include "clearml.fullname" . }}-app
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- $annotations := .Values.ingress.annotations }}
|
||||
{{- if .Values.ingress.app.annotations }}
|
||||
{{- $annotations = mergeOverwrite $annotations .Values.ingress.app.annotations }}
|
||||
{{- end }}
|
||||
annotations:
|
||||
{{- toYaml $annotations | nindent 4 }}
|
||||
spec:
|
||||
{{- if .Values.ingress.app.tlsSecretName }}
|
||||
tls:
|
||||
|
||||
@@ -11,10 +11,12 @@ metadata:
|
||||
name: {{ include "clearml.fullname" . }}-files
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- $annotations := .Values.ingress.annotations }}
|
||||
{{- if .Values.ingress.files.annotations }}
|
||||
{{- $annotations = mergeOverwrite $annotations .Values.ingress.files.annotations }}
|
||||
{{- end }}
|
||||
annotations:
|
||||
{{- toYaml $annotations | nindent 4 }}
|
||||
spec:
|
||||
{{- if .Values.ingress.files.tlsSecretName }}
|
||||
tls:
|
||||
|
||||
25
charts/clearml/templates/rbac-agentk8s.yaml
Normal file
25
charts/clearml/templates/rbac-agentk8s.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
{{- if .Values.agentk8sglue.enabled }}
# RBAC for the k8s glue agent: a namespaced Role allowing get/list/watch/
# create/patch on pods, bound to the "default" ServiceAccount of the release
# namespace. Only rendered when agentk8sglue.enabled is true.
#
# NOTE: the release namespace built-in is .Release.Namespace (capital N);
# the lowercase .Release.namespace renders empty, leaving the ServiceAccount
# subject without a namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: k8sagent-pods-access
rules:
  - apiGroups:
      - ""
    resources:
      - pods
    verbs: ["get", "list", "watch", "create", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: k8sagent-pods-access
subjects:
  - kind: ServiceAccount
    name: default
    namespace: {{ .Release.Namespace | quote }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: k8sagent-pods-access
{{- end }}
|
||||
@@ -7,12 +7,15 @@ ingress:
|
||||
app:
|
||||
hostName: "app.clearml.127-0-0-1.nip.io"
|
||||
tlsSecretName: ""
|
||||
annotations: {}
|
||||
api:
|
||||
hostName: "api.clearml.127-0-0-1.nip.io"
|
||||
tlsSecretName: ""
|
||||
annotations: {}
|
||||
files:
|
||||
hostName: "files.clearml.127-0-0-1.nip.io"
|
||||
tlsSecretName: ""
|
||||
annotations: {}
|
||||
|
||||
secret:
|
||||
# -- Set for http_session field
|
||||
@@ -291,6 +294,30 @@ agentGroups:
|
||||
|
||||
affinity: {}
|
||||
|
||||
# This agent will spawn queued experiments in new pods, a good use case is to combine this with
# GPU autoscaling nodes.
# https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue
agentk8sglue:
  # Nothing for the glue agent is rendered unless this is true.
  enabled: false
  image:
    repository: "allegroai/clearml-agent-k8s"
    tag: "aws-latest-1.21"
  # Exposed to the agent as K8S_GLUE_MAX_PODS.
  maxPods: 10
  defaultDockerImage: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu18.04  # default docker image that is spawned as new pod
  queue: aws-instances  # create this queue manually in the UI first for it to work
  # Exposed to the agent as CLEARML_WORKER_ID.
  id: k8s-agent
  # Fragments copied into the pod template that the agent uses for spawned pods.
  podTemplate:
    resources: {}
      # limits:
      #   nvidia.com/gpu: 1
    # Pod tolerations are a list, so the empty default must be [] (not {}).
    tolerations: []
      # - key: "nvidia.com/gpu"
      #   operator: Exists
      #   effect: "NoSchedule"
    nodeSelector: {}
      # fleet: gpu-nodes
|
||||
|
||||
|
||||
externalServices:
|
||||
# -- Existing ElasticSearch Hostname to use if elasticsearch.enabled is false
|
||||
elasticsearchHost: ""
|
||||
|
||||
Reference in New Issue
Block a user