mirror of
https://github.com/clearml/clearml-helm-charts
synced 2025-04-17 01:31:13 +00:00
Upgrade to app version 1.5.0 (#81)
* Changed: upgrade to 1.5.0 * Fixed: inject after ct check * Fixed: list changd * Fixed: typo
This commit is contained in:
parent
84a003b7bc
commit
e22bd30764
13
.github/workflows/ci.yaml
vendored
13
.github/workflows/ci.yaml
vendored
@ -34,16 +34,19 @@ jobs:
|
||||
node_image: kindest/node:${{ matrix.k8s }}
|
||||
- name: Set up chart-testing
|
||||
uses: helm/chart-testing-action@v2.2.1
|
||||
- name: Inject secrets
|
||||
run: |
|
||||
find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUEKEY/${{ secrets.agentk8sglueKey }}/g" {} \;
|
||||
find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUESECRET/${{ secrets.agentk8sglueSecret }}/g" {} \;
|
||||
- name: Run chart-testing (list-changed)
|
||||
id: list-changed
|
||||
run: |
|
||||
changed=$(ct list-changed --chart-dirs=charts --target-branch=main)
|
||||
if [[ -n "$changed" ]]; then
|
||||
echo "::set-output name=changed::true"
|
||||
echo "::set-output name=changed_charts::\"${changed//$'\n'/,}\""
|
||||
fi
|
||||
- name: Inject secrets
|
||||
run: |
|
||||
find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUEKEY/${{ secrets.agentk8sglueKey }}/g" {} \;
|
||||
find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUESECRET/${{ secrets.agentk8sglueSecret }}/g" {} \;
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
- name: Run chart-testing (lint and install)
|
||||
run: ct lint-and-install --chart-dirs=charts --target-branch=main --helm-extra-args="--timeout=15m" --debug=true
|
||||
run: ct lint-and-install --chart-dirs=charts --target-branch=main --helm-extra-args="--timeout=15m" --charts=${{steps.list-changed.outputs.changed_charts}} --debug=true
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
|
@ -2,8 +2,8 @@ apiVersion: v2
|
||||
name: clearml
|
||||
description: MLOps platform
|
||||
type: application
|
||||
version: "3.10.5"
|
||||
appVersion: "1.4.0"
|
||||
version: "4.0.0"
|
||||
appVersion: "1.5.0"
|
||||
home: https://clear.ml
|
||||
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
|
||||
sources:
|
||||
|
@ -1,6 +1,6 @@
|
||||
# ClearML Ecosystem for Kubernetes
|
||||
|
||||
  
|
||||
  
|
||||
|
||||
MLOps platform
|
||||
|
||||
@ -129,94 +129,6 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| agentGroups.agent-group-cpu.affinity | object | `{}` | |
|
||||
| agentGroups.agent-group-cpu.agentVersion | string | `""` | |
|
||||
| agentGroups.agent-group-cpu.awsAccessKeyId | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.awsDefaultRegion | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.awsSecretAccessKey | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.azureStorageAccount | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.azureStorageKey | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.clearmlAccessKey | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.clearmlConfig | string | `"sdk {\n}"` | |
|
||||
| agentGroups.agent-group-cpu.clearmlGitPassword | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.clearmlGitUser | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.clearmlSecretKey | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.enabled | bool | `false` | |
|
||||
| agentGroups.agent-group-cpu.extraEnvs | list | `[]` | |
|
||||
| agentGroups.agent-group-cpu.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| agentGroups.agent-group-cpu.image.repository | string | `"ubuntu"` | |
|
||||
| agentGroups.agent-group-cpu.image.tag | string | `"18.04"` | |
|
||||
| agentGroups.agent-group-cpu.name | string | `"agent-group-cpu"` | |
|
||||
| agentGroups.agent-group-cpu.nodeSelector | object | `{}` | |
|
||||
| agentGroups.agent-group-cpu.nvidiaGpusPerAgent | int | `0` | |
|
||||
| agentGroups.agent-group-cpu.podAnnotations | object | `{}` | |
|
||||
| agentGroups.agent-group-cpu.queues | string | `"default"` | |
|
||||
| agentGroups.agent-group-cpu.replicaCount | int | `1` | |
|
||||
| agentGroups.agent-group-cpu.tolerations | list | `[]` | |
|
||||
| agentGroups.agent-group-cpu.updateStrategy | string | `"Recreate"` | |
|
||||
| agentGroups.agent-group-gpu.affinity | object | `{}` | |
|
||||
| agentGroups.agent-group-gpu.agentVersion | string | `""` | |
|
||||
| agentGroups.agent-group-gpu.awsAccessKeyId | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.awsDefaultRegion | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.awsSecretAccessKey | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.azureStorageAccount | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.azureStorageKey | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.clearmlAccessKey | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.clearmlConfig | string | `"sdk {\n}"` | |
|
||||
| agentGroups.agent-group-gpu.clearmlGitPassword | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.clearmlGitUser | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.clearmlSecretKey | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.enabled | bool | `false` | |
|
||||
| agentGroups.agent-group-gpu.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| agentGroups.agent-group-gpu.image.repository | string | `"nvidia/cuda"` | |
|
||||
| agentGroups.agent-group-gpu.image.tag | string | `"11.0-base-ubuntu18.04"` | |
|
||||
| agentGroups.agent-group-gpu.name | string | `"agent-group-gpu"` | |
|
||||
| agentGroups.agent-group-gpu.nodeSelector | object | `{}` | |
|
||||
| agentGroups.agent-group-gpu.nvidiaGpusPerAgent | int | `1` | |
|
||||
| agentGroups.agent-group-gpu.podAnnotations | object | `{}` | |
|
||||
| agentGroups.agent-group-gpu.queues | string | `"default"` | |
|
||||
| agentGroups.agent-group-gpu.replicaCount | int | `0` | |
|
||||
| agentGroups.agent-group-gpu.tolerations | list | `[]` | |
|
||||
| agentGroups.agent-group-gpu.updateStrategy | string | `"Recreate"` | |
|
||||
| agentk8sglue.defaultDockerImage | string | `"nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04"` | |
|
||||
| agentk8sglue.enabled | bool | `true` | |
|
||||
| agentk8sglue.id | string | `"k8s-agent"` | |
|
||||
| agentk8sglue.image.repository | string | `"allegroai/clearml-agent-k8s"` | |
|
||||
| agentk8sglue.image.tag | string | `"base-1.21"` | |
|
||||
| agentk8sglue.maxPods | int | `10` | |
|
||||
| agentk8sglue.podTemplate.env | list | `[]` | |
|
||||
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | |
|
||||
| agentk8sglue.podTemplate.resources | object | `{}` | |
|
||||
| agentk8sglue.podTemplate.tolerations | list | `[]` | |
|
||||
| agentk8sglue.podTemplate.volumes | list | `[]` | |
|
||||
| agentk8sglue.queue | string | `"default"` | |
|
||||
| agentk8sglue.serviceAccountName | string | `"default"` | |
|
||||
| agentservices.affinity | object | `{}` | |
|
||||
| agentservices.agentVersion | string | `""` | |
|
||||
| agentservices.awsAccessKeyId | string | `nil` | |
|
||||
| agentservices.awsDefaultRegion | string | `nil` | |
|
||||
| agentservices.awsSecretAccessKey | string | `nil` | |
|
||||
| agentservices.azureStorageAccount | string | `nil` | |
|
||||
| agentservices.azureStorageKey | string | `nil` | |
|
||||
| agentservices.clearmlFilesHost | string | `nil` | |
|
||||
| agentservices.clearmlGitPassword | string | `nil` | |
|
||||
| agentservices.clearmlGitUser | string | `nil` | |
|
||||
| agentservices.clearmlHostIp | string | `nil` | |
|
||||
| agentservices.clearmlWebHost | string | `nil` | |
|
||||
| agentservices.clearmlWorkerId | string | `"clearml-services"` | |
|
||||
| agentservices.enabled | bool | `false` | |
|
||||
| agentservices.extraEnvs | list | `[]` | |
|
||||
| agentservices.googleCredentials | string | `nil` | |
|
||||
| agentservices.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| agentservices.image.repository | string | `"allegroai/clearml-agent-services"` | |
|
||||
| agentservices.image.tag | string | `"latest"` | |
|
||||
| agentservices.nodeSelector | object | `{}` | |
|
||||
| agentservices.podAnnotations | object | `{}` | |
|
||||
| agentservices.replicaCount | int | `1` | |
|
||||
| agentservices.resources | object | `{}` | |
|
||||
| agentservices.storage.data.class | string | `""` | |
|
||||
| agentservices.storage.data.size | string | `"50Gi"` | |
|
||||
| agentservices.tolerations | list | `[]` | |
|
||||
| apiserver.additionalConfigs | object | `{}` | additional configurations that can be used by api server; check examples in values.yaml file |
|
||||
| apiserver.affinity | object | `{}` | |
|
||||
| apiserver.authCookiesMaxAge | int | `864000` | Amount of seconds the authorization cookie will last in user browser |
|
||||
@ -224,7 +136,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| apiserver.extraEnvs | list | `[]` | |
|
||||
| apiserver.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| apiserver.image.repository | string | `"allegroai/clearml"` | |
|
||||
| apiserver.image.tag | string | `"1.4.0"` | |
|
||||
| apiserver.image.tag | string | `"1.5.0"` | |
|
||||
| apiserver.livenessDelay | int | `60` | |
|
||||
| apiserver.nodeSelector | object | `{}` | |
|
||||
| apiserver.podAnnotations | object | `{}` | |
|
||||
@ -238,7 +150,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| apiserver.service.port | int | `8008` | |
|
||||
| apiserver.service.type | string | `"NodePort"` | This will set to service's spec.type field |
|
||||
| apiserver.tolerations | list | `[]` | |
|
||||
| clearml.defaultCompany | string | `"d1bd92a3b039400cbafc60a7a5b1e52b"` | |
|
||||
| clearml | object | `{"defaultCompany":"d1bd92a3b039400cbafc60a7a5b1e52b"}` | ClearMl generic configurations |
|
||||
| elasticsearch.clusterHealthCheckParams | string | `"wait_for_status=yellow&timeout=1s"` | |
|
||||
| elasticsearch.clusterName | string | `"clearml-elastic"` | |
|
||||
| elasticsearch.enabled | bool | `true` | |
|
||||
@ -284,7 +196,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| fileserver.extraEnvs | list | `[]` | |
|
||||
| fileserver.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| fileserver.image.repository | string | `"allegroai/clearml"` | |
|
||||
| fileserver.image.tag | string | `"1.4.0"` | |
|
||||
| fileserver.image.tag | string | `"1.5.0"` | |
|
||||
| fileserver.nodeSelector | object | `{}` | |
|
||||
| fileserver.podAnnotations | object | `{}` | |
|
||||
| fileserver.replicaCount | int | `1` | |
|
||||
@ -295,6 +207,13 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| fileserver.storage.data.class | string | `""` | |
|
||||
| fileserver.storage.data.size | string | `"50Gi"` | |
|
||||
| fileserver.tolerations | list | `[]` | |
|
||||
| imageCredentials | object | `{"email":"someone@host.com","enabled":false,"existingSecret":"","password":"pwd","registry":"docker.io","username":"someone"}` | Private image registry configuration |
|
||||
| imageCredentials.email | string | `"someone@host.com"` | Email |
|
||||
| imageCredentials.enabled | bool | `false` | Use private authentication mode |
|
||||
| imageCredentials.existingSecret | string | `""` | If this is set, chart will not generate a secret but will use what is defined here |
|
||||
| imageCredentials.password | string | `"pwd"` | Registry password |
|
||||
| imageCredentials.registry | string | `"docker.io"` | Registry name |
|
||||
| imageCredentials.username | string | `"someone"` | Registry username |
|
||||
| ingress.annotations | object | `{}` | |
|
||||
| ingress.api.annotations | object | `{}` | |
|
||||
| ingress.api.enabled | bool | `false` | |
|
||||
@ -343,7 +262,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| webserver.extraEnvs | list | `[]` | |
|
||||
| webserver.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| webserver.image.repository | string | `"allegroai/clearml"` | |
|
||||
| webserver.image.tag | string | `"1.4.0"` | |
|
||||
| webserver.image.tag | string | `"1.5.0"` | |
|
||||
| webserver.nodeSelector | object | `{}` | |
|
||||
| webserver.podAnnotations | object | `{}` | |
|
||||
| webserver.replicaCount | int | `1` | |
|
||||
|
@ -1,57 +0,0 @@
|
||||
{{- if .Values.agentk8sglue.enabled }}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: k8sagent-pod-template
|
||||
data:
|
||||
template.yaml: |
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
namespace: {{ .Release.Namespace }}
|
||||
spec:
|
||||
serviceAccountName: {{ .Values.agentk8sglue.serviceAccountName }}
|
||||
volumes:
|
||||
{{- range .Values.agentk8sglue.podTemplate.volumes }}
|
||||
- name: {{ .name }}
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ .name }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- resources:
|
||||
{{- toYaml .Values.agentk8sglue.podTemplate.resources | nindent 10 }}
|
||||
ports:
|
||||
- containerPort: 10022
|
||||
volumeMounts:
|
||||
{{- range .Values.agentk8sglue.podTemplate.volumes }}
|
||||
- mountPath: {{ .path }}
|
||||
name: {{ .name }}
|
||||
{{- end }}
|
||||
env:
|
||||
- name: CLEARML_API_HOST
|
||||
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: "http://{{ include "clearml.fullname" . }}-webserver"
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}"
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: clearml-conf
|
||||
key: apiserver_key
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: clearml-conf
|
||||
key: apiserver_secret
|
||||
{{- if .Values.agentk8sglue.podTemplate.env }}
|
||||
{{ toYaml .Values.agentk8sglue.podTemplate.env | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.podTemplate.nodeSelector}}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.podTemplate.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
@ -1,122 +0,0 @@
|
||||
{{- range $key, $value := .Values.agentGroups }}
|
||||
{{- with $value }}
|
||||
{{- if .enabled }}
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "clearml.fullname" $ }}-{{ .name }}-agent
|
||||
labels:
|
||||
{{- include "clearml.labels" $ | nindent 4 }}
|
||||
spec:
|
||||
replicas: {{ .replicaCount }}
|
||||
strategy:
|
||||
type: {{ .updateStrategy }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "clearml.selectorLabelsAgent" $ | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
checksum/secret: {{ include (print $.Template.BasePath "/secrets.yaml") $ | sha256sum }}
|
||||
{{- with .podAnnotations }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
labels:
|
||||
{{- include "clearml.selectorLabelsAgent" $ | nindent 8 }}
|
||||
spec:
|
||||
volumes:
|
||||
{{ if .clearmlConfig }}
|
||||
- name: agent-clearml-conf-volume
|
||||
secret:
|
||||
secretName: {{ .name }}-conf
|
||||
items:
|
||||
- key: clearml.conf
|
||||
path: clearml.conf
|
||||
{{ end }}
|
||||
initContainers:
|
||||
- name: init-agent-{{ .name }}
|
||||
image: "{{ .image.repository }}:{{ .image.tag | default $.Chart.AppVersion }}"
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- >
|
||||
set -x;
|
||||
while [ $(curl -sw '%{http_code}' "{{ include "clearml.serviceApi" $ }}/debug.ping" -o /dev/null) -ne 200 ] ; do
|
||||
echo "waiting for apiserver" ;
|
||||
sleep 5 ;
|
||||
done
|
||||
containers:
|
||||
- name: {{ $.Chart.Name }}-{{ .name }}
|
||||
image: "{{ .image.repository }}:{{ .image.tag }}"
|
||||
imagePullPolicy: {{ .image.pullPolicy }}
|
||||
securityContext:
|
||||
privileged: true
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu:
|
||||
{{ .nvidiaGpusPerAgent }}
|
||||
env:
|
||||
- name: CLEARML_API_HOST
|
||||
value: {{ include "clearml.serviceApi" $ }}
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: {{ include "clearml.serviceApp" $ }}
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: {{ include "clearml.serviceFiles" $ }}
|
||||
- name: CLEARML_AGENT_GIT_USER
|
||||
value: {{ .clearmlGitUser}}
|
||||
- name: CLEARML_AGENT_GIT_PASS
|
||||
value: {{ .clearmlGitPassword}}
|
||||
- name: AWS_ACCESS_KEY_ID
|
||||
value: {{ .awsAccessKeyId}}
|
||||
- name: AWS_SECRET_ACCESS_KEY
|
||||
value: {{ .awsSecretAccessKey}}
|
||||
- name: AWS_DEFAULT_REGION
|
||||
value: {{ .awsDefaultRegion}}
|
||||
- name: AZURE_STORAGE_ACCOUNT
|
||||
value: {{ .azureStorageAccount}}
|
||||
- name: AZURE_STORAGE_KEY
|
||||
value: {{ .azureStorageKey}}
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: clearml-conf
|
||||
key: tests_user_key
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: clearml-conf
|
||||
key: tests_user_secret
|
||||
{{- if .extraEnvs }}
|
||||
{{ toYaml .extraEnvs | nindent 10 }}
|
||||
{{- end }}
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- "apt-get update ;
|
||||
apt-get install -y curl python3-pip git;
|
||||
python3 -m pip install -U pip ;
|
||||
python3 -m pip install clearml-agent{{ .agentVersion}} ;
|
||||
CLEARML_AGENT_K8S_HOST_MOUNT=/root/.clearml:/root/.clearml clearml-agent daemon --foreground --queue {{ .queues}}"
|
||||
{{ if .clearmlConfig }}
|
||||
volumeMounts:
|
||||
- name: agent-clearml-conf-volume
|
||||
mountPath: /root/clearml.conf
|
||||
subPath: clearml.conf
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
{{- with .nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
@ -1,64 +0,0 @@
|
||||
{{- if .Values.agentk8sglue.enabled }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: "{{ include "clearml.fullname" . }}-k8sagent"
|
||||
labels:
|
||||
app: k8sagent
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: k8sagent
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: k8sagent
|
||||
spec:
|
||||
containers:
|
||||
- name: k8s-glue-container
|
||||
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
|
||||
imagePullPolicy: IfNotPresent
|
||||
command: ["/bin/bash", "-c", "export PATH=$PATH:$HOME/bin; source /root/.bashrc && /root/entrypoint.sh"]
|
||||
volumeMounts:
|
||||
- name: k8sagent-pod-template
|
||||
mountPath: /root/template
|
||||
env:
|
||||
- name: CLEARML_API_HOST
|
||||
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: "http://{{ include "clearml.fullname" . }}-webserver"
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}"
|
||||
- name: K8S_GLUE_MAX_PODS
|
||||
value: "{{.Values.agentk8sglue.maxPods}}"
|
||||
- name: K8S_GLUE_QUEUE
|
||||
value: "{{.Values.agentk8sglue.queue}}"
|
||||
- name: K8S_GLUE_EXTRA_ARGS
|
||||
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml"
|
||||
- name: K8S_DEFAULT_NAMESPACE
|
||||
value: "{{ .Release.Namespace }}"
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: clearml-conf
|
||||
key: apiserver_key
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: clearml-conf
|
||||
key: apiserver_secret
|
||||
- name: CLEARML_WORKER_ID
|
||||
value: "{{.Values.agentk8sglue.id}}"
|
||||
- name: CLEARML_AGENT_UPDATE_REPO
|
||||
value: ""
|
||||
- name: FORCE_CLEARML_AGENT_REPO
|
||||
value: ""
|
||||
- name: CLEARML_DOCKER_IMAGE
|
||||
value: "{{.Values.agentk8sglue.defaultDockerImage}}"
|
||||
volumes:
|
||||
- name: k8sagent-pod-template
|
||||
configMap:
|
||||
name: k8sagent-pod-template
|
||||
{{- end }}
|
||||
|
@ -1,106 +0,0 @@
|
||||
{{- if .Values.agentservices.enabled }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "clearml.fullname" . }}-agentservices
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
spec:
|
||||
replicas: {{ .Values.agentservices.replicaCount }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "clearml.selectorLabelsAgentServices" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
checksum/secret: {{ include (print $.Template.BasePath "/secrets.yaml") . | sha256sum }}
|
||||
{{- with .Values.agentservices.podAnnotations }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
labels:
|
||||
{{- include "clearml.selectorLabelsAgentServices" . | nindent 8 }}
|
||||
spec:
|
||||
volumes:
|
||||
- name: agentservices-data
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ include "clearml.fullname" . }}-agentservices-data
|
||||
initContainers:
|
||||
- name: init-agentservices
|
||||
image: "{{ .Values.agentservices.image.repository }}:{{ .Values.agentservices.image.tag | default .Chart.AppVersion }}"
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- >
|
||||
set -x;
|
||||
while [ $(curl -sw '%{http_code}' "{{ include "clearml.serviceApi" $ }}/debug.ping" -o /dev/null) -ne 200 ] ; do
|
||||
echo "waiting for apiserver" ;
|
||||
sleep 5 ;
|
||||
done
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
image: "{{ .Values.agentservices.image.repository }}:{{ .Values.agentservices.image.tag | default .Chart.AppVersion }}"
|
||||
imagePullPolicy: {{ .Values.agentservices.image.pullPolicy }}
|
||||
env:
|
||||
- name: CLEARML_HOST_IP
|
||||
value: {{ .Values.agentservices.clearmlHostIp }}
|
||||
- name: CLEARML_API_HOST
|
||||
value: {{ include "clearml.serviceApi" $ }}
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: {{ .Values.agentservices.clearmlWebHost }}
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: {{ .Values.agentservices.clearmlFilesHost }}
|
||||
- name: CLEARML_AGENT_GIT_USER
|
||||
value: {{ .Values.agentservices.clearmlGitUser }}
|
||||
- name: CLEARML_AGENT_GIT_PASS
|
||||
value: {{ .Values.agentservices.clearmlGitPassword }}
|
||||
- name: CLEARML_AGENT_UPDATE_VERSION
|
||||
value: {{ .Values.agentservices.agentVersion }}
|
||||
- name: CLEARML_AGENT_DEFAULT_BASE_DOCKER
|
||||
value: {{ .Values.agentservices.defaultBaseDocker }}
|
||||
- name: AWS_ACCESS_KEY_ID
|
||||
value: {{ .Values.agentservices.awsAccessKeyId }}
|
||||
- name: AWS_SECRET_ACCESS_KEY
|
||||
value: {{ .Values.agentservices.awsSecretAccessKey }}
|
||||
- name: AWS_DEFAULT_REGION
|
||||
value: {{ .Values.agentservices.awsDefaultRegion }}
|
||||
- name: AZURE_STORAGE_ACCOUNT
|
||||
value: {{ .Values.agentservices.azureStorageAccount }}
|
||||
- name: AZURE_STORAGE_KEY
|
||||
value: {{ .Values.agentservices.azureStorageKey }}
|
||||
- name: GOOGLE_APPLICATION_CREDENTIALS
|
||||
value: {{ .Values.agentservices.googleCredentials }}
|
||||
- name: CLEARML_WORKER_ID
|
||||
value: {{ .Values.agentservices.clearmlWorkerId }}
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: clearml-conf
|
||||
key: tests_user_key
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: clearml-conf
|
||||
key: tests_user_secret
|
||||
{{- if .Values.agentservices.extraEnvs }}
|
||||
{{ toYaml .Values.agentservices.extraEnvs | nindent 10 }}
|
||||
{{- end }}
|
||||
args:
|
||||
- agentservices
|
||||
volumeMounts:
|
||||
- name: agentservices-data
|
||||
mountPath: /root/.clearml
|
||||
resources:
|
||||
{{- toYaml .Values.agentservices.resources | nindent 12 }}
|
||||
{{- with .Values.agentservices.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentservices.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentservices.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
@ -19,7 +19,14 @@ spec:
|
||||
labels:
|
||||
{{- include "clearml.selectorLabelsApiServer" . | nindent 8 }}
|
||||
spec:
|
||||
{{- include "clearml.imagePullSecrets" . | indent 6 }}
|
||||
{{- if .Values.imageCredentials.enabled }}
|
||||
imagePullSecrets:
|
||||
{{- if .Values.imageCredentials.existingSecret }}
|
||||
- name: .Values.imageCredentials.existingSecret
|
||||
{{- else }}
|
||||
- name: clearml-agent-registry-key
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
image: "{{ .Values.apiserver.image.repository }}:{{ .Values.apiserver.image.tag | default .Chart.AppVersion }}"
|
||||
|
@ -22,7 +22,14 @@ spec:
|
||||
- name: fileserver-data
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ include "clearml.fullname" . }}-fileserver-data
|
||||
{{- include "clearml.imagePullSecrets" . | indent 6 }}
|
||||
{{- if .Values.imageCredentials.enabled }}
|
||||
imagePullSecrets:
|
||||
{{- if .Values.imageCredentials.existingSecret }}
|
||||
- name: .Values.imageCredentials.existingSecret
|
||||
{{- else }}
|
||||
- name: clearml-agent-registry-key
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
image: "{{ .Values.fileserver.image.repository }}:{{ .Values.fileserver.image.tag | default .Chart.AppVersion }}"
|
||||
|
@ -18,7 +18,14 @@ spec:
|
||||
labels:
|
||||
{{- include "clearml.selectorLabelsWebServer" . | nindent 8 }}
|
||||
spec:
|
||||
{{- include "clearml.imagePullSecrets" . | indent 6 }}
|
||||
{{- if .Values.imageCredentials.enabled }}
|
||||
imagePullSecrets:
|
||||
{{- if .Values.imageCredentials.existingSecret }}
|
||||
- name: .Values.imageCredentials.existingSecret
|
||||
{{- else }}
|
||||
- name: clearml-agent-registry-key
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
image: "{{ .Values.webserver.image.repository }}:{{ .Values.webserver.image.tag | default .Chart.AppVersion }}"
|
||||
|
@ -1,17 +0,0 @@
|
||||
{{- if .Values.agentservices.enabled }}
|
||||
kind: PersistentVolumeClaim
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: {{ include "clearml.fullname" . }}-agentservices-data
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.agentservices.storage.data.size | quote }}
|
||||
{{- if .Values.agentservices.storage.data.class -}}
|
||||
storageClassName: {{ .Values.agentservices.storage.data.class | quote }}
|
||||
{{- end -}}
|
||||
{{- end }}
|
@ -1,25 +0,0 @@
|
||||
{{- if .Values.agentk8sglue.enabled }}
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: k8sagent-pods-access
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- pods
|
||||
verbs: ["get", "list", "watch", "create", "patch", "delete"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: k8sagent-pods-access
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: default
|
||||
namespace: {{ .Release.Namespace }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: k8sagent-pods-access
|
||||
{{- end }}
|
@ -1,13 +0,0 @@
|
||||
{{- range $key, $value := .Values.agentGroups }}
|
||||
{{- with $value }}
|
||||
---
|
||||
{{ if .clearmlConfig }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ .name }}-conf
|
||||
data:
|
||||
clearml.conf: {{ .clearmlConfig | b64enc }}
|
||||
{{ end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
@ -1,6 +1,19 @@
|
||||
# global:
|
||||
# imagePullSecrets:
|
||||
# - docker-cfg
|
||||
# -- Private image registry configuration
|
||||
imageCredentials:
|
||||
# -- Use private authentication mode
|
||||
enabled: false
|
||||
# -- If this is set, chart will not generate a secret but will use what is defined here
|
||||
existingSecret: ""
|
||||
# -- Registry name
|
||||
registry: docker.io
|
||||
# -- Registry username
|
||||
username: someone
|
||||
# -- Registry password
|
||||
password: pwd
|
||||
# -- Email
|
||||
email: someone@host.com
|
||||
|
||||
# -- ClearMl generic configurations
|
||||
clearml:
|
||||
defaultCompany: "d1bd92a3b039400cbafc60a7a5b1e52b"
|
||||
ingress:
|
||||
@ -67,7 +80,7 @@ apiserver:
|
||||
image:
|
||||
repository: "allegroai/clearml"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "1.4.0"
|
||||
tag: "1.5.0"
|
||||
|
||||
extraEnvs: []
|
||||
|
||||
@ -136,7 +149,7 @@ fileserver:
|
||||
image:
|
||||
repository: "allegroai/clearml"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "1.4.0"
|
||||
tag: "1.5.0"
|
||||
|
||||
extraEnvs: []
|
||||
|
||||
@ -181,7 +194,7 @@ webserver:
|
||||
image:
|
||||
repository: "allegroai/clearml"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "1.4.0"
|
||||
tag: "1.5.0"
|
||||
|
||||
podAnnotations: {}
|
||||
|
||||
@ -205,164 +218,6 @@ webserver:
|
||||
|
||||
additionalConfigs: {}
|
||||
|
||||
agentservices:
|
||||
enabled: false
|
||||
clearmlHostIp: null
|
||||
agentVersion: ""
|
||||
clearmlWebHost: null
|
||||
clearmlFilesHost: null
|
||||
clearmlGitUser: null
|
||||
clearmlGitPassword: null
|
||||
awsAccessKeyId: null
|
||||
awsSecretAccessKey: null
|
||||
awsDefaultRegion: null
|
||||
azureStorageAccount: null
|
||||
azureStorageKey: null
|
||||
googleCredentials: null
|
||||
clearmlWorkerId: "clearml-services"
|
||||
|
||||
replicaCount: 1
|
||||
|
||||
image:
|
||||
repository: "allegroai/clearml-agent-services"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "latest"
|
||||
|
||||
extraEnvs: []
|
||||
|
||||
podAnnotations: {}
|
||||
|
||||
resources: {}
|
||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||
# choice for the user. This also increases chances charts run on environments with little
|
||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
|
||||
storage:
|
||||
data:
|
||||
class: ""
|
||||
size: 50Gi
|
||||
|
||||
agentGroups:
|
||||
agent-group-cpu:
|
||||
enabled: false
|
||||
name: agent-group-cpu
|
||||
replicaCount: 1
|
||||
updateStrategy: Recreate
|
||||
nvidiaGpusPerAgent: 0
|
||||
agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
|
||||
queues: "default" # multiple queues can be specified separated by a space (e.g. "important_jobs default")
|
||||
clearmlGitUser: null
|
||||
clearmlGitPassword: null
|
||||
clearmlAccessKey: null
|
||||
clearmlSecretKey: null
|
||||
awsAccessKeyId: null
|
||||
awsSecretAccessKey: null
|
||||
awsDefaultRegion: null
|
||||
azureStorageAccount: null
|
||||
azureStorageKey: null
|
||||
clearmlConfig: |-
|
||||
sdk {
|
||||
}
|
||||
|
||||
image:
|
||||
repository: "ubuntu"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "18.04"
|
||||
|
||||
extraEnvs: []
|
||||
|
||||
podAnnotations: {}
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
|
||||
agent-group-gpu:
|
||||
enabled: false
|
||||
name: agent-group-gpu
|
||||
replicaCount: 0
|
||||
updateStrategy: Recreate
|
||||
nvidiaGpusPerAgent: 1
|
||||
agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
|
||||
queues: "default" # multiple queues can be specified separated by a space (e.g. "important_jobs default")
|
||||
clearmlGitUser: null
|
||||
clearmlGitPassword: null
|
||||
clearmlAccessKey: null
|
||||
clearmlSecretKey: null
|
||||
awsAccessKeyId: null
|
||||
awsSecretAccessKey: null
|
||||
awsDefaultRegion: null
|
||||
azureStorageAccount: null
|
||||
azureStorageKey: null
|
||||
clearmlConfig: |-
|
||||
sdk {
|
||||
}
|
||||
|
||||
image:
|
||||
repository: "nvidia/cuda"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "11.0-base-ubuntu18.04"
|
||||
|
||||
podAnnotations: {}
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
|
||||
# This agent will spawn queued experiments in new pods, a good use case is to combine this with
|
||||
# GPU autoscaling nodes.
|
||||
# https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue
|
||||
agentk8sglue:
|
||||
enabled: true
|
||||
image:
|
||||
repository: "allegroai/clearml-agent-k8s"
|
||||
tag: "base-1.21"
|
||||
serviceAccountName: default
|
||||
maxPods: 10
|
||||
defaultDockerImage: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04 # default docker image that is spawned as new pod
|
||||
queue: default
|
||||
id: k8s-agent
|
||||
podTemplate:
|
||||
volumes: []
|
||||
# - name: "yourvolume"
|
||||
# path: "/yourpath"
|
||||
env: []
|
||||
# # to setup access to private repo, setup secret with git credentials:
|
||||
# - name: CLEARML_AGENT_GIT_USER
|
||||
# value: mygitusername
|
||||
# - name: CLEARML_AGENT_GIT_PASS
|
||||
# valueFrom:
|
||||
# secretKeyRef:
|
||||
# name: git-password
|
||||
# key: git-password
|
||||
resources: {}
|
||||
# limits:
|
||||
# nvidia.com/gpu: 1
|
||||
tolerations: []
|
||||
# - key: "nvidia.com/gpu"
|
||||
# operator: Exists
|
||||
# effect: "NoSchedule"
|
||||
nodeSelector: {}
|
||||
# fleet: gpu-nodes
|
||||
|
||||
|
||||
externalServices:
|
||||
# -- Existing ElasticSearch Hostname to use if elasticsearch.enabled is false
|
||||
elasticsearchHost: ""
|
||||
|
Loading…
Reference in New Issue
Block a user