diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index daa0ede..9ea0800 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -34,16 +34,19 @@ jobs: node_image: kindest/node:${{ matrix.k8s }} - name: Set up chart-testing uses: helm/chart-testing-action@v2.2.1 - - name: Inject secrets - run: | - find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUEKEY/${{ secrets.agentk8sglueKey }}/g" {} \; - find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUESECRET/${{ secrets.agentk8sglueSecret }}/g" {} \; - name: Run chart-testing (list-changed) id: list-changed run: | changed=$(ct list-changed --chart-dirs=charts --target-branch=main) if [[ -n "$changed" ]]; then echo "::set-output name=changed::true" + echo "::set-output name=changed_charts::\"${changed//$'\n'/,}\"" fi + - name: Inject secrets + run: | + find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUEKEY/${{ secrets.agentk8sglueKey }}/g" {} \; + find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUESECRET/${{ secrets.agentk8sglueSecret }}/g" {} \; + if: steps.list-changed.outputs.changed == 'true' - name: Run chart-testing (lint and install) - run: ct lint-and-install --chart-dirs=charts --target-branch=main --helm-extra-args="--timeout=15m" --debug=true + run: ct lint-and-install --chart-dirs=charts --target-branch=main --helm-extra-args="--timeout=15m" --charts=${{steps.list-changed.outputs.changed_charts}} --debug=true + if: steps.list-changed.outputs.changed == 'true' diff --git a/charts/clearml/Chart.yaml b/charts/clearml/Chart.yaml index cbdd867..b418c83 100644 --- a/charts/clearml/Chart.yaml +++ b/charts/clearml/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: clearml description: MLOps platform type: application -version: "3.10.5" -appVersion: "1.4.0" +version: "4.0.0" +appVersion: "1.5.0" home: https://clear.ml icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg sources: diff --git 
a/charts/clearml/README.md b/charts/clearml/README.md index 5a9992a..42d96e4 100644 --- a/charts/clearml/README.md +++ b/charts/clearml/README.md @@ -1,6 +1,6 @@ # ClearML Ecosystem for Kubernetes -![Version: 3.10.5](https://img.shields.io/badge/Version-3.10.5-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.4.0](https://img.shields.io/badge/AppVersion-1.4.0-informational?style=flat-square) +![Version: 4.0.0](https://img.shields.io/badge/Version-4.0.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.5.0](https://img.shields.io/badge/AppVersion-1.5.0-informational?style=flat-square) MLOps platform @@ -129,94 +129,6 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a | Key | Type | Default | Description | |-----|------|---------|-------------| -| agentGroups.agent-group-cpu.affinity | object | `{}` | | -| agentGroups.agent-group-cpu.agentVersion | string | `""` | | -| agentGroups.agent-group-cpu.awsAccessKeyId | string | `nil` | | -| agentGroups.agent-group-cpu.awsDefaultRegion | string | `nil` | | -| agentGroups.agent-group-cpu.awsSecretAccessKey | string | `nil` | | -| agentGroups.agent-group-cpu.azureStorageAccount | string | `nil` | | -| agentGroups.agent-group-cpu.azureStorageKey | string | `nil` | | -| agentGroups.agent-group-cpu.clearmlAccessKey | string | `nil` | | -| agentGroups.agent-group-cpu.clearmlConfig | string | `"sdk {\n}"` | | -| agentGroups.agent-group-cpu.clearmlGitPassword | string | `nil` | | -| agentGroups.agent-group-cpu.clearmlGitUser | string | `nil` | | -| agentGroups.agent-group-cpu.clearmlSecretKey | string | `nil` | | -| agentGroups.agent-group-cpu.enabled | bool | `false` | | -| agentGroups.agent-group-cpu.extraEnvs | list | `[]` | | -| agentGroups.agent-group-cpu.image.pullPolicy | string | `"IfNotPresent"` 
| | -| agentGroups.agent-group-cpu.image.repository | string | `"ubuntu"` | | -| agentGroups.agent-group-cpu.image.tag | string | `"18.04"` | | -| agentGroups.agent-group-cpu.name | string | `"agent-group-cpu"` | | -| agentGroups.agent-group-cpu.nodeSelector | object | `{}` | | -| agentGroups.agent-group-cpu.nvidiaGpusPerAgent | int | `0` | | -| agentGroups.agent-group-cpu.podAnnotations | object | `{}` | | -| agentGroups.agent-group-cpu.queues | string | `"default"` | | -| agentGroups.agent-group-cpu.replicaCount | int | `1` | | -| agentGroups.agent-group-cpu.tolerations | list | `[]` | | -| agentGroups.agent-group-cpu.updateStrategy | string | `"Recreate"` | | -| agentGroups.agent-group-gpu.affinity | object | `{}` | | -| agentGroups.agent-group-gpu.agentVersion | string | `""` | | -| agentGroups.agent-group-gpu.awsAccessKeyId | string | `nil` | | -| agentGroups.agent-group-gpu.awsDefaultRegion | string | `nil` | | -| agentGroups.agent-group-gpu.awsSecretAccessKey | string | `nil` | | -| agentGroups.agent-group-gpu.azureStorageAccount | string | `nil` | | -| agentGroups.agent-group-gpu.azureStorageKey | string | `nil` | | -| agentGroups.agent-group-gpu.clearmlAccessKey | string | `nil` | | -| agentGroups.agent-group-gpu.clearmlConfig | string | `"sdk {\n}"` | | -| agentGroups.agent-group-gpu.clearmlGitPassword | string | `nil` | | -| agentGroups.agent-group-gpu.clearmlGitUser | string | `nil` | | -| agentGroups.agent-group-gpu.clearmlSecretKey | string | `nil` | | -| agentGroups.agent-group-gpu.enabled | bool | `false` | | -| agentGroups.agent-group-gpu.image.pullPolicy | string | `"IfNotPresent"` | | -| agentGroups.agent-group-gpu.image.repository | string | `"nvidia/cuda"` | | -| agentGroups.agent-group-gpu.image.tag | string | `"11.0-base-ubuntu18.04"` | | -| agentGroups.agent-group-gpu.name | string | `"agent-group-gpu"` | | -| agentGroups.agent-group-gpu.nodeSelector | object | `{}` | | -| agentGroups.agent-group-gpu.nvidiaGpusPerAgent | int | `1` | | -| 
agentGroups.agent-group-gpu.podAnnotations | object | `{}` | | -| agentGroups.agent-group-gpu.queues | string | `"default"` | | -| agentGroups.agent-group-gpu.replicaCount | int | `0` | | -| agentGroups.agent-group-gpu.tolerations | list | `[]` | | -| agentGroups.agent-group-gpu.updateStrategy | string | `"Recreate"` | | -| agentk8sglue.defaultDockerImage | string | `"nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04"` | | -| agentk8sglue.enabled | bool | `true` | | -| agentk8sglue.id | string | `"k8s-agent"` | | -| agentk8sglue.image.repository | string | `"allegroai/clearml-agent-k8s"` | | -| agentk8sglue.image.tag | string | `"base-1.21"` | | -| agentk8sglue.maxPods | int | `10` | | -| agentk8sglue.podTemplate.env | list | `[]` | | -| agentk8sglue.podTemplate.nodeSelector | object | `{}` | | -| agentk8sglue.podTemplate.resources | object | `{}` | | -| agentk8sglue.podTemplate.tolerations | list | `[]` | | -| agentk8sglue.podTemplate.volumes | list | `[]` | | -| agentk8sglue.queue | string | `"default"` | | -| agentk8sglue.serviceAccountName | string | `"default"` | | -| agentservices.affinity | object | `{}` | | -| agentservices.agentVersion | string | `""` | | -| agentservices.awsAccessKeyId | string | `nil` | | -| agentservices.awsDefaultRegion | string | `nil` | | -| agentservices.awsSecretAccessKey | string | `nil` | | -| agentservices.azureStorageAccount | string | `nil` | | -| agentservices.azureStorageKey | string | `nil` | | -| agentservices.clearmlFilesHost | string | `nil` | | -| agentservices.clearmlGitPassword | string | `nil` | | -| agentservices.clearmlGitUser | string | `nil` | | -| agentservices.clearmlHostIp | string | `nil` | | -| agentservices.clearmlWebHost | string | `nil` | | -| agentservices.clearmlWorkerId | string | `"clearml-services"` | | -| agentservices.enabled | bool | `false` | | -| agentservices.extraEnvs | list | `[]` | | -| agentservices.googleCredentials | string | `nil` | | -| agentservices.image.pullPolicy | string | 
`"IfNotPresent"` | | -| agentservices.image.repository | string | `"allegroai/clearml-agent-services"` | | -| agentservices.image.tag | string | `"latest"` | | -| agentservices.nodeSelector | object | `{}` | | -| agentservices.podAnnotations | object | `{}` | | -| agentservices.replicaCount | int | `1` | | -| agentservices.resources | object | `{}` | | -| agentservices.storage.data.class | string | `""` | | -| agentservices.storage.data.size | string | `"50Gi"` | | -| agentservices.tolerations | list | `[]` | | | apiserver.additionalConfigs | object | `{}` | additional configurations that can be used by api server; check examples in values.yaml file | | apiserver.affinity | object | `{}` | | | apiserver.authCookiesMaxAge | int | `864000` | Amount of seconds the authorization cookie will last in user browser | @@ -224,7 +136,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a | apiserver.extraEnvs | list | `[]` | | | apiserver.image.pullPolicy | string | `"IfNotPresent"` | | | apiserver.image.repository | string | `"allegroai/clearml"` | | -| apiserver.image.tag | string | `"1.4.0"` | | +| apiserver.image.tag | string | `"1.5.0"` | | | apiserver.livenessDelay | int | `60` | | | apiserver.nodeSelector | object | `{}` | | | apiserver.podAnnotations | object | `{}` | | @@ -238,7 +150,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a | apiserver.service.port | int | `8008` | | | apiserver.service.type | string | `"NodePort"` | This will set to service's spec.type field | | apiserver.tolerations | list | `[]` | | -| clearml.defaultCompany | string | `"d1bd92a3b039400cbafc60a7a5b1e52b"` | | +| clearml | object | `{"defaultCompany":"d1bd92a3b039400cbafc60a7a5b1e52b"}` | ClearMl generic configurations | | elasticsearch.clusterHealthCheckParams | string | `"wait_for_status=yellow&timeout=1s"` | | | elasticsearch.clusterName | string | `"clearml-elastic"` | | | elasticsearch.enabled | bool | `true` | | @@ 
-284,7 +196,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a | fileserver.extraEnvs | list | `[]` | | | fileserver.image.pullPolicy | string | `"IfNotPresent"` | | | fileserver.image.repository | string | `"allegroai/clearml"` | | -| fileserver.image.tag | string | `"1.4.0"` | | +| fileserver.image.tag | string | `"1.5.0"` | | | fileserver.nodeSelector | object | `{}` | | | fileserver.podAnnotations | object | `{}` | | | fileserver.replicaCount | int | `1` | | @@ -295,6 +207,13 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a | fileserver.storage.data.class | string | `""` | | | fileserver.storage.data.size | string | `"50Gi"` | | | fileserver.tolerations | list | `[]` | | +| imageCredentials | object | `{"email":"someone@host.com","enabled":false,"existingSecret":"","password":"pwd","registry":"docker.io","username":"someone"}` | Private image registry configuration | +| imageCredentials.email | string | `"someone@host.com"` | Email | +| imageCredentials.enabled | bool | `false` | Use private authentication mode | +| imageCredentials.existingSecret | string | `""` | If this is set, chart will not generate a secret but will use what is defined here | +| imageCredentials.password | string | `"pwd"` | Registry password | +| imageCredentials.registry | string | `"docker.io"` | Registry name | +| imageCredentials.username | string | `"someone"` | Registry username | | ingress.annotations | object | `{}` | | | ingress.api.annotations | object | `{}` | | | ingress.api.enabled | bool | `false` | | @@ -343,7 +262,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a | webserver.extraEnvs | list | `[]` | | | webserver.image.pullPolicy | string | `"IfNotPresent"` | | | webserver.image.repository | string | `"allegroai/clearml"` | | -| webserver.image.tag | string | `"1.4.0"` | | +| webserver.image.tag | string | `"1.5.0"` | | | webserver.nodeSelector | object | `{}` | | | 
webserver.podAnnotations | object | `{}` | | | webserver.replicaCount | int | `1` | | diff --git a/charts/clearml/templates/configmap-agentk8s-template.yaml b/charts/clearml/templates/configmap-agentk8s-template.yaml deleted file mode 100644 index 5e8e9dc..0000000 --- a/charts/clearml/templates/configmap-agentk8s-template.yaml +++ /dev/null @@ -1,57 +0,0 @@ -{{- if .Values.agentk8sglue.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: k8sagent-pod-template -data: - template.yaml: | - apiVersion: v1 - metadata: - namespace: {{ .Release.Namespace }} - spec: - serviceAccountName: {{ .Values.agentk8sglue.serviceAccountName }} - volumes: - {{- range .Values.agentk8sglue.podTemplate.volumes }} - - name: {{ .name }} - persistentVolumeClaim: - claimName: {{ .name }} - {{- end }} - containers: - - resources: - {{- toYaml .Values.agentk8sglue.podTemplate.resources | nindent 10 }} - ports: - - containerPort: 10022 - volumeMounts: - {{- range .Values.agentk8sglue.podTemplate.volumes }} - - mountPath: {{ .path }} - name: {{ .name }} - {{- end }} - env: - - name: CLEARML_API_HOST - value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}" - - name: CLEARML_WEB_HOST - value: "http://{{ include "clearml.fullname" . }}-webserver" - - name: CLEARML_FILES_HOST - value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}" - - name: CLEARML_API_ACCESS_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: apiserver_key - - name: CLEARML_API_SECRET_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: apiserver_secret - {{- if .Values.agentk8sglue.podTemplate.env }} - {{ toYaml .Values.agentk8sglue.podTemplate.env | nindent 8 }} - {{- end }} - {{- with .Values.agentk8sglue.podTemplate.nodeSelector}} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.agentk8sglue.podTemplate.tolerations }} - tolerations: - {{- toYaml . 
| nindent 8 }} - {{- end }} -{{- end }} diff --git a/charts/clearml/templates/deployment-agent.yaml b/charts/clearml/templates/deployment-agent.yaml deleted file mode 100644 index 116a7c1..0000000 --- a/charts/clearml/templates/deployment-agent.yaml +++ /dev/null @@ -1,122 +0,0 @@ -{{- range $key, $value := .Values.agentGroups }} -{{- with $value }} -{{- if .enabled }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "clearml.fullname" $ }}-{{ .name }}-agent - labels: - {{- include "clearml.labels" $ | nindent 4 }} -spec: - replicas: {{ .replicaCount }} - strategy: - type: {{ .updateStrategy }} - selector: - matchLabels: - {{- include "clearml.selectorLabelsAgent" $ | nindent 6 }} - template: - metadata: - annotations: - checksum/secret: {{ include (print $.Template.BasePath "/secrets.yaml") $ | sha256sum }} - {{- with .podAnnotations }} - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "clearml.selectorLabelsAgent" $ | nindent 8 }} - spec: - volumes: - {{ if .clearmlConfig }} - - name: agent-clearml-conf-volume - secret: - secretName: {{ .name }}-conf - items: - - key: clearml.conf - path: clearml.conf - {{ end }} - initContainers: - - name: init-agent-{{ .name }} - image: "{{ .image.repository }}:{{ .image.tag | default $.Chart.AppVersion }}" - command: - - /bin/sh - - -c - - > - set -x; - while [ $(curl -sw '%{http_code}' "{{ include "clearml.serviceApi" $ }}/debug.ping" -o /dev/null) -ne 200 ] ; do - echo "waiting for apiserver" ; - sleep 5 ; - done - containers: - - name: {{ $.Chart.Name }}-{{ .name }} - image: "{{ .image.repository }}:{{ .image.tag }}" - imagePullPolicy: {{ .image.pullPolicy }} - securityContext: - privileged: true - resources: - limits: - nvidia.com/gpu: - {{ .nvidiaGpusPerAgent }} - env: - - name: CLEARML_API_HOST - value: {{ include "clearml.serviceApi" $ }} - - name: CLEARML_WEB_HOST - value: {{ include "clearml.serviceApp" $ }} - - name: CLEARML_FILES_HOST - value: {{ include 
"clearml.serviceFiles" $ }} - - name: CLEARML_AGENT_GIT_USER - value: {{ .clearmlGitUser}} - - name: CLEARML_AGENT_GIT_PASS - value: {{ .clearmlGitPassword}} - - name: AWS_ACCESS_KEY_ID - value: {{ .awsAccessKeyId}} - - name: AWS_SECRET_ACCESS_KEY - value: {{ .awsSecretAccessKey}} - - name: AWS_DEFAULT_REGION - value: {{ .awsDefaultRegion}} - - name: AZURE_STORAGE_ACCOUNT - value: {{ .azureStorageAccount}} - - name: AZURE_STORAGE_KEY - value: {{ .azureStorageKey}} - - name: CLEARML_API_ACCESS_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: tests_user_key - - name: CLEARML_API_SECRET_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: tests_user_secret - {{- if .extraEnvs }} - {{ toYaml .extraEnvs | nindent 10 }} - {{- end }} - command: - - /bin/sh - - -c - - "apt-get update ; - apt-get install -y curl python3-pip git; - python3 -m pip install -U pip ; - python3 -m pip install clearml-agent{{ .agentVersion}} ; - CLEARML_AGENT_K8S_HOST_MOUNT=/root/.clearml:/root/.clearml clearml-agent daemon --foreground --queue {{ .queues}}" - {{ if .clearmlConfig }} - volumeMounts: - - name: agent-clearml-conf-volume - mountPath: /root/clearml.conf - subPath: clearml.conf - readOnly: true - {{- end }} - {{- with .nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} -{{- end }} -{{- end }} diff --git a/charts/clearml/templates/deployment-agentk8s.yaml b/charts/clearml/templates/deployment-agentk8s.yaml deleted file mode 100644 index 66df885..0000000 --- a/charts/clearml/templates/deployment-agentk8s.yaml +++ /dev/null @@ -1,64 +0,0 @@ -{{- if .Values.agentk8sglue.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: "{{ include "clearml.fullname" . 
}}-k8sagent" - labels: - app: k8sagent -spec: - replicas: 1 - selector: - matchLabels: - app: k8sagent - template: - metadata: - labels: - app: k8sagent - spec: - containers: - - name: k8s-glue-container - image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}" - imagePullPolicy: IfNotPresent - command: ["/bin/bash", "-c", "export PATH=$PATH:$HOME/bin; source /root/.bashrc && /root/entrypoint.sh"] - volumeMounts: - - name: k8sagent-pod-template - mountPath: /root/template - env: - - name: CLEARML_API_HOST - value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}" - - name: CLEARML_WEB_HOST - value: "http://{{ include "clearml.fullname" . }}-webserver" - - name: CLEARML_FILES_HOST - value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}" - - name: K8S_GLUE_MAX_PODS - value: "{{.Values.agentk8sglue.maxPods}}" - - name: K8S_GLUE_QUEUE - value: "{{.Values.agentk8sglue.queue}}" - - name: K8S_GLUE_EXTRA_ARGS - value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml" - - name: K8S_DEFAULT_NAMESPACE - value: "{{ .Release.Namespace }}" - - name: CLEARML_API_ACCESS_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: apiserver_key - - name: CLEARML_API_SECRET_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: apiserver_secret - - name: CLEARML_WORKER_ID - value: "{{.Values.agentk8sglue.id}}" - - name: CLEARML_AGENT_UPDATE_REPO - value: "" - - name: FORCE_CLEARML_AGENT_REPO - value: "" - - name: CLEARML_DOCKER_IMAGE - value: "{{.Values.agentk8sglue.defaultDockerImage}}" - volumes: - - name: k8sagent-pod-template - configMap: - name: k8sagent-pod-template -{{- end }} - diff --git a/charts/clearml/templates/deployment-agentservices.yaml b/charts/clearml/templates/deployment-agentservices.yaml deleted file mode 100644 index e5fb302..0000000 --- a/charts/clearml/templates/deployment-agentservices.yaml +++ 
/dev/null @@ -1,106 +0,0 @@ -{{- if .Values.agentservices.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "clearml.fullname" . }}-agentservices - labels: - {{- include "clearml.labels" . | nindent 4 }} -spec: - replicas: {{ .Values.agentservices.replicaCount }} - selector: - matchLabels: - {{- include "clearml.selectorLabelsAgentServices" . | nindent 6 }} - template: - metadata: - annotations: - checksum/secret: {{ include (print $.Template.BasePath "/secrets.yaml") . | sha256sum }} - {{- with .Values.agentservices.podAnnotations }} - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "clearml.selectorLabelsAgentServices" . | nindent 8 }} - spec: - volumes: - - name: agentservices-data - persistentVolumeClaim: - claimName: {{ include "clearml.fullname" . }}-agentservices-data - initContainers: - - name: init-agentservices - image: "{{ .Values.agentservices.image.repository }}:{{ .Values.agentservices.image.tag | default .Chart.AppVersion }}" - command: - - /bin/sh - - -c - - > - set -x; - while [ $(curl -sw '%{http_code}' "{{ include "clearml.serviceApi" $ }}/debug.ping" -o /dev/null) -ne 200 ] ; do - echo "waiting for apiserver" ; - sleep 5 ; - done - containers: - - name: {{ .Chart.Name }} - image: "{{ .Values.agentservices.image.repository }}:{{ .Values.agentservices.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.agentservices.image.pullPolicy }} - env: - - name: CLEARML_HOST_IP - value: {{ .Values.agentservices.clearmlHostIp }} - - name: CLEARML_API_HOST - value: {{ include "clearml.serviceApi" $ }} - - name: CLEARML_WEB_HOST - value: {{ .Values.agentservices.clearmlWebHost }} - - name: CLEARML_FILES_HOST - value: {{ .Values.agentservices.clearmlFilesHost }} - - name: CLEARML_AGENT_GIT_USER - value: {{ .Values.agentservices.clearmlGitUser }} - - name: CLEARML_AGENT_GIT_PASS - value: {{ .Values.agentservices.clearmlGitPassword }} - - name: CLEARML_AGENT_UPDATE_VERSION - value: {{ 
.Values.agentservices.agentVersion }} - - name: CLEARML_AGENT_DEFAULT_BASE_DOCKER - value: {{ .Values.agentservices.defaultBaseDocker }} - - name: AWS_ACCESS_KEY_ID - value: {{ .Values.agentservices.awsAccessKeyId }} - - name: AWS_SECRET_ACCESS_KEY - value: {{ .Values.agentservices.awsSecretAccessKey }} - - name: AWS_DEFAULT_REGION - value: {{ .Values.agentservices.awsDefaultRegion }} - - name: AZURE_STORAGE_ACCOUNT - value: {{ .Values.agentservices.azureStorageAccount }} - - name: AZURE_STORAGE_KEY - value: {{ .Values.agentservices.azureStorageKey }} - - name: GOOGLE_APPLICATION_CREDENTIALS - value: {{ .Values.agentservices.googleCredentials }} - - name: CLEARML_WORKER_ID - value: {{ .Values.agentservices.clearmlWorkerId }} - - name: CLEARML_API_ACCESS_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: tests_user_key - - name: CLEARML_API_SECRET_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: tests_user_secret - {{- if .Values.agentservices.extraEnvs }} - {{ toYaml .Values.agentservices.extraEnvs | nindent 10 }} - {{- end }} - args: - - agentservices - volumeMounts: - - name: agentservices-data - mountPath: /root/.clearml - resources: - {{- toYaml .Values.agentservices.resources | nindent 12 }} - {{- with .Values.agentservices.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.agentservices.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.agentservices.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} diff --git a/charts/clearml/templates/deployment-apiserver.yaml b/charts/clearml/templates/deployment-apiserver.yaml index 04ac474..15ac311 100644 --- a/charts/clearml/templates/deployment-apiserver.yaml +++ b/charts/clearml/templates/deployment-apiserver.yaml @@ -19,7 +19,14 @@ spec: labels: {{- include "clearml.selectorLabelsApiServer" . | nindent 8 }} spec: -{{- include "clearml.imagePullSecrets" . 
| indent 6 }} + {{- if .Values.imageCredentials.enabled }} + imagePullSecrets: + {{- if .Values.imageCredentials.existingSecret }} + - name: {{ .Values.imageCredentials.existingSecret | quote }} + {{- else }} + - name: clearml-agent-registry-key + {{- end }} + {{- end }} containers: - name: {{ .Chart.Name }} image: "{{ .Values.apiserver.image.repository }}:{{ .Values.apiserver.image.tag | default .Chart.AppVersion }}" diff --git a/charts/clearml/templates/deployment-fileserver.yaml b/charts/clearml/templates/deployment-fileserver.yaml index d920c57..88c6805 100644 --- a/charts/clearml/templates/deployment-fileserver.yaml +++ b/charts/clearml/templates/deployment-fileserver.yaml @@ -22,7 +22,14 @@ spec: - name: fileserver-data persistentVolumeClaim: claimName: {{ include "clearml.fullname" . }}-fileserver-data -{{- include "clearml.imagePullSecrets" . | indent 6 }} + {{- if .Values.imageCredentials.enabled }} + imagePullSecrets: + {{- if .Values.imageCredentials.existingSecret }} + - name: {{ .Values.imageCredentials.existingSecret | quote }} + {{- else }} + - name: clearml-agent-registry-key + {{- end }} + {{- end }} containers: - name: {{ .Chart.Name }} image: "{{ .Values.fileserver.image.repository }}:{{ .Values.fileserver.image.tag | default .Chart.AppVersion }}" diff --git a/charts/clearml/templates/deployment-webserver.yaml b/charts/clearml/templates/deployment-webserver.yaml index 87f3fa6..68880bc 100644 --- a/charts/clearml/templates/deployment-webserver.yaml +++ b/charts/clearml/templates/deployment-webserver.yaml @@ -18,7 +18,14 @@ spec: labels: {{- include "clearml.selectorLabelsWebServer" . | nindent 8 }} spec: -{{- include "clearml.imagePullSecrets" . 
| indent 6 }} + {{- if .Values.imageCredentials.enabled }} + imagePullSecrets: + {{- if .Values.imageCredentials.existingSecret }} + - name: {{ .Values.imageCredentials.existingSecret | quote }} + {{- else }} + - name: clearml-agent-registry-key + {{- end }} + {{- end }} containers: - name: {{ .Chart.Name }} image: "{{ .Values.webserver.image.repository }}:{{ .Values.webserver.image.tag | default .Chart.AppVersion }}" diff --git a/charts/clearml/templates/pvc-agentservices.yaml b/charts/clearml/templates/pvc-agentservices.yaml deleted file mode 100644 index eb1511d..0000000 --- a/charts/clearml/templates/pvc-agentservices.yaml +++ /dev/null @@ -1,17 +0,0 @@ -{{- if .Values.agentservices.enabled }} -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: {{ include "clearml.fullname" . }}-agentservices-data - labels: - {{- include "clearml.labels" . | nindent 4 }} -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.agentservices.storage.data.size | quote }} - {{- if .Values.agentservices.storage.data.class -}} - storageClassName: {{ .Values.agentservices.storage.data.class | quote }} - {{- end -}} -{{- end }} diff --git a/charts/clearml/templates/rbac-agentk8s.yaml b/charts/clearml/templates/rbac-agentk8s.yaml deleted file mode 100644 index e677b81..0000000 --- a/charts/clearml/templates/rbac-agentk8s.yaml +++ /dev/null @@ -1,25 +0,0 @@ -{{- if .Values.agentk8sglue.enabled }} -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: k8sagent-pods-access -rules: - - apiGroups: - - "" - resources: - - pods - verbs: ["get", "list", "watch", "create", "patch", "delete"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: k8sagent-pods-access -subjects: - - kind: ServiceAccount - name: default - namespace: {{ .Release.Namespace }} -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: k8sagent-pods-access -{{- end }} diff --git a/charts/clearml/templates/secret-agent.yaml 
b/charts/clearml/templates/secret-agent.yaml deleted file mode 100644 index adea851..0000000 --- a/charts/clearml/templates/secret-agent.yaml +++ /dev/null @@ -1,13 +0,0 @@ -{{- range $key, $value := .Values.agentGroups }} -{{- with $value }} ---- -{{ if .clearmlConfig }} -apiVersion: v1 -kind: Secret -metadata: - name: {{ .name }}-conf -data: - clearml.conf: {{ .clearmlConfig | b64enc }} -{{ end }} -{{- end }} -{{- end }} diff --git a/charts/clearml/values.yaml b/charts/clearml/values.yaml index 2afe314..4600328 100755 --- a/charts/clearml/values.yaml +++ b/charts/clearml/values.yaml @@ -1,6 +1,19 @@ -# global: -# imagePullSecrets: -# - docker-cfg +# -- Private image registry configuration +imageCredentials: + # -- Use private authentication mode + enabled: false + # -- If this is set, chart will not generate a secret but will use what is defined here + existingSecret: "" + # -- Registry name + registry: docker.io + # -- Registry username + username: someone + # -- Registry password + password: pwd + # -- Email + email: someone@host.com + +# -- ClearMl generic configurations clearml: defaultCompany: "d1bd92a3b039400cbafc60a7a5b1e52b" ingress: @@ -67,7 +80,7 @@ apiserver: image: repository: "allegroai/clearml" pullPolicy: IfNotPresent - tag: "1.4.0" + tag: "1.5.0" extraEnvs: [] @@ -136,7 +149,7 @@ fileserver: image: repository: "allegroai/clearml" pullPolicy: IfNotPresent - tag: "1.4.0" + tag: "1.5.0" extraEnvs: [] @@ -181,7 +194,7 @@ webserver: image: repository: "allegroai/clearml" pullPolicy: IfNotPresent - tag: "1.4.0" + tag: "1.5.0" podAnnotations: {} @@ -205,164 +218,6 @@ webserver: additionalConfigs: {} -agentservices: - enabled: false - clearmlHostIp: null - agentVersion: "" - clearmlWebHost: null - clearmlFilesHost: null - clearmlGitUser: null - clearmlGitPassword: null - awsAccessKeyId: null - awsSecretAccessKey: null - awsDefaultRegion: null - azureStorageAccount: null - azureStorageKey: null - googleCredentials: null - clearmlWorkerId: 
"clearml-services" - - replicaCount: 1 - - image: - repository: "allegroai/clearml-agent-services" - pullPolicy: IfNotPresent - tag: "latest" - - extraEnvs: [] - - podAnnotations: {} - - resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - - nodeSelector: {} - - tolerations: [] - - affinity: {} - - storage: - data: - class: "" - size: 50Gi - -agentGroups: - agent-group-cpu: - enabled: false - name: agent-group-cpu - replicaCount: 1 - updateStrategy: Recreate - nvidiaGpusPerAgent: 0 - agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1") - queues: "default" # multiple queues can be specified separated by a space (e.g. "important_jobs default") - clearmlGitUser: null - clearmlGitPassword: null - clearmlAccessKey: null - clearmlSecretKey: null - awsAccessKeyId: null - awsSecretAccessKey: null - awsDefaultRegion: null - azureStorageAccount: null - azureStorageKey: null - clearmlConfig: |- - sdk { - } - - image: - repository: "ubuntu" - pullPolicy: IfNotPresent - tag: "18.04" - - extraEnvs: [] - - podAnnotations: {} - - nodeSelector: {} - - tolerations: [] - - affinity: {} - - agent-group-gpu: - enabled: false - name: agent-group-gpu - replicaCount: 0 - updateStrategy: Recreate - nvidiaGpusPerAgent: 1 - agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1") - queues: "default" # multiple queues can be specified separated by a space (e.g. 
"important_jobs default") - clearmlGitUser: null - clearmlGitPassword: null - clearmlAccessKey: null - clearmlSecretKey: null - awsAccessKeyId: null - awsSecretAccessKey: null - awsDefaultRegion: null - azureStorageAccount: null - azureStorageKey: null - clearmlConfig: |- - sdk { - } - - image: - repository: "nvidia/cuda" - pullPolicy: IfNotPresent - tag: "11.0-base-ubuntu18.04" - - podAnnotations: {} - - nodeSelector: {} - - tolerations: [] - - affinity: {} - -# This agent will spawn queued experiments in new pods, a good use case is to combine this with -# GPU autoscaling nodes. -# https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue -agentk8sglue: - enabled: true - image: - repository: "allegroai/clearml-agent-k8s" - tag: "base-1.21" - serviceAccountName: default - maxPods: 10 - defaultDockerImage: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04 # default docker image that is spawned as new pod - queue: default - id: k8s-agent - podTemplate: - volumes: [] - # - name: "yourvolume" - # path: "/yourpath" - env: [] - # # to setup access to private repo, setup secret with git credentials: - # - name: CLEARML_AGENT_GIT_USER - # value: mygitusername - # - name: CLEARML_AGENT_GIT_PASS - # valueFrom: - # secretKeyRef: - # name: git-password - # key: git-password - resources: {} - # limits: - # nvidia.com/gpu: 1 - tolerations: [] - # - key: "nvidia.com/gpu" - # operator: Exists - # effect: "NoSchedule" - nodeSelector: {} - # fleet: gpu-nodes - - externalServices: # -- Existing ElasticSearch Hostname to use if elasticsearch.enabled is false elasticsearchHost: ""