From dd1c201eebeba2d1e5b6807b5bd653cef5005305 Mon Sep 17 00:00:00 2001 From: Valeriano Manassero <14011549+valeriano-manassero@users.noreply.github.com> Date: Mon, 13 Feb 2023 08:17:53 +0100 Subject: [PATCH 1/9] Avoid collisions in internal helper variable naming (#154) * Fixed: helper variable rename to avoid collisions * Changed: bump version --- charts/clearml-agent/Chart.yaml | 4 +-- charts/clearml-agent/README.md | 2 +- charts/clearml-agent/templates/_helpers.tpl | 24 +++++++-------- .../templates/agentk8sglue-configmap.yaml | 28 ++++++++--------- .../templates/agentk8sglue-deployment.yaml | 30 +++++++++---------- .../templates/agentk8sglue-rbac.yaml | 18 +++++------ .../templates/clearml-secrets.yaml | 4 +-- .../templates/service-secret.yaml | 6 ++-- .../templates/service-sessions.yaml | 2 +- 9 files changed, 59 insertions(+), 59 deletions(-) diff --git a/charts/clearml-agent/Chart.yaml b/charts/clearml-agent/Chart.yaml index 9844f41..def6d6c 100644 --- a/charts/clearml-agent/Chart.yaml +++ b/charts/clearml-agent/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: clearml-agent description: MLOps platform Task running agent type: application -version: "3.3.1" +version: "3.3.2" appVersion: "1.24" kubeVersion: ">= 1.21.0-0 < 1.27.0-0" home: https://clear.ml @@ -21,4 +21,4 @@ keywords: annotations: artifacthub.io/changes: | - kind: fixed - description: typo on existingSecret references + description: clearml agent internal helper variable name diff --git a/charts/clearml-agent/README.md b/charts/clearml-agent/README.md index 3c5ae9d..991608d 100644 --- a/charts/clearml-agent/README.md +++ b/charts/clearml-agent/README.md @@ -1,6 +1,6 @@ # ClearML Kubernetes Agent -![Version: 3.3.1](https://img.shields.io/badge/Version-3.3.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square) +![Version: 
3.3.2](https://img.shields.io/badge/Version-3.3.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square) MLOps platform Task running agent diff --git a/charts/clearml-agent/templates/_helpers.tpl b/charts/clearml-agent/templates/_helpers.tpl index 3239bae..fbed08c 100644 --- a/charts/clearml-agent/templates/_helpers.tpl +++ b/charts/clearml-agent/templates/_helpers.tpl @@ -1,23 +1,23 @@ {{/* Expand the name of the chart. */}} -{{- define "clearml.name" -}} +{{- define "clearmlAgent.name" -}} {{- .Release.Name | trunc 59 | trimSuffix "-" }} {{- end }} {{/* Create chart name and version as used by the chart label. */}} -{{- define "clearml.chart" -}} +{{- define "clearmlAgent.chart" -}} {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 59 | trimSuffix "-" }} {{- end }} {{/* Common labels */}} -{{- define "clearml.labels" -}} -helm.sh/chart: {{ include "clearml.chart" . }} -{{ include "clearml.selectorLabels" . }} +{{- define "clearmlAgent.labels" -}} +helm.sh/chart: {{ include "clearmlAgent.chart" . }} +{{ include "clearmlAgent.selectorLabels" . }} {{- if .Chart.AppVersion }} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} @@ -30,7 +30,7 @@ app.kubernetes.io/managed-by: {{ .Release.Service }} {{/* Common annotations */}} -{{- define "clearml.annotations" -}} +{{- define "clearmlAgent.annotations" -}} {{- if $.Values.agentk8sglue.annotations }} {{ toYaml $.Values.agentk8sglue.annotations }} {{- end }} @@ -39,8 +39,8 @@ Common annotations {{/* Selector labels */}} -{{- define "clearml.selectorLabels" -}} -app.kubernetes.io/name: {{ include "clearml.name" . }} +{{- define "clearmlAgent.selectorLabels" -}} +app.kubernetes.io/name: {{ include "clearmlAgent.name" . 
}} app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} @@ -48,18 +48,18 @@ app.kubernetes.io/instance: {{ .Release.Name }} Selector labels (agentk8sglue) */}} {{- define "agentk8sglue.selectorLabels" -}} -app.kubernetes.io/name: {{ include "clearml.name" . }} -app.kubernetes.io/instance: {{ include "clearml.name" . }} +app.kubernetes.io/name: {{ include "clearmlAgent.name" . }} +app.kubernetes.io/instance: {{ include "clearmlAgent.name" . }} {{- end }} {{/* Create the name of the service account to use */}} -{{- define "clearml.serviceAccountName" -}} +{{- define "clearmlAgent.serviceAccountName" -}} {{- if .Values.agentk8sglue.serviceExistingAccountName }} {{- .Values.agentk8sglue.serviceExistingAccountName }} {{- else }} -{{- include "clearml.name" . }}-sa +{{- include "clearmlAgent.name" . }}-sa {{- end }} {{- end }} diff --git a/charts/clearml-agent/templates/agentk8sglue-configmap.yaml b/charts/clearml-agent/templates/agentk8sglue-configmap.yaml index aecde2e..df064d0 100644 --- a/charts/clearml-agent/templates/agentk8sglue-configmap.yaml +++ b/charts/clearml-agent/templates/agentk8sglue-configmap.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: ConfigMap metadata: - name: {{ include "clearml.name" . }}-pt + name: {{ include "clearmlAgent.name" . 
}}-pt data: {{- if .Values.enterpriseFeatures.enabled }} template.yaml: | @@ -30,7 +30,7 @@ data: {{- if $.Values.imageCredentials.existingSecret }} - name: {{ $.Values.imageCredentials.existingSecret }} {{- else }} - - name: {{ include "clearml.name" $ }}-ark + - name: {{ include "clearmlAgent.name" $ }}-ark {{- end }} {{- end }} {{- if $value.templateOverrides.schedulerName }} @@ -66,14 +66,14 @@ data: {{- if $value.templateOverrides.fileMounts }} - name: filemounts secret: - secretName: {{ include "clearml.name" $ }}-{{ $key }}-fm + secretName: {{ include "clearmlAgent.name" $ }}-{{ $key }}-fm {{- else if $.Values.agentk8sglue.basePodTemplate.fileMounts }} - name: filemounts secret: - secretName: {{ include "clearml.name" $ }}-fm + secretName: {{ include "clearmlAgent.name" $ }}-fm {{- end }} {{- if not $.Values.enterpriseFeatures.serviceAccountClusterAccess }} - serviceAccountName: {{ include "clearml.serviceAccountName" $ }} + serviceAccountName: {{ include "clearmlAgent.serviceAccountName" $ }} {{- end }} {{- if $value.templateOverrides.initContainers }} initContainers: @@ -126,7 +126,7 @@ data: {{- if $.Values.clearml.existingAgentk8sglueSecret }} name: {{ $.Values.clearml.existingAgentk8sglueSecret }} {{- else }} - name: {{ include "clearml.name" $ }}-ac + name: {{ include "clearmlAgent.name" $ }}-ac {{- end }} key: agentk8sglue_key - name: CLEARML_API_SECRET_KEY @@ -135,7 +135,7 @@ data: {{- if $.Values.clearml.existingAgentk8sglueSecret }} name: {{ $.Values.clearml.existingAgentk8sglueSecret }} {{- else }} - name: {{ include "clearml.name" $ }}-ac + name: {{ include "clearmlAgent.name" $ }}-ac {{- end }} key: agentk8sglue_secret {{- end }} @@ -188,9 +188,9 @@ data: {{- range $key, $value := $.Values.enterpriseFeatures.queues }} {{ $key }}: {{- if $value.templateOverrides.fileMounts }} - - {{ include "clearml.name" $ }}-{{ $key }}-fm + - {{ include "clearmlAgent.name" $ }}-{{ $key }}-fm {{- else if $.Values.agentk8sglue.basePodTemplate.fileMounts }} - - {{ 
include "clearml.name" $ }}-fm + - {{ include "clearmlAgent.name" $ }}-fm {{- end }} {{- end }} {{- else }} @@ -208,14 +208,14 @@ data: {{- if .Values.imageCredentials.existingSecret }} - name: {{ .Values.imageCredentials.existingSecret }} {{- else }} - - name: name: {{ include "clearml.name" $ }}-ark + - name: name: {{ include "clearmlAgent.name" $ }}-ark {{- end }} {{- end }} {{- with .Values.agentk8sglue.basePodTemplate.volumes }} volumes: {{- toYaml . | nindent 8 }} {{- end }} - serviceAccountName: {{ include "clearml.serviceAccountName" $ }} + serviceAccountName: {{ include "clearmlAgent.serviceAccountName" $ }} containers: - resources: {{- toYaml .Values.agentk8sglue.basePodTemplate.resources | nindent 10 }} @@ -238,7 +238,7 @@ data: {{- if .Values.clearml.existingAgentk8sglueSecret }} name: {{ .Values.clearml.existingAgentk8sglueSecret }} {{- else }} - name: {{ include "clearml.name" . }}-ac + name: {{ include "clearmlAgent.name" . }}-ac {{- end }} key: agentk8sglue_key - name: CLEARML_API_SECRET_KEY @@ -247,7 +247,7 @@ data: {{- if .Values.clearml.existingAgentk8sglueSecret }} name: {{ .Values.clearml.existingAgentk8sglueSecret }} {{- else }} - name: {{ include "clearml.name" . }}-ac + name: {{ include "clearmlAgent.name" . }}-ac {{- end }} key: agentk8sglue_secret {{- if .Values.agentk8sglue.basePodTemplate.env }} @@ -274,7 +274,7 @@ data: metadata: name: clearml-session-{{ . }} labels: - {{- include "clearml.labels" $ | nindent 8 }} + {{- include "clearmlAgent.labels" $ | nindent 8 }} {{- with $.Values.sessions.svcAnnotations }} annotations: {{- toYaml . 
| nindent 8 }} diff --git a/charts/clearml-agent/templates/agentk8sglue-deployment.yaml b/charts/clearml-agent/templates/agentk8sglue-deployment.yaml index a3f9f8d..f54b487 100644 --- a/charts/clearml-agent/templates/agentk8sglue-deployment.yaml +++ b/charts/clearml-agent/templates/agentk8sglue-deployment.yaml @@ -1,11 +1,11 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: {{ include "clearml.name" . }} + name: {{ include "clearmlAgent.name" . }} labels: - {{- include "clearml.labels" . | nindent 4 }} + {{- include "clearmlAgent.labels" . | nindent 4 }} annotations: - {{- include "clearml.annotations" . | nindent 4 }} + {{- include "clearmlAgent.annotations" . | nindent 4 }} spec: replicas: {{ .Values.agentk8sglue.replicaCount }} selector: @@ -15,19 +15,19 @@ spec: metadata: annotations: checksum/config: {{ printf "%s%s" .Values.clearml .Values.agentk8sglue | sha256sum }} - {{- include "clearml.annotations" . | nindent 8 }} + {{- include "clearmlAgent.annotations" . | nindent 8 }} labels: - {{- include "clearml.labels" . | nindent 8 }} + {{- include "clearmlAgent.labels" . | nindent 8 }} spec: {{- if .Values.imageCredentials.enabled }} imagePullSecrets: {{- if .Values.imageCredentials.existingSecret }} - name: {{ .Values.imageCredentials.existingSecret }} {{- else }} - - name: {{ include "clearml.name" . }}-ark + - name: {{ include "clearmlAgent.name" . }}-ark {{- end }} {{- end }} - serviceAccountName: {{ include "clearml.serviceAccountName" . }} + serviceAccountName: {{ include "clearmlAgent.serviceAccountName" . }} securityContext: {{ toYaml .Values.agentk8sglue.securityContext | nindent 8 }} initContainers: - name: init-k8s-glue @@ -68,7 +68,7 @@ spec: export PATH=$PATH:$HOME/bin; source /root/.bashrc && /root/entrypoint.sh volumeMounts: - - name: {{ include "clearml.name" . }}-pt + - name: {{ include "clearmlAgent.name" . 
}}-pt mountPath: /root/template {{ if .Values.clearml.clearmlConfig }} - name: k8sagent-clearml-conf-volume @@ -122,15 +122,15 @@ spec: - name: CLEARML_API_ACCESS_KEY valueFrom: secretKeyRef: - name: {{ include "clearml.name" . }}-ac + name: {{ include "clearmlAgent.name" . }}-ac key: agentk8sglue_key - name: CLEARML_API_SECRET_KEY valueFrom: secretKeyRef: - name: {{ include "clearml.name" . }}-ac + name: {{ include "clearmlAgent.name" . }}-ac key: agentk8sglue_secret - name: CLEARML_WORKER_ID - value: {{ include "clearml.name" . }} + value: {{ include "clearmlAgent.name" . }} - name: CLEARML_AGENT_UPDATE_REPO value: "" - name: FORCE_CLEARML_AGENT_REPO @@ -186,13 +186,13 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} volumes: - - name: {{ include "clearml.name" . }}-pt + - name: {{ include "clearmlAgent.name" . }}-pt configMap: - name: {{ include "clearml.name" . }}-pt + name: {{ include "clearmlAgent.name" . }}-pt {{ if .Values.clearml.clearmlConfig }} - name: k8sagent-clearml-conf-volume secret: - secretName: {{ include "clearml.name" . }}-ac + secretName: {{ include "clearmlAgent.name" . }}-ac items: - key: clearml.conf path: clearml.conf @@ -203,5 +203,5 @@ spec: {{ if .Values.agentk8sglue.fileMounts }} - name: filemounts secret: - secretName: {{ include "clearml.name" . }}-afm + secretName: {{ include "clearmlAgent.name" . }}-afm {{- end }} diff --git a/charts/clearml-agent/templates/agentk8sglue-rbac.yaml b/charts/clearml-agent/templates/agentk8sglue-rbac.yaml index 7b19d9c..549ca3a 100644 --- a/charts/clearml-agent/templates/agentk8sglue-rbac.yaml +++ b/charts/clearml-agent/templates/agentk8sglue-rbac.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: ServiceAccount metadata: - name: {{ include "clearml.serviceAccountName" . }} + name: {{ include "clearmlAgent.serviceAccountName" . 
}} namespace: {{ .Release.Namespace }} {{- end }} {{- if .Values.enterpriseFeatures.serviceAccountClusterAccess }} @@ -10,7 +10,7 @@ metadata: apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: {{ include "clearml.name" . }}-kpa + name: {{ include "clearmlAgent.name" . }}-kpa rules: - apiGroups: - "" @@ -28,21 +28,21 @@ rules: apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: {{ include "clearml.name" . }}-kpa + name: {{ include "clearmlAgent.name" . }}-kpa subjects: - kind: ServiceAccount - name: {{ include "clearml.serviceAccountName" . }} + name: {{ include "clearmlAgent.serviceAccountName" . }} namespace: {{ .Release.Namespace }} roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: {{ include "clearml.name" . }}-kpa + name: {{ include "clearmlAgent.name" . }}-kpa {{- else }} --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: - name: {{ include "clearml.name" . }}-kpa + name: {{ include "clearmlAgent.name" . }}-kpa rules: - apiGroups: - "" @@ -60,13 +60,13 @@ rules: apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: - name: {{ include "clearml.name" . }}-kpa + name: {{ include "clearmlAgent.name" . }}-kpa subjects: - kind: ServiceAccount - name: {{ include "clearml.serviceAccountName" . }} + name: {{ include "clearmlAgent.serviceAccountName" . }} namespace: {{ .Release.Namespace }} roleRef: apiGroup: rbac.authorization.k8s.io kind: Role - name: {{ include "clearml.name" . }}-kpa + name: {{ include "clearmlAgent.name" . }}-kpa {{- end }} diff --git a/charts/clearml-agent/templates/clearml-secrets.yaml b/charts/clearml-agent/templates/clearml-secrets.yaml index 718ba59..b4348f8 100644 --- a/charts/clearml-agent/templates/clearml-secrets.yaml +++ b/charts/clearml-agent/templates/clearml-secrets.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Secret metadata: - name: {{ include "clearml.name" . }}-ac + name: {{ include "clearmlAgent.name" . 
}}-ac data: agentk8sglue_key: {{ .Values.clearml.agentk8sglueKey | b64enc }} agentk8sglue_secret: {{ .Values.clearml.agentk8sglueSecret | b64enc }} @@ -12,7 +12,7 @@ data: apiVersion: v1 kind: Secret metadata: - name: {{ include "clearml.name" . }}-ark + name: {{ include "clearmlAgent.name" . }}-ark type: kubernetes.io/dockerconfigjson data: .dockerconfigjson: {{ template "imagePullSecret" . }} diff --git a/charts/clearml-agent/templates/service-secret.yaml b/charts/clearml-agent/templates/service-secret.yaml index 8434a0c..40bcb36 100644 --- a/charts/clearml-agent/templates/service-secret.yaml +++ b/charts/clearml-agent/templates/service-secret.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: Secret metadata: - name: {{ include "clearml.name" . }}-afm + name: {{ include "clearmlAgent.name" . }}-afm data: {{- range .Values.agentk8sglue.fileMounts }} {{ .name }}: {{ .fileContent | b64enc }} @@ -14,7 +14,7 @@ data: apiVersion: v1 kind: Secret metadata: - name: {{ include "clearml.name" . }}-fm + name: {{ include "clearmlAgent.name" . }}-fm data: {{- range .Values.agentk8sglue.basePodTemplate.fileMounts }} {{ .name }}: {{ .fileContent | b64enc }} @@ -26,7 +26,7 @@ data: apiVersion: v1 kind: Secret metadata: - name: {{ include "clearml.name" $ }}-{{ $key }}-fm + name: {{ include "clearmlAgent.name" $ }}-{{ $key }}-fm data: {{- range .templateOverrides.fileMounts }} {{ .name }}: {{ .fileContent | b64enc }} diff --git a/charts/clearml-agent/templates/service-sessions.yaml b/charts/clearml-agent/templates/service-sessions.yaml index 3602291..87c1b29 100644 --- a/charts/clearml-agent/templates/service-sessions.yaml +++ b/charts/clearml-agent/templates/service-sessions.yaml @@ -7,7 +7,7 @@ kind: Service metadata: name: clearml-session-{{ . }} labels: - {{- include "clearml.labels" $ | nindent 4 }} + {{- include "clearmlAgent.labels" $ | nindent 4 }} {{- with $.Values.sessions.svcAnnotations }} annotations: {{- toYaml . 
| nindent 4 }} From a4f77c624df2d8c626edf3d3e541f7f4578a5e1b Mon Sep 17 00:00:00 2001 From: Valeriano Manassero <14011549+valeriano-manassero@users.noreply.github.com> Date: Mon, 13 Feb 2023 08:58:08 +0100 Subject: [PATCH 2/9] Create inactive-issues.yaml --- .github/workflows/inactive-issues.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .github/workflows/inactive-issues.yaml diff --git a/.github/workflows/inactive-issues.yaml b/.github/workflows/inactive-issues.yaml new file mode 100644 index 0000000..269e65c --- /dev/null +++ b/.github/workflows/inactive-issues.yaml @@ -0,0 +1,22 @@ +name: Close inactive issues +on: + schedule: + - cron: "30 1 * * *" + +jobs: + close-issues: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v7 + with: + days-before-issue-stale: 14 + days-before-issue-close: 7 + stale-issue-label: "stale" + stale-issue-message: "This issue is stale because it has been open for 14 days with no activity." + close-issue-message: "This issue was closed because it has been inactive for 7 days since being marked as stale." 
+ days-before-pr-stale: -1 + days-before-pr-close: -1 + repo-token: ${{ secrets.GITHUB_TOKEN }} From a29a144119338491f05e0ef6e462af2fdae302fc Mon Sep 17 00:00:00 2001 From: Valeriano Manassero <14011549+valeriano-manassero@users.noreply.github.com> Date: Mon, 13 Feb 2023 12:22:01 +0100 Subject: [PATCH 3/9] Changed: redis cluster configuration for production (#156) --- charts/clearml/values-production.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/charts/clearml/values-production.yaml b/charts/clearml/values-production.yaml index 553a8b6..95f8eca 100755 --- a/charts/clearml/values-production.yaml +++ b/charts/clearml/values-production.yaml @@ -16,6 +16,28 @@ webserver: ingress: enabled: true hostName: "app.clearml.127-0-0-1.nip.io" +redis: + master: + name: "{{ .Release.Name }}-redis" + persistence: + enabled: true + accessModes: + - ReadWriteOnce + size: 5Gi + ## If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner + storageClass: null + slave: + persistence: + enabled: true + accessModes: + - ReadWriteOnce + size: 5Gi + ## If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner + storageClass: null + cluster: + enabled: true + sentinel: + enabled: true mongodb: enabled: true architecture: replicaset From 97550c720f15f7771f8b2935726a362ca437c62e Mon Sep 17 00:00:00 2001 From: Valeriano Manassero <14011549+valeriano-manassero@users.noreply.github.com> Date: Tue, 14 Feb 2023 08:42:26 +0100 Subject: [PATCH 4/9] Fix cookiename availability (#158) * Fixed: cookieName availability * Changed: bump up version --- charts/clearml/Chart.yaml | 6 +++--- charts/clearml/README.md | 2 +- charts/clearml/templates/apiserver-deployment.yaml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/charts/clearml/Chart.yaml b/charts/clearml/Chart.yaml index 0cffa95..703ce76 100644 --- a/charts/clearml/Chart.yaml +++ 
b/charts/clearml/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: clearml description: MLOps platform type: application -version: "5.6.0" +version: "5.6.1" appVersion: "1.9.2" kubeVersion: ">= 1.21.0-0 < 1.27.0-0" home: https://clear.ml @@ -32,5 +32,5 @@ dependencies: condition: elasticsearch.enabled annotations: artifacthub.io/changes: | - - kind: added - description: multi host external elasticsearch support + - kind: fixed + description: custom cookieName always available diff --git a/charts/clearml/README.md b/charts/clearml/README.md index 7be6c9b..babdddf 100644 --- a/charts/clearml/README.md +++ b/charts/clearml/README.md @@ -1,6 +1,6 @@ # ClearML Ecosystem for Kubernetes -![Version: 5.6.0](https://img.shields.io/badge/Version-5.6.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.9.2](https://img.shields.io/badge/AppVersion-1.9.2-informational?style=flat-square) +![Version: 5.6.1](https://img.shields.io/badge/Version-5.6.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.9.2](https://img.shields.io/badge/AppVersion-1.9.2-informational?style=flat-square) MLOps platform diff --git a/charts/clearml/templates/apiserver-deployment.yaml b/charts/clearml/templates/apiserver-deployment.yaml index 44ad8d2..dfa7214 100644 --- a/charts/clearml/templates/apiserver-deployment.yaml +++ b/charts/clearml/templates/apiserver-deployment.yaml @@ -110,9 +110,9 @@ spec: value: /opt/clearml/config - name: CLEARML__apiserver__default_company_name value: "{{ .Values.clearml.defaultCompany }}" - {{- if not (eq .Values.clearml.cookieDomain "") }} - name: CLEARML__APISERVER__AUTH__SESSION_AUTH_COOKIE_NAME value: {{ .Values.clearml.cookieName }} + {{- if .Values.clearml.cookieDomain }} - name: CLEARML__APISERVER__AUTH__COOKIES__DOMAIN value: ".{{ .Values.clearml.cookieDomain }}" {{- end }} 
From 3075f5e28093bf169450193eee8abb1c6db1604c Mon Sep 17 00:00:00 2001 From: Valeriano Manassero <14011549+valeriano-manassero@users.noreply.github.com> Date: Tue, 14 Feb 2023 08:44:04 +0100 Subject: [PATCH 5/9] 157 improve documentation (#159) * Changed: updated installation guide * Fixed: typo in copy and paste * Changed: updated install guide * Fixed: use relative path --- INSTALL.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 33 ++++++++----------------------- 2 files changed, 65 insertions(+), 25 deletions(-) create mode 100644 INSTALL.md diff --git a/INSTALL.md b/INSTALL.md new file mode 100644 index 0000000..d152daf --- /dev/null +++ b/INSTALL.md @@ -0,0 +1,57 @@ +# ClearML Helm Charts Installation guide + +## Requirements + +### Setup a Kubernetes Cluster + +For setting up Kubernetes on various platforms refer to the Kubernetes [getting started guide](http://kubernetes.io/docs/getting-started-guides/). + +#### Setup a single node LOCAL Kubernetes on laptop/desktop (development) + +For setting up Kubernetes on your laptop/desktop we suggest [kind](https://kind.sigs.k8s.io). + +#### [Kubernetes Tanzu users only] Additional setup requirements + +For setting up Clear.ML on a Tanzu cluster, check [prerequisites](https://github.com/allegroai/clearml-helm-charts/tree/main/platform-specific-configs/tanzu). + +#### [Kubernetes Openshift users only] Additional setup requirements + +For setting up Clear.ML on a Openshift cluster, check [prerequisites](https://github.com/allegroai/clearml-helm-charts/tree/main/platform-specific-configs/openshift). + +### Install Helm + +Helm is a tool for managing Kubernetes charts. Charts are packages of pre-configured Kubernetes resources. + +To install Helm, refer to the [Helm install guide](https://github.com/helm/helm#install) and ensure that the `helm` binary is in the `PATH` of your shell. 
+ +## Helm charts installation + +### Helm Repo + +```bash +$ helm repo add allegroai https://allegroai.github.io/clearml-helm-charts +$ helm repo update +``` +### ClearML server ecosystem + +```bash +$ helm install clearml allegroai/clearml +``` + +### ClearML agent + +Agent is always related to a ClearML server ecosystem (by default using the `app.clear.ml` public service, but it can be on the same or another Kubernetes cluster or a single server installation). + +In the ClearML UI, go to Settings -> Workspace and create new credentials. + +In the following Helm chart install command: + +* set ACCESSKEY to the resulting credentials access_key +* set SECRETKEY to the resulting credentials secret_key +* set APISERVERURL to the resulting credentials api_server +* set FILESERVERURL to the resulting credentials files_server +* set WEBSERVERURL to the resulting credentials web_server + +```bash +$ helm install clearml-agent allegroai/clearml-agent --set clearml.agentk8sglueKey=ACCESSKEY --set clearml.agentk8sglueSecret=SECRETKEY --set agentk8sglue.apiServerUrlReference=APISERVERURL --set agentk8sglue.fileServerUrlReference=FILESERVERURL --set agentk8sglue.webServerUrlReference=WEBSERVERURL +``` diff --git a/README.md b/README.md index 0105be6..2f2915a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# ClearML Helm Charts Library for Kubernetes +# ClearML Helm Charts for Kubernetes ## Auto-Magical Experiment Manager & Version Control for AI @@ -23,7 +23,11 @@ Use this repository to deploy **clearml-server** on Kubernetes clusters.
## Provided in this repository -### [All around Helm Chart](https://github.com/allegroai/clearml-helm-charts/tree/main/charts/clearml) +### [ClearML server chart](https://github.com/allegroai/clearml-helm-charts/tree/main/charts/clearml) + +### [ClearML agent chart](https://github.com/allegroai/clearml-helm-charts/tree/main/charts/clearml-agent) + +### [ClearML serving chart](https://github.com/allegroai/clearml-helm-charts/tree/main/charts/clearml-serving) ## Who We Are @@ -40,30 +44,9 @@ will always upgrade with you. Apache License, Version 2.0, (see the [LICENSE](https://www.apache.org/licenses/LICENSE-2.0) for more information) -## Requirements +## Installation guide -### Setup a Kubernetes Cluster - -For setting up Kubernetes on various platforms refer to the Kubernetes [getting started guide](http://kubernetes.io/docs/getting-started-guides/). - -### Setup a single node LOCAL Kubernetes on laptop/desktop - -For setting up Kubernetes on your laptop/desktop we suggest [kind](https://kind.sigs.k8s.io). - -### Install Helm - -Helm is a tool for managing Kubernetes charts. Charts are packages of pre-configured Kubernetes resources. - -To install Helm, refer to the [Helm install guide](https://github.com/helm/helm#install) and ensure that the `helm` binary is in the `PATH` of your shell. - -## Usage - -```bash -$ helm repo add allegroai https://allegroai.github.io/clearml-helm-charts -$ helm repo update -$ helm search repo allegroai -$ helm install allegroai/ -``` +For installation instructions, follow the [Installation Guide](INSTALL.md).
## Documentation, Community & Support From 1f23bcf7cab89b27b4c03771482273788c12dbac Mon Sep 17 00:00:00 2001 From: Valeriano Manassero <14011549+valeriano-manassero@users.noreply.github.com> Date: Tue, 14 Feb 2023 16:31:27 +0100 Subject: [PATCH 6/9] 160 fileserver doesnt have an option to be with ephemeral storage (#164) * Added: fileserver emptyDir support * Changed: bump up version --- charts/clearml/Chart.yaml | 6 +++--- charts/clearml/README.md | 7 ++++--- charts/clearml/templates/fileserver-deployment.yaml | 5 +++++ charts/clearml/templates/fileserver-pvc.yaml | 2 ++ charts/clearml/values.yaml | 2 ++ 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/charts/clearml/Chart.yaml b/charts/clearml/Chart.yaml index 703ce76..e25f6ee 100644 --- a/charts/clearml/Chart.yaml +++ b/charts/clearml/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: clearml description: MLOps platform type: application -version: "5.6.1" +version: "5.7.0" appVersion: "1.9.2" kubeVersion: ">= 1.21.0-0 < 1.27.0-0" home: https://clear.ml @@ -32,5 +32,5 @@ dependencies: condition: elasticsearch.enabled annotations: artifacthub.io/changes: | - - kind: fixed - description: custom cookieName always available + - kind: added + description: fileserver support for emptyDir diff --git a/charts/clearml/README.md b/charts/clearml/README.md index babdddf..aba6e25 100644 --- a/charts/clearml/README.md +++ b/charts/clearml/README.md @@ -1,6 +1,6 @@ # ClearML Ecosystem for Kubernetes -![Version: 5.6.1](https://img.shields.io/badge/Version-5.6.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.9.2](https://img.shields.io/badge/AppVersion-1.9.2-informational?style=flat-square) +![Version: 5.7.0](https://img.shields.io/badge/Version-5.7.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 
1.9.2](https://img.shields.io/badge/AppVersion-1.9.2-informational?style=flat-square) MLOps platform @@ -222,7 +222,7 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0` | externalServices.mongodbConnectionStringBackend | string | `""` | Existing MongoDB connection string for AUTH to use if mongodb.enabled is false | | externalServices.redisHost | string | `""` | Existing Redis Hostname to use if redis.enabled is false | | externalServices.redisPort | int | `6379` | Existing Redis Port to use if redis.enabled is false | -| fileserver | object | `{"affinity":{},"enabled":true,"extraEnvs":[],"image":{"pullPolicy":"IfNotPresent","repository":"allegroai/clearml","tag":"1.9.2-317"},"ingress":{"annotations":{},"enabled":false,"hostName":"files.clearml.127-0-0-1.nip.io","ingressClassName":"","path":"/","tlsSecretName":""},"nodeSelector":{},"podAnnotations":{},"replicaCount":1,"resources":{"limits":{"cpu":"2000m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"256Mi"}},"securityContext":{},"service":{"nodePort":30081,"port":8081,"type":"NodePort"},"storage":{"data":{"accessMode":"ReadWriteOnce","class":"","existingPVC":"","size":"50Gi"}},"tolerations":[]}` | File Server configurations | +| fileserver | object | `{"affinity":{},"enabled":true,"extraEnvs":[],"image":{"pullPolicy":"IfNotPresent","repository":"allegroai/clearml","tag":"1.9.2-317"},"ingress":{"annotations":{},"enabled":false,"hostName":"files.clearml.127-0-0-1.nip.io","ingressClassName":"","path":"/","tlsSecretName":""},"nodeSelector":{},"podAnnotations":{},"replicaCount":1,"resources":{"limits":{"cpu":"2000m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"256Mi"}},"securityContext":{},"service":{"nodePort":30081,"port":8081,"type":"NodePort"},"storage":{"data":{"accessMode":"ReadWriteOnce","class":"","existingPVC":"","size":"50Gi"},"enabled":true},"tolerations":[]}` | File Server configurations | | fileserver.affinity | object | `{}` | File Server affinity setup | | fileserver.enabled | bool | `true` | 
Enable/Disable component deployment | | fileserver.extraEnvs | list | `[]` | File Server extra envrinoment variables | @@ -241,10 +241,11 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0` | fileserver.securityContext | object | `{}` | File Server pod security context | | fileserver.service | object | `{"nodePort":30081,"port":8081,"type":"NodePort"}` | File Server internal service configuration | | fileserver.service.nodePort | int | `30081` | If service.type set to NodePort, this will be set to service's nodePort field. If service.type is set to others, this field will be ignored | -| fileserver.storage | object | `{"data":{"accessMode":"ReadWriteOnce","class":"","existingPVC":"","size":"50Gi"}}` | File server persistence settings | +| fileserver.storage | object | `{"data":{"accessMode":"ReadWriteOnce","class":"","existingPVC":"","size":"50Gi"},"enabled":true}` | File server persistence settings | | fileserver.storage.data.accessMode | string | `"ReadWriteOnce"` | Access mode (must be ReadWriteMany if fileserver replica > 1) | | fileserver.storage.data.class | string | `""` | Storage class (use default if empty) | | fileserver.storage.data.existingPVC | string | `""` | If set, it uses an already existing PVC instead of dynamic provisioning | +| fileserver.storage.enabled | bool | `true` | If set to false no PVC is created and emptyDir is used | | fileserver.tolerations | list | `[]` | File Server tolerations setup | | imageCredentials | object | `{"email":"someone@host.com","enabled":false,"existingSecret":"","password":"pwd","registry":"docker.io","username":"someone"}` | Container registry configuration | | imageCredentials.email | string | `"someone@host.com"` | Email | diff --git a/charts/clearml/templates/fileserver-deployment.yaml b/charts/clearml/templates/fileserver-deployment.yaml index ef1dfe1..f20ca27 100644 --- a/charts/clearml/templates/fileserver-deployment.yaml +++ b/charts/clearml/templates/fileserver-deployment.yaml @@ -28,6 +28,7 @@ spec: {{- end }} {{- 
end }} volumes: + {{- if .Values.fileserver.storage.enabled }} {{- if .Values.fileserver.storage.data.existingPVC }} - name: fileserver-data persistentVolumeClaim: @@ -37,6 +38,10 @@ spec: persistentVolumeClaim: claimName: {{ include "fileserver.referenceName" . }}-data {{- end }} + {{- else }} + - name: fileserver-data + emptyDir: {} + {{- end }} securityContext: {{ toYaml .Values.fileserver.podSecurityContext | nindent 8 }} initContainers: - name: init-fileserver diff --git a/charts/clearml/templates/fileserver-pvc.yaml b/charts/clearml/templates/fileserver-pvc.yaml index 0e9d415..f6aa7d3 100644 --- a/charts/clearml/templates/fileserver-pvc.yaml +++ b/charts/clearml/templates/fileserver-pvc.yaml @@ -1,4 +1,5 @@ {{- if .Values.fileserver.enabled }} +{{- if .Values.fileserver.storage.enabled }} {{- if not .Values.fileserver.storage.data.existingPVC }} kind: PersistentVolumeClaim apiVersion: v1 @@ -17,3 +18,4 @@ spec: {{- end -}} {{- end }} {{- end }} +{{- end }} diff --git a/charts/clearml/values.yaml b/charts/clearml/values.yaml index 0a73c7c..dae48f8 100644 --- a/charts/clearml/values.yaml +++ b/charts/clearml/values.yaml @@ -207,6 +207,8 @@ fileserver: # fsGroup: 1001 # -- File server persistence settings storage: + # -- If set to false no PVC is created and emptyDir is used + enabled: true data: # -- If set, it uses an already existing PVC instead of dynamic provisioning existingPVC: "" From 5540188db18e4b3cc3979004885776ca4ad7f68b Mon Sep 17 00:00:00 2001 From: Valeriano Manassero <14011549+valeriano-manassero@users.noreply.github.com> Date: Wed, 15 Feb 2023 15:27:59 +0100 Subject: [PATCH 7/9] Add job support for task pod (#162) * Added: task as job support * Added: template generator * Fixed: typo * Changed: bump version * Added: changelog reference * Fixed: include function name * Fixed: checksum generator * Added: nindent * Added: changelog item * Fixed: job env var switch * Fixed: double Restart policy removed * Fixed: job template apiVersion --- 
charts/clearml-agent/Chart.yaml | 8 +- charts/clearml-agent/README.md | 9 +- charts/clearml-agent/templates/_helpers.tpl | 133 ++++++++++++- .../templates/agentk8sglue-configmap.yaml | 182 +----------------- .../templates/agentk8sglue-deployment.yaml | 9 +- .../templates/agentk8sglue-rbac.yaml | 16 ++ charts/clearml-agent/values.yaml | 4 +- 7 files changed, 174 insertions(+), 187 deletions(-) diff --git a/charts/clearml-agent/Chart.yaml b/charts/clearml-agent/Chart.yaml index def6d6c..66891df 100644 --- a/charts/clearml-agent/Chart.yaml +++ b/charts/clearml-agent/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: clearml-agent description: MLOps platform Task running agent type: application -version: "3.3.2" +version: "3.4.0" appVersion: "1.24" kubeVersion: ">= 1.21.0-0 < 1.27.0-0" home: https://clear.ml @@ -20,5 +20,9 @@ keywords: - "task agent" annotations: artifacthub.io/changes: | + - kind: added + description: support for parameter Job/Pod task - kind: fixed - description: clearml agent internal helper variable name + description: empty hostAliases map/array mismatch + - kind: fixed + description: agent deployment checksum diff --git a/charts/clearml-agent/README.md b/charts/clearml-agent/README.md index 991608d..10977f8 100644 --- a/charts/clearml-agent/README.md +++ b/charts/clearml-agent/README.md @@ -1,6 +1,6 @@ # ClearML Kubernetes Agent -![Version: 3.3.2](https://img.shields.io/badge/Version-3.3.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square) +![Version: 3.4.0](https://img.shields.io/badge/Version-3.4.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square) MLOps platform Task running agent @@ -30,16 
+30,16 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0` | Key | Type | Default | Description | |-----|------|---------|-------------| -| agentk8sglue | object | `{"affinity":{},"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"securityContext":{},"serviceExistingAccountName":"","tolerations":[],"volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. 
https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue | +| agentk8sglue | object | `{"affinity":{},"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"securityContext":{},"serviceExistingAccountName":"","taskAsJob":false,"tolerations":[],"volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. 
https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue | | agentk8sglue.affinity | object | `{}` | affinity setup for Agent pod (example in values.yaml comments) | | agentk8sglue.annotations | object | `{}` | annotations setup for Agent pod (example in values.yaml comments) | | agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url | -| agentk8sglue.basePodTemplate | object | `{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task | +| agentk8sglue.basePodTemplate | object | `{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task | | agentk8sglue.basePodTemplate.affinity | object | `{}` | affinity setup for pods spawned to consume ClearML Task | | agentk8sglue.basePodTemplate.annotations | object | `{}` | annotations setup for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.basePodTemplate.env | list | `[]` | environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.basePodTemplate.fileMounts | list | `[]` | file definition for pods spawned to consume ClearML Task (example in values.yaml comments) | -| agentk8sglue.basePodTemplate.hostAliases | object | `{}` | hostAliases setup for pods spawned to consume ClearML Task (example in values.yaml comments) | +| agentk8sglue.basePodTemplate.hostAliases | list | `[]` | hostAliases setup for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.basePodTemplate.initContainers | list 
| `[]` | initContainers definition for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.basePodTemplate.labels | object | `{}` | labels setup for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.basePodTemplate.nodeSelector | object | `{}` | nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments) | @@ -63,6 +63,7 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0` | agentk8sglue.replicaCount | int | `1` | Glue Agent number of pods | | agentk8sglue.securityContext | object | `{}` | Web Server pod security context | | agentk8sglue.serviceExistingAccountName | string | `""` | if set, don't create a serviceAccountName but use defined existing one | +| agentk8sglue.taskAsJob | bool | `false` | ClearML spawn tasks as jobs instead of pods | | agentk8sglue.tolerations | list | `[]` | tolerations setup for Agent pod (example in values.yaml comments) | | agentk8sglue.volumeMounts | list | `[]` | volume mounts definition for Glue Agent (example in values.yaml comments) | | agentk8sglue.volumes | list | `[]` | volumes definition for Glue Agent (example in values.yaml comments) | diff --git a/charts/clearml-agent/templates/_helpers.tpl b/charts/clearml-agent/templates/_helpers.tpl index fbed08c..77e52ad 100644 --- a/charts/clearml-agent/templates/_helpers.tpl +++ b/charts/clearml-agent/templates/_helpers.tpl @@ -72,7 +72,6 @@ Create secret to access docker registry {{- end }} {{- end }} - {{/* Create a string composed by queue names */}} @@ -83,3 +82,135 @@ Create a string composed by queue names {{- end }} {{- join " " $list }} {{- end }} + +{{/* +Create a task container template +*/}} +{{- define "taskContainer.containerTemplate" -}} +{{- if .main.Values.imageCredentials.enabled }} +imagePullSecrets: + - name: {{ .main.Values.imageCredentials.existingSecret | default (printf "%s-ark" (include "clearmlAgent.name" .main )) }} +{{- end }} +schedulerName: {{ 
.value.templateOverrides.schedulerName | default (.main.Values.agentk8sglue.basePodTemplate.schedulerName) }} +restartPolicy: Never +securityContext: + {{- .value.templateOverrides.securityContext | default .main.Values.agentk8sglue.basePodTemplate.securityContext | toYaml | nindent 2 }} +hostAliases: + {{- .value.templateOverrides.hostAliases | default .main.Values.agentk8sglue.basePodTemplate.hostAliases | toYaml | nindent 2 }} +volumes: + {{ $computedvolumes := (.value.templateOverrides.volumes | default .main.Values.agentk8sglue.basePodTemplate.volumes) }} + {{- if $computedvolumes }}{{- $computedvolumes | toYaml | nindent 2 }}{{- end }} + {{- if .value.templateOverrides.fileMounts }} + - name: filemounts + secret: + secretName: {{ include "clearmlAgent.name" .main }}-{{ .key }}-fm + {{- else if .main.Values.agentk8sglue.basePodTemplate.fileMounts }} + - name: filemounts + secret: + secretName: {{ include "clearmlAgent.name" .main }}-fm + {{- end }} +{{- if not .main.Values.enterpriseFeatures.serviceAccountClusterAccess }} +serviceAccountName: {{ include "clearmlAgent.serviceAccountName" .main }} +{{- end }} +initContainers: + {{- .value.templateOverrides.initContainers | default .main.Values.agentk8sglue.basePodTemplate.initContainers | toYaml | nindent 2 }} +containers: +- resources: + {{- .value.templateOverrides.resources | default .main.Values.agentk8sglue.basePodTemplate.resources | toYaml | nindent 4 }} + ports: + - containerPort: 10022 + volumeMounts: + {{ $computedvolumemounts := (.value.templateOverrides.volumeMounts | default .main.Values.agentk8sglue.basePodTemplate.volumeMounts) }} + {{- if $computedvolumemounts }}{{- $computedvolumemounts | toYaml | nindent 4 }}{{- end }} + {{- if .value.templateOverrides.fileMounts }} + {{- range .value.templateOverrides.fileMounts }} + - name: filemounts + mountPath: "{{ .folderPath }}/{{ .name }}" + subPath: "{{ .name }}" + readOnly: true + {{- end }} + {{- else if 
.main.Values.agentk8sglue.basePodTemplate.fileMounts }} + {{- range .main.Values.agentk8sglue.basePodTemplate.fileMounts }} + - name: filemounts + mountPath: "{{ .folderPath }}/{{ .name }}" + subPath: "{{ .name }}" + readOnly: true + {{- end }} + {{- end }} + env: + - name: CLEARML_API_HOST + value: {{ .main.Values.agentk8sglue.apiServerUrlReference }} + - name: CLEARML_WEB_HOST + value: {{ .main.Values.agentk8sglue.webServerUrlReference }} + - name: CLEARML_FILES_HOST + value: {{ .main.Values.agentk8sglue.fileServerUrlReference }} + {{- if not .main.Values.enterpriseFeatures.useOwnerToken }} + - name: CLEARML_API_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ .main.Values.clearml.existingAgentk8sglueSecret | default (printf "%s-ac" (include "clearmlAgent.name" .main )) }} + key: agentk8sglue_key + - name: CLEARML_API_SECRET_KEY + valueFrom: + secretKeyRef: + name: {{ .main.Values.clearml.existingAgentk8sglueSecret | default (printf "%s-ac" (include "clearmlAgent.name" .main )) }} + key: agentk8sglue_secret + {{- end }} + - name: PYTHONUNBUFFERED + value: "x" + {{- if not .main.Values.agentk8sglue.clearmlcheckCertificate }} + - name: CLEARML_API_HOST_VERIFY_CERT + value: "false" + {{- end }} + {{ $computedenvs := (.value.templateOverrides.env| default .main.Values.agentk8sglue.basePodTemplate.env) }} + {{- if $computedenvs }}{{- $computedenvs | toYaml | nindent 4 }}{{- end }} +nodeSelector: + {{ .value.templateOverrides.nodeSelector | default .main.Values.agentk8sglue.basePodTemplate.nodeSelector | toYaml | nindent 2 }} +tolerations: + {{ .value.templateOverrides.tolerations | default .main.Values.agentk8sglue.basePodTemplate.tolerations | toYaml | nindent 2 }} +affinity: + {{ .value.templateOverrides.affinity | default .main.Values.agentk8sglue.basePodTemplate.affinity | toYaml | nindent 2 }} +{{- end }} + +{{/* +Create a task container template +*/}} +{{- define "taskContainer.podTemplate" -}} +{{- range $key, $value := $.Values.enterpriseFeatures.queues }} 
+{{ $key }}: + apiVersion: v1 + kind: Pod + metadata: + namespace: {{ $.Release.Namespace }} + labels: + {{ $value.templateOverrides.labels | default $.Values.agentk8sglue.basePodTemplate.labels | toYaml }} + annotations: + {{ $value.templateOverrides.annotations | default $.Values.agentk8sglue.basePodTemplate.annotations | toYaml }} + spec: + {{- $data := dict "main" $ "key" $key "value" $value -}} + {{- include "taskContainer.containerTemplate" $data | nindent 4}} +{{- end }} +{{- end }} + +{{/* +Create a task container template +*/}} +{{- define "taskContainer.jobTemplate" -}} +{{- range $key, $value := $.Values.enterpriseFeatures.queues }} +{{ $key }}: + apiVersion: batch/v1 + kind: Job + metadata: + namespace: {{ $.Release.Namespace }} + labels: + {{ $value.templateOverrides.labels | default $.Values.agentk8sglue.basePodTemplate.labels | toYaml }} + annotations: + {{ $value.templateOverrides.annotations | default $.Values.agentk8sglue.basePodTemplate.annotations | toYaml }} + spec: + template: + spec: + {{- $data := dict "main" $ "key" $key "value" $value -}} + {{- include "taskContainer.containerTemplate" $data | nindent 8 }} + backoffLimit: 0 +{{- end }} +{{- end }} diff --git a/charts/clearml-agent/templates/agentk8sglue-configmap.yaml b/charts/clearml-agent/templates/agentk8sglue-configmap.yaml index df064d0..b0b8a60 100644 --- a/charts/clearml-agent/templates/agentk8sglue-configmap.yaml +++ b/charts/clearml-agent/templates/agentk8sglue-configmap.yaml @@ -5,184 +5,10 @@ metadata: data: {{- if .Values.enterpriseFeatures.enabled }} template.yaml: | - {{- range $key, $value := $.Values.enterpriseFeatures.queues }} - {{ $key }}: - apiVersion: v1 - metadata: - namespace: {{ $.Release.Namespace }} - {{- if $value.templateOverrides.labels }} - labels: - {{- toYaml $value.templateOverrides.labels | nindent 10 }} - {{- else if $.Values.agentk8sglue.basePodTemplate.labels }} - labels: - {{- toYaml $.Values.agentk8sglue.basePodTemplate.labels | nindent 10 }} - {{- 
end}} - {{- if $value.templateOverrides.annotations }} - annotations: - {{- toYaml $value.templateOverrides.annotations | nindent 10 }} - {{- else if $.Values.agentk8sglue.basePodTemplate.annotations }} - annotations: - {{- toYaml $.Values.agentk8sglue.basePodTemplate.annotations | nindent 10 }} - {{- end}} - spec: - {{- if $.Values.imageCredentials.enabled }} - imagePullSecrets: - {{- if $.Values.imageCredentials.existingSecret }} - - name: {{ $.Values.imageCredentials.existingSecret }} - {{- else }} - - name: {{ include "clearmlAgent.name" $ }}-ark - {{- end }} - {{- end }} - {{- if $value.templateOverrides.schedulerName }} - schedulerName: {{ $value.templateOverrides.schedulerName }} - {{- else if $.Values.agentk8sglue.basePodTemplate.schedulerName }} - schedulerName: {{ $.Values.agentk8sglue.basePodTemplate.schedulerName }} - {{- end}} - restartPolicy: Never - {{- if $value.templateOverrides.securityContext }} - securityContext: - {{- toYaml $value.templateOverrides.securityContext | nindent 10 }} - {{- else if $.Values.agentk8sglue.basePodTemplate.securityContext }} - securityContext: - {{- toYaml $.Values.agentk8sglue.basePodTemplate.securityContext | nindent 10 }} - {{- end}} - {{- if $value.templateOverrides.hostAliases }} - {{- with $value.templateOverrides.hostAliases }} - hostAliases: - {{- toYaml . | nindent 10 }} - {{- end }} - {{- else if $.Values.agentk8sglue.basePodTemplate.hostAliases }} - {{- with $.Values.agentk8sglue.basePodTemplate.hostAliases }} - hostAliases: - {{- toYaml . 
| nindent 10 }} - {{- end }} - {{- end }} - volumes: - {{- if $value.templateOverrides.volumes }} - {{- toYaml $value.templateOverrides.volumes | nindent 10 }} - {{- else if $.Values.agentk8sglue.basePodTemplate.volumes }} - {{- toYaml $.Values.agentk8sglue.basePodTemplate.volumes | nindent 10 }} - {{- end }} - {{- if $value.templateOverrides.fileMounts }} - - name: filemounts - secret: - secretName: {{ include "clearmlAgent.name" $ }}-{{ $key }}-fm - {{- else if $.Values.agentk8sglue.basePodTemplate.fileMounts }} - - name: filemounts - secret: - secretName: {{ include "clearmlAgent.name" $ }}-fm - {{- end }} - {{- if not $.Values.enterpriseFeatures.serviceAccountClusterAccess }} - serviceAccountName: {{ include "clearmlAgent.serviceAccountName" $ }} - {{- end }} - {{- if $value.templateOverrides.initContainers }} - initContainers: - {{- toYaml $value.templateOverrides.initContainers | nindent 10 }} - {{- else if $.Values.agentk8sglue.basePodTemplate.initContainers }} - initContainers: - {{- toYaml $.Values.agentk8sglue.basePodTemplate.initContainers | nindent 10 }} - {{- end }} - containers: - - resources: - {{- if $value.templateOverrides.resources }} - {{- toYaml $value.templateOverrides.resources | nindent 12 }} - {{- else if $.Values.agentk8sglue.basePodTemplate.resources }} - {{- toYaml $.Values.agentk8sglue.basePodTemplate.resources | nindent 12 }} - {{- end}} - ports: - - containerPort: 10022 - volumeMounts: - {{- if $value.templateOverrides.volumeMounts }} - {{- toYaml $value.templateOverrides.volumeMounts | nindent 12 }} - {{- else if $.Values.agentk8sglue.basePodTemplate.volumeMounts }} - {{- toYaml $.Values.agentk8sglue.basePodTemplate.volumeMounts | nindent 12 }} - {{- end }} - {{- if $value.templateOverrides.fileMounts }} - {{- range $value.templateOverrides.fileMounts }} - - name: filemounts - mountPath: "{{ .folderPath }}/{{ .name }}" - subPath: "{{ .name }}" - readOnly: true - {{- end }} - {{- else if 
$.Values.agentk8sglue.basePodTemplate.fileMounts }} - {{- range $.Values.agentk8sglue.basePodTemplate.fileMounts }} - - name: filemounts - mountPath: "{{ .folderPath }}/{{ .name }}" - subPath: "{{ .name }}" - readOnly: true - {{- end }} - {{- end }} - env: - - name: CLEARML_API_HOST - value: {{ $.Values.agentk8sglue.apiServerUrlReference }} - - name: CLEARML_WEB_HOST - value: {{ $.Values.agentk8sglue.webServerUrlReference }} - - name: CLEARML_FILES_HOST - value: {{ $.Values.agentk8sglue.fileServerUrlReference }} - {{- if not $.Values.enterpriseFeatures.useOwnerToken }} - - name: CLEARML_API_ACCESS_KEY - valueFrom: - secretKeyRef: - {{- if $.Values.clearml.existingAgentk8sglueSecret }} - name: {{ $.Values.clearml.existingAgentk8sglueSecret }} - {{- else }} - name: {{ include "clearmlAgent.name" $ }}-ac - {{- end }} - key: agentk8sglue_key - - name: CLEARML_API_SECRET_KEY - valueFrom: - secretKeyRef: - {{- if $.Values.clearml.existingAgentk8sglueSecret }} - name: {{ $.Values.clearml.existingAgentk8sglueSecret }} - {{- else }} - name: {{ include "clearmlAgent.name" $ }}-ac - {{- end }} - key: agentk8sglue_secret - {{- end }} - - name: PYTHONUNBUFFERED - value: "x" - {{- if not $.Values.agentk8sglue.clearmlcheckCertificate }} - - name: CLEARML_API_HOST_VERIFY_CERT - value: "false" - {{- end }} - {{- if $value.templateOverrides.env }} - {{- toYaml $value.templateOverrides.env | nindent 12 }} - {{- else if $.Values.agentk8sglue.basePodTemplate.env }} - {{- toYaml $.Values.agentk8sglue.basePodTemplate.env | nindent 12 }} - {{- end }} - {{- if $value.templateOverrides.nodeSelector }} - {{- with $value.templateOverrides.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- else if $.Values.agentk8sglue.basePodTemplate.nodeSelector }} - {{- with $.Values.agentk8sglue.basePodTemplate.nodeSelector }} - nodeSelector: - {{- toYaml . 
| nindent 10 }} - {{- end }} - {{- end }} - {{- if $value.templateOverrides.tolerations }} - {{- with $value.templateOverrides.tolerations }} - tolerations: - {{- toYaml . | nindent 10 }} - {{- end }} - {{- else if $.Values.agentk8sglue.basePodTemplate.tolerations }} - {{- with $.Values.agentk8sglue.basePodTemplate.tolerations }} - tolerations: - {{- toYaml . | nindent 10 }} - {{- end }} - {{- end }} - {{- if $value.templateOverrides.affinity }} - {{- with $value.templateOverrides.affinity }} - affinity: - {{- toYaml . | nindent 10 }} - {{- end }} - {{- else if $.Values.agentk8sglue.basePodTemplate.affinity }} - {{- with $.Values.agentk8sglue.basePodTemplate.affinity }} - affinity: - {{- toYaml . | nindent 10 }} - {{- end }} - {{- end }} + {{- if .Values.agentk8sglue.taskAsJob }} + {{ include "taskContainer.jobTemplate" . | nindent 4}} + {{- else }} + {{ include "taskContainer.podTemplate" . | nindent 4}} {{- end }} secrets.yaml: | {{- range $key, $value := $.Values.enterpriseFeatures.queues }} diff --git a/charts/clearml-agent/templates/agentk8sglue-deployment.yaml b/charts/clearml-agent/templates/agentk8sglue-deployment.yaml index f54b487..e64f8ae 100644 --- a/charts/clearml-agent/templates/agentk8sglue-deployment.yaml +++ b/charts/clearml-agent/templates/agentk8sglue-deployment.yaml @@ -14,7 +14,7 @@ spec: template: metadata: annotations: - checksum/config: {{ printf "%s%s" .Values.clearml .Values.agentk8sglue | sha256sum }} + checksum/config: {{ printf "%s" .Values | sha256sum }} {{- include "clearmlAgent.annotations" . | nindent 8 }} labels: {{- include "clearmlAgent.labels" . | nindent 8 }} @@ -158,6 +158,13 @@ spec: value: "interactive" {{- end }} {{- end }} + {{- if .Values.agentk8sglue.taskAsJob }} + - name: "CLEARML_K8S_GLUE_KIND" + value: "job" + {{- else }} + - name: "CLEARML_K8S_GLUE_KIND" + value: "pod" + {{- end }} {{- if .Values.enterpriseFeatures.enabled }} - name: K8S_GLUE_QUEUE value: {{ include "agentk8sglue.queues" . 
| quote }} diff --git a/charts/clearml-agent/templates/agentk8sglue-rbac.yaml b/charts/clearml-agent/templates/agentk8sglue-rbac.yaml index 549ca3a..f1693ca 100644 --- a/charts/clearml-agent/templates/agentk8sglue-rbac.yaml +++ b/charts/clearml-agent/templates/agentk8sglue-rbac.yaml @@ -24,6 +24,14 @@ rules: resources: - namespaces verbs: ["list"] + {{- if .Values.agentk8sglue.taskAsJob }} + - apiGroups: + - batch + - extensions + resources: + - jobs + verbs: ["get", "list", "watch", "create", "patch", "delete"] + {{- end }} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding @@ -56,6 +64,14 @@ rules: resources: - namespaces verbs: ["list"] + {{- if .Values.agentk8sglue.taskAsJob }} + - apiGroups: + - batch + - extensions + resources: + - jobs + verbs: ["get", "list", "watch", "create", "patch", "delete"] + {{- end }} --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding diff --git a/charts/clearml-agent/values.yaml b/charts/clearml-agent/values.yaml index 4b5c9b9..97e9591 100644 --- a/charts/clearml-agent/values.yaml +++ b/charts/clearml-agent/values.yaml @@ -61,6 +61,8 @@ agentk8sglue: defaultContainerImage: ubuntu:18.04 # -- ClearML queue this agent will consume queue: default + # -- ClearML spawn tasks as jobs instead of pods + taskAsJob: false # -- Custom Bash script for the Glue Agent # -- labels setup for Agent pod (example in values.yaml comments) labels: {} @@ -181,7 +183,7 @@ agentk8sglue: # runAsUser: 1001 # fsGroup: 1001 # -- hostAliases setup for pods spawned to consume ClearML Task (example in values.yaml comments) - hostAliases: {} + hostAliases: [] # - ip: "127.0.0.1" # hostnames: # - "foo.local" From fded7aa5b49cf25c427433cfaaf2996784c55615 Mon Sep 17 00:00:00 2001 From: Valeriano Manassero <14011549+valeriano-manassero@users.noreply.github.com> Date: Thu, 16 Feb 2023 09:39:23 +0100 Subject: [PATCH 8/9] 165 clearml agent priorityclassname in pod template (#166) * Added: priorityclass name * Changed: bump up version --- 
charts/clearml-agent/Chart.yaml | 8 ++------ charts/clearml-agent/README.md | 7 ++++--- charts/clearml-agent/templates/_helpers.tpl | 1 + .../clearml-agent/templates/agentk8sglue-configmap.yaml | 1 + charts/clearml-agent/values.yaml | 2 ++ 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/charts/clearml-agent/Chart.yaml b/charts/clearml-agent/Chart.yaml index 66891df..96dac2c 100644 --- a/charts/clearml-agent/Chart.yaml +++ b/charts/clearml-agent/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: clearml-agent description: MLOps platform Task running agent type: application -version: "3.4.0" +version: "3.5.0" appVersion: "1.24" kubeVersion: ">= 1.21.0-0 < 1.27.0-0" home: https://clear.ml @@ -21,8 +21,4 @@ keywords: annotations: artifacthub.io/changes: | - kind: added - description: support for parameter Job/Pod task - - kind: fixed - description: empty hostAliases map/array mismatch - - kind: fixed - description: agent deployment checksum + description: support for priorityClass in task pod template diff --git a/charts/clearml-agent/README.md b/charts/clearml-agent/README.md index 10977f8..d582cb4 100644 --- a/charts/clearml-agent/README.md +++ b/charts/clearml-agent/README.md @@ -1,6 +1,6 @@ # ClearML Kubernetes Agent -![Version: 3.4.0](https://img.shields.io/badge/Version-3.4.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square) +![Version: 3.5.0](https://img.shields.io/badge/Version-3.5.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square) MLOps platform Task running agent @@ -30,11 +30,11 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0` | Key | Type | Default | Description | 
|-----|------|---------|-------------| -| agentk8sglue | object | `{"affinity":{},"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"securityContext":{},"serviceExistingAccountName":"","taskAsJob":false,"tolerations":[],"volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. 
https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue | +| agentk8sglue | object | `{"affinity":{},"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"priorityClassName":"","resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"securityContext":{},"serviceExistingAccountName":"","taskAsJob":false,"tolerations":[],"volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. 
https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue | | agentk8sglue.affinity | object | `{}` | affinity setup for Agent pod (example in values.yaml comments) | | agentk8sglue.annotations | object | `{}` | annotations setup for Agent pod (example in values.yaml comments) | | agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url | -| agentk8sglue.basePodTemplate | object | `{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task | +| agentk8sglue.basePodTemplate | object | `{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"priorityClassName":"","resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task | | agentk8sglue.basePodTemplate.affinity | object | `{}` | affinity setup for pods spawned to consume ClearML Task | | agentk8sglue.basePodTemplate.annotations | object | `{}` | annotations setup for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.basePodTemplate.env | list | `[]` | environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) | @@ -43,6 +43,7 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0` | agentk8sglue.basePodTemplate.initContainers | list | `[]` | initContainers definition for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.basePodTemplate.labels | object | `{}` | labels setup for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.basePodTemplate.nodeSelector | object | `{}` | nodeSelector setup for pods spawned to consume ClearML Task 
(example in values.yaml comments) | +| agentk8sglue.basePodTemplate.priorityClassName | string | `""` | priorityClassName setup for pods spawned to consume ClearML Task | | agentk8sglue.basePodTemplate.resources | object | `{}` | resources declaration for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.basePodTemplate.schedulerName | string | `""` | schedulerName setup for pods spawned to consume ClearML Task | | agentk8sglue.basePodTemplate.securityContext | object | `{}` | securityContext setup for pods spawned to consume ClearML Task (example in values.yaml comments) | diff --git a/charts/clearml-agent/templates/_helpers.tpl b/charts/clearml-agent/templates/_helpers.tpl index 77e52ad..312e0f1 100644 --- a/charts/clearml-agent/templates/_helpers.tpl +++ b/charts/clearml-agent/templates/_helpers.tpl @@ -114,6 +114,7 @@ serviceAccountName: {{ include "clearmlAgent.serviceAccountName" .main }} {{- end }} initContainers: {{- .value.templateOverrides.initContainers | default .main.Values.agentk8sglue.basePodTemplate.initContainers | toYaml | nindent 2 }} +priorityClassName: {{ .value.templateOverrides.priorityClassName | default .main.Values.agentk8sglue.basePodTemplate.priorityClassName }} containers: - resources: {{- .value.templateOverrides.resources | default .main.Values.agentk8sglue.basePodTemplate.resources | toYaml | nindent 4 }} diff --git a/charts/clearml-agent/templates/agentk8sglue-configmap.yaml b/charts/clearml-agent/templates/agentk8sglue-configmap.yaml index b0b8a60..3c3aad1 100644 --- a/charts/clearml-agent/templates/agentk8sglue-configmap.yaml +++ b/charts/clearml-agent/templates/agentk8sglue-configmap.yaml @@ -42,6 +42,7 @@ data: {{- toYaml . 
| nindent 8 }} {{- end }} serviceAccountName: {{ include "clearmlAgent.serviceAccountName" $ }} + priorityClassName: {{ .Values.agentk8sglue.basePodTemplate.priorityClassName }} containers: - resources: {{- toYaml .Values.agentk8sglue.basePodTemplate.resources | nindent 10 }} diff --git a/charts/clearml-agent/values.yaml b/charts/clearml-agent/values.yaml index 97e9591..cc51008 100644 --- a/charts/clearml-agent/values.yaml +++ b/charts/clearml-agent/values.yaml @@ -168,6 +168,8 @@ agentk8sglue: resources: {} # limits: # nvidia.com/gpu: 1 + # -- priorityClassName setup for pods spawned to consume ClearML Task + priorityClassName: "" # -- nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments) nodeSelector: {} # fleet: gpu-nodes From 013734c184e5c2e52631f93c60d346a452ec109e Mon Sep 17 00:00:00 2001 From: pollfly <75068813+pollfly@users.noreply.github.com> Date: Thu, 16 Feb 2023 14:25:00 +0200 Subject: [PATCH 9/9] edits (#168) --- INSTALL.md | 52 +++++++++++++++++++++------------------------------- README.md | 14 +++++++------- 2 files changed, 28 insertions(+), 38 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index d152daf..4645206 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -2,29 +2,17 @@ ## Requirements -### Setup a Kubernetes Cluster +* Set up a Kubernetes Cluster - for setting up Kubernetes on various platforms refer to the Kubernetes [getting started guide](http://kubernetes.io/docs/getting-started-guides/). + * Set up a single-node LOCAL Kubernetes on laptop/desktop - for setting up Kubernetes on your laptop/desktop, we suggest [kind](https://kind.sigs.k8s.io). 
+ * For **Kubernetes Tanzu users** - see [prerequisites](https://github.com/allegroai/clearml-helm-charts/tree/main/platform-specific-configs/tanzu) + for setting up ClearML on a Tanzu cluster + * For **Kubernetes Openshift users** - see [prerequisites](https://github.com/allegroai/clearml-helm-charts/tree/main/platform-specific-configs/openshift) + for setting up ClearML on an Openshift cluster -For setting up Kubernetes on various platforms refer to the Kubernetes [getting started guide](http://kubernetes.io/docs/getting-started-guides/). +* Install Helm - Helm is a tool for managing Kubernetes charts. Charts are packages of pre-configured Kubernetes +resources. To install Helm, refer to the [Helm install guide](https://github.com/helm/helm#install) and ensure that the `helm` binary is in the `PATH` of your shell. -#### Setup a single node LOCAL Kubernetes on laptop/desktop (development) - -For setting up Kubernetes on your laptop/desktop we suggest [kind](https://kind.sigs.k8s.io). - -#### [Kubernetes Tanzu users only] Additional setup requirements - -For setting up Clear.ML on a Tanzu cluster, check [prerequisites](https://github.com/allegroai/clearml-helm-charts/tree/main/platform-specific-configs/tanzu). - -#### [Kubernetes Openshift users only] Additional setup requirements - -For setting up Clear.ML on a Openshift cluster, check [prerequisites](https://github.com/allegroai/clearml-helm-charts/tree/main/platform-specific-configs/openshift). - -### Install Helm - -Helm is a tool for managing Kubernetes charts. Charts are packages of pre-configured Kubernetes resources. - -To install Helm, refer to the [Helm install guide](https://github.com/helm/helm#install) and ensure that the `helm` binary is in the `PATH` of your shell.
- -## Helm charts installation +## Helm Charts Installation ### Helm Repo @@ -32,25 +20,27 @@ To install Helm, refer to the [Helm install guide](https://github.com/helm/helm# $ helm repo add allegroai https://allegroai.github.io/clearml-helm-charts $ helm repo update ``` -### ClearML server ecosystem +### ClearML Server Ecosystem ```bash $ helm install clearml allegroai/clearml ``` -### ClearML agent +### ClearML Agent -Agent is always related a ClearML server ecosystem (by default using `app.clear.ml` public service but can be on same or another Kubernetes cluster or a single server installation). +A ClearML Agent is always related to a ClearML server ecosystem (by default using the `app.clear.ml` hosted server, but +can be on the same or a different Kubernetes cluster or a single server installation). -On ClearML UI, Settings -> Workspace and Create new Credentials. +In the ClearML UI, go to **Settings > Workspace** and click **Create New Credentials**. The dialog that pops up displays +the new credentials.
-In following Helm chart install command: +In the Helm chart `install` command below: -* set ACCESSKEY to resuted credentials access_key -* set SECRETKEY to resuted credentials secret_key -* set APIERVERURL to resuted credentials api_server -* set FILESSERVERURL to resuted credentials files_server -* set WEBSERVERURL to resuted credentials web_server +* Set `ACCESSKEY` to the new credentials' `access_key` value +* Set `SECRETKEY` to the new credentials' `secret_key` value +* Set `APISERVERURL` to the new credentials' `api_server` value +* Set `FILESERVERURL` to the new credentials' `files_server` value +* Set `WEBSERVERURL` to the new credentials' `web_server` value ```bash $ helm install clearml-agent allegroai/clearml-agent --set clearml.agentk8sglueKey=ACCESSKEY --set clearml.agentk8sglueSecret=SECRETKEY --set agentk8sglue.apiServerUrlReference=APISERVERURL --set agentk8sglue.fileServerUrlReference=FILESERVERURL --set agentk8sglue.webServerUrlReference=WEBSERVERURL diff --git a/README.md b/README.md index 2f2915a..4b1a56f 100644 --- a/README.md +++ b/README.md @@ -31,32 +31,32 @@ Use this repository to deploy **clearml-server** on Kubernetes clusters. ## Who We Are -ClearML is supported by the team behind *allegro.ai*, -where we build deep learning pipelines and infrastructure for enterprise companies. +ClearML is supported by you :heart: and the [clear.ml](https://clear.ml) team, which helps enterprise companies build +scalable MLOps. We built ClearML to track and control the glorious but messy process of training production-grade deep learning models. We are committed to vigorously supporting and expanding the capabilities of ClearML. -We promise to always be backwardly compatible, making sure all your logs, data and pipelines +We promise to always be backwards compatible, making sure all your logs, data, and pipelines will always upgrade with you.
## License Apache License, Version 2.0, (see the [LICENSE](https://www.apache.org/licenses/LICENSE-2.0) for more information) -## Installation guide +## Installation Guide For installation instruction, follow related [Installation Guide](INSTALL.md). ## Documentation, Community & Support -More information in the [official documentation](https://allegro.ai/clearml/docs) and [on YouTube](https://www.youtube.com/c/ClearML). +See more information in the [official documentation](https://clear.ml/docs/latest/docs) and [on YouTube](https://www.youtube.com/c/ClearML). -If you have any questions: post on our [Slack Channel](https://join.slack.com/t/clearml/shared_invite/zt-c0t13pty-aVUZZW1TSSSg2vyIGVPBhg), or tag your questions on [stackoverflow](https://stackoverflow.com/questions/tagged/clearml) with '**[clearml](https://stackoverflow.com/questions/tagged/clearml)**' tag (*previously [trains](https://stackoverflow.com/questions/tagged/trains) tag*). +If you have any questions, post on our [Slack Channel](https://join.slack.com/t/clearml/shared_invite/zt-c0t13pty-aVUZZW1TSSSg2vyIGVPBhg), or tag your questions on [stackoverflow](https://stackoverflow.com/questions/tagged/clearml) with '**[clearml](https://stackoverflow.com/questions/tagged/clearml)**' tag (*previously [trains](https://stackoverflow.com/questions/tagged/trains) tag*). For feature requests or bug reports, please use [GitHub issues](https://github.com/allegroai/clearml-helm-charts/issues). -Additionally, you can always find us at *clearml@allegro.ai* +Additionally, you can always find us at *support@clear.ml* ## Contributing