diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9ea0800..7587a61 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,7 +1,8 @@ name: Lint and Test Charts on: - pull_request: + pull_request_target: + types: [opened, synchronize, edited, reopened] paths: - 'charts/**' diff --git a/charts/clearml-agent/Chart.yaml b/charts/clearml-agent/Chart.yaml index 98ce6ed..29a0382 100644 --- a/charts/clearml-agent/Chart.yaml +++ b/charts/clearml-agent/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: clearml-agent description: MLOps platform type: application -version: "1.3.0" +version: "2.0.0" appVersion: "1.24" kubeVersion: ">= 1.19.0-0 < 1.25.0-0" home: https://clear.ml diff --git a/charts/clearml-agent/README.md b/charts/clearml-agent/README.md index e82a4dc..e020be2 100644 --- a/charts/clearml-agent/README.md +++ b/charts/clearml-agent/README.md @@ -1,6 +1,6 @@ -# clearml-agent +# ClearML Kubernetes Agent -![Version: 1.3.0](https://img.shields.io/badge/Version-1.3.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square) +![Version: 2.0.0](https://img.shields.io/badge/Version-2.0.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square) MLOps platform @@ -12,6 +12,11 @@ MLOps platform | ---- | ------ | --- | | valeriano-manassero | | | +## Introduction + +The **clearml-agent** is the Kubernetes agent for [ClearML](https://github.com/allegroai/clearml). +It allows you to schedule distributed experiments on a Kubernetes cluster.
+ ## Source Code * @@ -25,7 +30,7 @@ Kubernetes: `>= 1.19.0-0 < 1.25.0-0` | Key | Type | Default | Description | |-----|------|---------|-------------| -| agentk8sglue | object | `{"apiServerUrlReference":"https://api.clear.ml","clearmlcheckCertificate":true,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileServerUrlReference":"https://files.clear.ml","id":"k8s-agent","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-18"},"maxPods":10,"podTemplate":{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumes":[]},"queue":"default","replicaCount":1,"serviceAccountName":"default","webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue | +| agentk8sglue | object | `{"apiServerUrlReference":"https://api.clear.ml","clearmlcheckCertificate":true,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileServerUrlReference":"https://files.clear.ml","id":"k8s-agent","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-18"},"maxPods":10,"podTemplate":{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"queue":"default","replicaCount":1,"serviceAccountName":"default","webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue | | agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url | | agentk8sglue.clearmlcheckCertificate | bool | `true` | Check certificates validity for evefry UrlReference below. 
| | agentk8sglue.defaultContainerImage | string | `"ubuntu:18.04"` | default container image for ClearML Task pod | @@ -34,11 +39,12 @@ Kubernetes: `>= 1.19.0-0 < 1.25.0-0` | agentk8sglue.id | string | `"k8s-agent"` | ClearML worker ID (must be unique across the entire ClearMLenvironment) | | agentk8sglue.image | object | `{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-18"}` | Glue Agent image configuration | | agentk8sglue.maxPods | int | `10` | maximum concurrent consume ClearML Task pod | -| agentk8sglue.podTemplate | object | `{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumes":[]}` | template for pods spawned to consume ClearML Task | +| agentk8sglue.podTemplate | object | `{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | template for pods spawned to consume ClearML Task | | agentk8sglue.podTemplate.env | list | `[]` | environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.podTemplate.nodeSelector | object | `{}` | nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.podTemplate.resources | object | `{}` | resources declaration for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.podTemplate.tolerations | list | `[]` | tolerations setup for pods spawned to consume ClearML Task (example in values.yaml comments) | +| agentk8sglue.podTemplate.volumeMounts | list | `[]` | volumeMounts definition for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.podTemplate.volumes | list | `[]` | volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.queue | string | `"default"` | ClearML queue this agent will consume | | agentk8sglue.replicaCount | int | `1` | Glue Agent number of pods | @@ -58,5 +64,26 @@ Kubernetes: `>= 1.19.0-0 < 1.25.0-0` | 
imageCredentials.registry | string | `"docker.io"` | Registry name | | imageCredentials.username | string | `"someone"` | Registry username | ----------------------------------------------- -Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0) +# Upgrading Chart + +### From v1.x to v2.x + +Chart 1.x assumed that all mounted volumes were PVCs. Versions 2.x and later allow more flexibility and inject the YAML from podTemplate.volumes and podTemplate.volumeMounts directly. + +v1.x +``` + volumes: + - name: "yourvolume" + path: "/yourpath" +``` + +v2.x +``` + volumes: + - name: "yourvolume" + persistentVolumeClaim: + claimName: "yourvolume" + volumeMounts: + - name: "yourvolume" + mountPath: "/yourpath" +``` diff --git a/charts/clearml-agent/README.md.gotmpl b/charts/clearml-agent/README.md.gotmpl new file mode 100644 index 0000000..ee7219f --- /dev/null +++ b/charts/clearml-agent/README.md.gotmpl @@ -0,0 +1,45 @@ +# ClearML Kubernetes Agent +{{ template "chart.deprecationWarning" . }} + +{{ template "chart.badgesSection" . }} + +{{ template "chart.description" . }} + +{{ template "chart.homepageLine" . }} + +{{ template "chart.maintainersSection" . }} + +## Introduction + +The **clearml-agent** is the Kubernetes agent for [ClearML](https://github.com/allegroai/clearml). +It allows you to schedule distributed experiments on a Kubernetes cluster. + +{{ template "chart.sourcesSection" . }} + +{{ template "chart.requirementsSection" . }} + +{{ template "chart.valuesSection" . }} + +# Upgrading Chart + +### From v1.x to v2.x + +Chart 1.x assumed that all mounted volumes were PVCs. Versions 2.x and later allow more flexibility and inject the YAML from podTemplate.volumes and podTemplate.volumeMounts directly.
+ +v1.x +``` + volumes: + - name: "yourvolume" + path: "/yourpath" +``` + +v2.x +``` + volumes: + - name: "yourvolume" + persistentVolumeClaim: + claimName: "yourvolume" + volumeMounts: + - name: "yourvolume" + mountPath: "/yourpath" +``` diff --git a/charts/clearml-agent/templates/agentk8sglue-configmap.yaml b/charts/clearml-agent/templates/agentk8sglue-configmap.yaml index 58a15d8..9cc5bc0 100644 --- a/charts/clearml-agent/templates/agentk8sglue-configmap.yaml +++ b/charts/clearml-agent/templates/agentk8sglue-configmap.yaml @@ -17,21 +17,18 @@ data: {{- end }} {{- end }} serviceAccountName: {{ .Values.agentk8sglue.serviceAccountName }} + {{- with .Values.agentk8sglue.podTemplate.volumes }} volumes: - {{- range .Values.agentk8sglue.podTemplate.volumes }} - - name: {{ .name }} - persistentVolumeClaim: - claimName: {{ .name }} + {{- toYaml . | nindent 8 }} {{- end }} containers: - resources: {{- toYaml .Values.agentk8sglue.podTemplate.resources | nindent 10 }} ports: - containerPort: 10022 + {{- with .Values.agentk8sglue.podTemplate.volumeMounts }} volumeMounts: - {{- range .Values.agentk8sglue.podTemplate.volumes }} - - mountPath: {{ .path }} - name: {{ .name }} + {{- toYaml . | nindent 10 }} {{- end }} env: - name: CLEARML_API_HOST diff --git a/charts/clearml-agent/templates/agentk8sglue-deployment.yaml b/charts/clearml-agent/templates/agentk8sglue-deployment.yaml index 325678d..225eb76 100644 --- a/charts/clearml-agent/templates/agentk8sglue-deployment.yaml +++ b/charts/clearml-agent/templates/agentk8sglue-deployment.yaml @@ -24,26 +24,6 @@ spec: - name: {{ include "agentk8sglue.referenceName" . 
}}-clearml-agent-registry-key {{- end }} {{- end }} - initContainers: - - name: init-k8s-glue - image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}" - command: - - /bin/sh - - -c - - > - set -x; - while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.apiServerUrlReference}}/debug.ping" -o /dev/null) -ne 200 ] ; do - echo "waiting for apiserver" ; - sleep 5 ; - done; - while [[ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.fileServerUrlReference}}/" -o /dev/null) =~ 403|405 ]] ; do - echo "waiting for fileserver" ; - sleep 5 ; - done; - while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.webServerUrlReference}}/" -o /dev/null) -ne 200 ] ; do - echo "waiting for webserver" ; - sleep 5 ; - done containers: - name: k8s-glue image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}" diff --git a/charts/clearml-agent/values.yaml b/charts/clearml-agent/values.yaml index de8c1cb..bcb9b3a 100644 --- a/charts/clearml-agent/values.yaml +++ b/charts/clearml-agent/values.yaml @@ -71,7 +71,12 @@ agentk8sglue: # -- volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments) volumes: [] # - name: "yourvolume" - # path: "/yourpath" + # persistentVolumeClaim: + # claimName: "yourvolume" + # -- volumeMounts definition for pods spawned to consume ClearML Task (example in values.yaml comments) + volumeMounts: [] + # - name: "yourvolume" + # mountPath: "/yourpath" # -- environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) env: [] # # to setup access to private repo, setup secret with git credentials: