mirror of
https://github.com/clearml/clearml-helm-charts
synced 2025-04-17 01:31:13 +00:00
feat: expand volumemount capabilities for agent (#104)
* upgrade * add upgrade instruction * fix readme for agent * Added newline at the end * Try to fix CI * Edited type added * Update README.md Co-authored-by: Valeriano Manassero <14011549+valeriano-manassero@users.noreply.github.com>
This commit is contained in:
parent
19a6785a03
commit
a90b91f024
3
.github/workflows/ci.yaml
vendored
3
.github/workflows/ci.yaml
vendored
@ -1,7 +1,8 @@
|
|||||||
name: Lint and Test Charts
|
name: Lint and Test Charts
|
||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request_target:
|
||||||
|
types: [opened, synchronize, edited, reopened]
|
||||||
paths:
|
paths:
|
||||||
- 'charts/**'
|
- 'charts/**'
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ apiVersion: v2
|
|||||||
name: clearml-agent
|
name: clearml-agent
|
||||||
description: MLOps platform
|
description: MLOps platform
|
||||||
type: application
|
type: application
|
||||||
version: "1.3.0"
|
version: "2.0.0"
|
||||||
appVersion: "1.24"
|
appVersion: "1.24"
|
||||||
kubeVersion: ">= 1.19.0-0 < 1.25.0-0"
|
kubeVersion: ">= 1.19.0-0 < 1.25.0-0"
|
||||||
home: https://clear.ml
|
home: https://clear.ml
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# clearml-agent
|
# ClearML Kubernetes Agent
|
||||||
|
|
||||||
  
|
  
|
||||||
|
|
||||||
MLOps platform
|
MLOps platform
|
||||||
|
|
||||||
@ -12,6 +12,11 @@ MLOps platform
|
|||||||
| ---- | ------ | --- |
|
| ---- | ------ | --- |
|
||||||
| valeriano-manassero | | <https://github.com/valeriano-manassero> |
|
| valeriano-manassero | | <https://github.com/valeriano-manassero> |
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
The **clearml-agent** is the Kubernetes agent for [ClearML](https://github.com/allegroai/clearml).
|
||||||
|
It allows you to schedule distributed experiments on a Kubernetes cluster.
|
||||||
|
|
||||||
## Source Code
|
## Source Code
|
||||||
|
|
||||||
* <https://github.com/allegroai/clearml-helm-charts>
|
* <https://github.com/allegroai/clearml-helm-charts>
|
||||||
@ -25,7 +30,7 @@ Kubernetes: `>= 1.19.0-0 < 1.25.0-0`
|
|||||||
|
|
||||||
| Key | Type | Default | Description |
|
| Key | Type | Default | Description |
|
||||||
|-----|------|---------|-------------|
|
|-----|------|---------|-------------|
|
||||||
| agentk8sglue | object | `{"apiServerUrlReference":"https://api.clear.ml","clearmlcheckCertificate":true,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileServerUrlReference":"https://files.clear.ml","id":"k8s-agent","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-18"},"maxPods":10,"podTemplate":{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumes":[]},"queue":"default","replicaCount":1,"serviceAccountName":"default","webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
|
| agentk8sglue | object | `{"apiServerUrlReference":"https://api.clear.ml","clearmlcheckCertificate":true,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileServerUrlReference":"https://files.clear.ml","id":"k8s-agent","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-18"},"maxPods":10,"podTemplate":{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"queue":"default","replicaCount":1,"serviceAccountName":"default","webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
|
||||||
| agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url |
|
| agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url |
|
||||||
| agentk8sglue.clearmlcheckCertificate | bool | `true` | Check certificates validity for every UrlReference below. |
|
| agentk8sglue.clearmlcheckCertificate | bool | `true` | Check certificates validity for every UrlReference below. |
|
||||||
| agentk8sglue.defaultContainerImage | string | `"ubuntu:18.04"` | default container image for ClearML Task pod |
|
| agentk8sglue.defaultContainerImage | string | `"ubuntu:18.04"` | default container image for ClearML Task pod |
|
||||||
@ -34,11 +39,12 @@ Kubernetes: `>= 1.19.0-0 < 1.25.0-0`
|
|||||||
| agentk8sglue.id | string | `"k8s-agent"` | ClearML worker ID (must be unique across the entire ClearML environment) |
|
| agentk8sglue.id | string | `"k8s-agent"` | ClearML worker ID (must be unique across the entire ClearML environment) |
|
||||||
| agentk8sglue.image | object | `{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-18"}` | Glue Agent image configuration |
|
| agentk8sglue.image | object | `{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-18"}` | Glue Agent image configuration |
|
||||||
| agentk8sglue.maxPods | int | `10` | maximum concurrent consume ClearML Task pod |
|
| agentk8sglue.maxPods | int | `10` | maximum concurrent consume ClearML Task pod |
|
||||||
| agentk8sglue.podTemplate | object | `{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumes":[]}` | template for pods spawned to consume ClearML Task |
|
| agentk8sglue.podTemplate | object | `{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | template for pods spawned to consume ClearML Task |
|
||||||
| agentk8sglue.podTemplate.env | list | `[]` | environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
| agentk8sglue.podTemplate.env | list | `[]` | environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||||
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||||
| agentk8sglue.podTemplate.resources | object | `{}` | resources declaration for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
| agentk8sglue.podTemplate.resources | object | `{}` | resources declaration for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||||
| agentk8sglue.podTemplate.tolerations | list | `[]` | tolerations setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
| agentk8sglue.podTemplate.tolerations | list | `[]` | tolerations setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||||
|
| agentk8sglue.podTemplate.volumeMounts | list | `[]` | volumeMounts definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||||
| agentk8sglue.podTemplate.volumes | list | `[]` | volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
| agentk8sglue.podTemplate.volumes | list | `[]` | volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||||
| agentk8sglue.queue | string | `"default"` | ClearML queue this agent will consume |
|
| agentk8sglue.queue | string | `"default"` | ClearML queue this agent will consume |
|
||||||
| agentk8sglue.replicaCount | int | `1` | Glue Agent number of pods |
|
| agentk8sglue.replicaCount | int | `1` | Glue Agent number of pods |
|
||||||
@ -58,5 +64,26 @@ Kubernetes: `>= 1.19.0-0 < 1.25.0-0`
|
|||||||
| imageCredentials.registry | string | `"docker.io"` | Registry name |
|
| imageCredentials.registry | string | `"docker.io"` | Registry name |
|
||||||
| imageCredentials.username | string | `"someone"` | Registry username |
|
| imageCredentials.username | string | `"someone"` | Registry username |
|
||||||
|
|
||||||
----------------------------------------------
|
# Upgrading Chart
|
||||||
Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)
|
|
||||||
|
### From v1.x to v2.x
|
||||||
|
|
||||||
|
Chart 1.x assumed that all mounted volumes would be PVCs. Version 2.x and later allows for more flexibility and will inject the YAML from podTemplate.volumes and podTemplate.volumeMounts directly.
|
||||||
|
|
||||||
|
v1.x
|
||||||
|
```
|
||||||
|
volumes:
|
||||||
|
- name: "yourvolume"
|
||||||
|
path: "/yourpath"
|
||||||
|
```
|
||||||
|
|
||||||
|
v2.x
|
||||||
|
```
|
||||||
|
volumes:
|
||||||
|
- name: "yourvolume"
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: "yourvolume"
|
||||||
|
volumeMounts:
|
||||||
|
- name: "yourvolume"
|
||||||
|
mountPath: "/yourpath"
|
||||||
|
```
|
||||||
|
45
charts/clearml-agent/README.md.gotmpl
Normal file
45
charts/clearml-agent/README.md.gotmpl
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
# ClearML Kubernetes Agent
|
||||||
|
{{ template "chart.deprecationWarning" . }}
|
||||||
|
|
||||||
|
{{ template "chart.badgesSection" . }}
|
||||||
|
|
||||||
|
{{ template "chart.description" . }}
|
||||||
|
|
||||||
|
{{ template "chart.homepageLine" . }}
|
||||||
|
|
||||||
|
{{ template "chart.maintainersSection" . }}
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
The **clearml-agent** is the Kubernetes agent for [ClearML](https://github.com/allegroai/clearml).
|
||||||
|
It allows you to schedule distributed experiments on a Kubernetes cluster.
|
||||||
|
|
||||||
|
{{ template "chart.sourcesSection" . }}
|
||||||
|
|
||||||
|
{{ template "chart.requirementsSection" . }}
|
||||||
|
|
||||||
|
{{ template "chart.valuesSection" . }}
|
||||||
|
|
||||||
|
# Upgrading Chart
|
||||||
|
|
||||||
|
### From v1.x to v2.x
|
||||||
|
|
||||||
|
Chart 1.x assumed that all mounted volumes would be PVCs. Version 2.x and later allows for more flexibility and will inject the YAML from podTemplate.volumes and podTemplate.volumeMounts directly.
|
||||||
|
|
||||||
|
v1.x
|
||||||
|
```
|
||||||
|
volumes:
|
||||||
|
- name: "yourvolume"
|
||||||
|
path: "/yourpath"
|
||||||
|
```
|
||||||
|
|
||||||
|
v2.x
|
||||||
|
```
|
||||||
|
volumes:
|
||||||
|
- name: "yourvolume"
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: "yourvolume"
|
||||||
|
volumeMounts:
|
||||||
|
- name: "yourvolume"
|
||||||
|
mountPath: "/yourpath"
|
||||||
|
```
|
@ -17,21 +17,18 @@ data:
|
|||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
serviceAccountName: {{ .Values.agentk8sglue.serviceAccountName }}
|
serviceAccountName: {{ .Values.agentk8sglue.serviceAccountName }}
|
||||||
|
{{- with .Values.agentk8sglue.podTemplate.volumes }}
|
||||||
volumes:
|
volumes:
|
||||||
{{- range .Values.agentk8sglue.podTemplate.volumes }}
|
{{- toYaml . | nindent 8 }}
|
||||||
- name: {{ .name }}
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: {{ .name }}
|
|
||||||
{{- end }}
|
{{- end }}
|
||||||
containers:
|
containers:
|
||||||
- resources:
|
- resources:
|
||||||
{{- toYaml .Values.agentk8sglue.podTemplate.resources | nindent 10 }}
|
{{- toYaml .Values.agentk8sglue.podTemplate.resources | nindent 10 }}
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 10022
|
- containerPort: 10022
|
||||||
|
{{- with .Values.agentk8sglue.podTemplate.volumeMounts }}
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
{{- range .Values.agentk8sglue.podTemplate.volumes }}
|
{{- toYaml . | nindent 10 }}
|
||||||
- mountPath: {{ .path }}
|
|
||||||
name: {{ .name }}
|
|
||||||
{{- end }}
|
{{- end }}
|
||||||
env:
|
env:
|
||||||
- name: CLEARML_API_HOST
|
- name: CLEARML_API_HOST
|
||||||
|
@ -24,26 +24,6 @@ spec:
|
|||||||
- name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-registry-key
|
- name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-registry-key
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
initContainers:
|
|
||||||
- name: init-k8s-glue
|
|
||||||
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
|
|
||||||
command:
|
|
||||||
- /bin/sh
|
|
||||||
- -c
|
|
||||||
- >
|
|
||||||
set -x;
|
|
||||||
while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.apiServerUrlReference}}/debug.ping" -o /dev/null) -ne 200 ] ; do
|
|
||||||
echo "waiting for apiserver" ;
|
|
||||||
sleep 5 ;
|
|
||||||
done;
|
|
||||||
while [[ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.fileServerUrlReference}}/" -o /dev/null) =~ 403|405 ]] ; do
|
|
||||||
echo "waiting for fileserver" ;
|
|
||||||
sleep 5 ;
|
|
||||||
done;
|
|
||||||
while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.webServerUrlReference}}/" -o /dev/null) -ne 200 ] ; do
|
|
||||||
echo "waiting for webserver" ;
|
|
||||||
sleep 5 ;
|
|
||||||
done
|
|
||||||
containers:
|
containers:
|
||||||
- name: k8s-glue
|
- name: k8s-glue
|
||||||
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
|
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
|
||||||
|
@ -71,7 +71,12 @@ agentk8sglue:
|
|||||||
# -- volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments)
|
# -- volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||||
volumes: []
|
volumes: []
|
||||||
# - name: "yourvolume"
|
# - name: "yourvolume"
|
||||||
# path: "/yourpath"
|
# persistentVolumeClaim:
|
||||||
|
# claimName: "yourvolume"
|
||||||
|
# -- volumeMounts definition for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||||
|
volumeMounts: []
|
||||||
|
# - name: "yourvolume"
|
||||||
|
# mountPath: "/yourpath"
|
||||||
# -- environment variables for pods spawned to consume ClearML Task (example in values.yaml comments)
|
# -- environment variables for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||||
env: []
|
env: []
|
||||||
# # to setup access to private repo, setup secret with git credentials:
|
# # to setup access to private repo, setup secret with git credentials:
|
||||||
|
Loading…
Reference in New Issue
Block a user