Compare commits

...

13 Commits

Author SHA1 Message Date
Valeriano Manassero
56880de8bb Enable multiple agents installations (#92)
* Changed: dynamic names

* Changed: bump up version
2022-07-15 08:28:40 +02:00
Valeriano Manassero
d778d0ef93 Changed: update helm chart version (#90) 2022-07-13 10:23:34 +02:00
zandolsi-psee
e28a2f991b Fix imagepullsecret error (#89)
* adapt ingress

* update docs

* remove idea

* fix imagepullsecret error
2022-07-13 10:14:16 +02:00
Valeriano Manassero
dc30518c26 ClearML Version upgrade to 1.6.0 (#88)
* Changed: version upgrade

* Changed: helm docs version update

* Changed: helm-docs update
2022-07-12 10:48:16 +02:00
Valeriano Manassero
50237dcb9d Update agent 1.24 (#86)
* Changed: image update to 1.24

* Changed: bump up version
2022-06-23 11:19:03 +02:00
Valeriano Manassero
1b164c2906 Add configurable default base serve url (#83)
* Added: configurable default base serving url

* Changed: chart version bump
2022-06-23 10:46:18 +02:00
Valeriano Manassero
43806b8e21 Add insecure cert check flag (#85)
* Added: clearmlcheckCertificate flag

* Changed: bump chart
2022-06-23 10:43:39 +02:00
Valeriano Manassero
80072c0654 Add editable config for k8s Agent (#84)
* Added: editable configuration

* Changed: bump up version
2022-06-23 09:52:19 +02:00
Valeriano Manassero
e22bd30764 Upgrade to app version 1.5.0 (#81)
* Changed: upgrade to 1.5.0

* Fixed: inject after ct check

* Fixed: list changd

* Fixed: typo
2022-06-23 07:49:45 +02:00
Valeriano Manassero
84a003b7bc Fix fileserver check on Agent (#82)
* Fixed: fileserver check

* Changed: version bump
2022-06-22 16:52:51 +02:00
Valeriano Manassero
1d95f0c27f Pullsecrets pod template (#80)
* Added: pullsecrets management for pod template

* Changed: version bump
2022-06-22 15:52:14 +02:00
Valeriano Manassero
562815e97a ClearML standalone agent chart (#79)
* Added: agent chart

* Changed: base image tag

* Changed: updated helm-docs

* Fixed: maintainers

* Changed: updated radme

* Fixed: http code to check

* Fixed: default values to check

* Changed: updated helm-docs

* Added: default values to be substituted by GH action

* Added: sed on the fly for testing

* Changed: updated CI images for Kind
2022-06-08 10:01:33 +02:00
Leon Rotim
e317610397 Fix linebreak formatting (#77)
* fix wrong line break delete

* version, bugfix inc

* regenerate README.md

Co-authored-by: Valeriano Manassero <14011549+valeriano-manassero@users.noreply.github.com>
2022-06-08 08:16:52 +02:00
31 changed files with 511 additions and 619 deletions

View File

@@ -1,7 +1,7 @@
#!/bin/bash
CHART_DIRS="$(git diff --find-renames --name-only "$(git rev-parse --abbrev-ref HEAD)" remotes/origin/main -- 'charts' | grep '[cC]hart.yaml' | sed -e 's#/[Cc]hart.yaml##g')"
HELM_DOCS_VERSION="1.10.0"
HELM_DOCS_VERSION="1.11.0"
curl --silent --show-error --fail --location --output /tmp/helm-docs.tar.gz https://github.com/norwoodj/helm-docs/releases/download/v"${HELM_DOCS_VERSION}"/helm-docs_"${HELM_DOCS_VERSION}"_Linux_x86_64.tar.gz
tar -xf /tmp/helm-docs.tar.gz helm-docs

View File

@@ -21,16 +21,16 @@ jobs:
strategy:
matrix:
k8s:
- v1.21.2
- v1.22.1
- v1.23.0
- v1.22.7
- v1.23.6
- v1.24.0
steps:
- name: Checkout
uses: actions/checkout@v1
- name: Create kind ${{ matrix.k8s }} cluster
uses: helm/kind-action@v1.1.0
uses: helm/kind-action@v1.2.0
with:
version: v0.11.1
version: v0.13.0
node_image: kindest/node:${{ matrix.k8s }}
- name: Set up chart-testing
uses: helm/chart-testing-action@v2.2.1
@@ -40,6 +40,13 @@ jobs:
changed=$(ct list-changed --chart-dirs=charts --target-branch=main)
if [[ -n "$changed" ]]; then
echo "::set-output name=changed::true"
echo "::set-output name=changed_charts::\"${changed//$'\n'/,}\""
fi
- name: Inject secrets
run: |
find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUEKEY/${{ secrets.agentk8sglueKey }}/g" {} \;
find ./charts/*/ci/*.yaml -type f -exec sed -i "s/AGENTK8SGLUESECRET/${{ secrets.agentk8sglueSecret }}/g" {} \;
if: steps.list-changed.outputs.changed == 'true'
- name: Run chart-testing (lint and install)
run: ct lint-and-install --chart-dirs=charts --target-branch=main --helm-extra-args="--timeout=15m" --debug=true
run: ct lint-and-install --chart-dirs=charts --target-branch=main --helm-extra-args="--timeout=15m" --charts=${{steps.list-changed.outputs.changed_charts}} --debug=true
if: steps.list-changed.outputs.changed == 'true'

View File

@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

View File

@@ -0,0 +1,19 @@
apiVersion: v2
name: clearml-agent
description: MLOps platform
type: application
version: "1.2.0"
appVersion: "1.24"
kubeVersion: ">= 1.19.0-0 < 1.25.0-0"
home: https://clear.ml
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
sources:
- https://github.com/allegroai/clearml-helm-charts
- https://github.com/allegroai/clearml
maintainers:
- name: valeriano-manassero
url: https://github.com/valeriano-manassero
keywords:
- clearml
- "machine learning"
- mlops

View File

@@ -0,0 +1,59 @@
# clearml-agent
![Version: 1.2.0](https://img.shields.io/badge/Version-1.2.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square)
MLOps platform
**Homepage:** <https://clear.ml>
## Maintainers
| Name | Email | Url |
| ---- | ------ | --- |
| valeriano-manassero | | <https://github.com/valeriano-manassero> |
## Source Code
* <https://github.com/allegroai/clearml-helm-charts>
* <https://github.com/allegroai/clearml>
## Requirements
Kubernetes: `>= 1.19.0-0 < 1.25.0-0`
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| agentk8sglue | object | `{"apiServerUrlReference":"https://api.clear.ml","clearmlcheckCertificate":true,"defaultContainerImage":"ubuntu:18.04","fileServerUrlReference":"https://files.clear.ml","id":"k8s-agent","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-18"},"maxPods":10,"podTemplate":{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumes":[]},"queue":"default","replicaCount":1,"serviceAccountName":"default","webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
| agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url |
| agentk8sglue.clearmlcheckCertificate | bool | `true` | Check certificates validity for evefry UrlReference below. |
| agentk8sglue.defaultContainerImage | string | `"ubuntu:18.04"` | default container image for ClearML Task pod |
| agentk8sglue.fileServerUrlReference | string | `"https://files.clear.ml"` | Reference to File server url |
| agentk8sglue.id | string | `"k8s-agent"` | ClearML worker ID (must be unique across the entire ClearMLenvironment) |
| agentk8sglue.image | object | `{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-18"}` | Glue Agent image configuration |
| agentk8sglue.maxPods | int | `10` | maximum concurrent consume ClearML Task pod |
| agentk8sglue.podTemplate | object | `{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumes":[]}` | template for pods spawned to consume ClearML Task |
| agentk8sglue.podTemplate.env | list | `[]` | environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) |
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
| agentk8sglue.podTemplate.resources | object | `{}` | resources declaration for pods spawned to consume ClearML Task (example in values.yaml comments) |
| agentk8sglue.podTemplate.tolerations | list | `[]` | tolerations setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
| agentk8sglue.podTemplate.volumes | list | `[]` | volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
| agentk8sglue.queue | string | `"default"` | ClearML queue this agent will consume |
| agentk8sglue.replicaCount | int | `1` | Glue Agent number of pods |
| agentk8sglue.serviceAccountName | string | `"default"` | serviceAccountName for pods spawned to consume ClearML Task |
| agentk8sglue.webServerUrlReference | string | `"https://app.clear.ml"` | Reference to Web server url |
| clearml | object | `{"agentk8sglueKey":"ACCESSKEY","agentk8sglueSecret":"SECRETKEY","clearmlConfig":"sdk {\n}"}` | ClearMl generic configurations |
| clearml.agentk8sglueKey | string | `"ACCESSKEY"` | Agent k8s Glue basic auth key |
| clearml.agentk8sglueSecret | string | `"SECRETKEY"` | Agent k8s Glue basic auth secret |
| clearml.clearmlConfig | string | `"sdk {\n}"` | ClearML configuration file |
| imageCredentials | object | `{"email":"someone@host.com","enabled":false,"existingSecret":"","password":"pwd","registry":"docker.io","username":"someone"}` | Private image registry configuration |
| imageCredentials.email | string | `"someone@host.com"` | Email |
| imageCredentials.enabled | bool | `false` | Use private authentication mode |
| imageCredentials.existingSecret | string | `""` | If this is set, chart will not generate a secret but will use what is defined here |
| imageCredentials.password | string | `"pwd"` | Registry password |
| imageCredentials.registry | string | `"docker.io"` | Registry name |
| imageCredentials.username | string | `"someone"` | Registry username |
----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)

View File

@@ -0,0 +1,3 @@
clearml:
agentk8sglueKey: "AGENTK8SGLUEKEY"
agentk8sglueSecret: "AGENTK8SGLUESECRET"

View File

@@ -0,0 +1 @@
Glue Agent deployed.

View File

@@ -0,0 +1,86 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "clearml.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "clearml.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "clearml.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "clearml.labels" -}}
helm.sh/chart: {{ include "clearml.chart" . }}
{{ include "clearml.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "clearml.selectorLabels" -}}
app.kubernetes.io/name: {{ include "clearml.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Reference Name (agentk8sglue)
*/}}
{{- define "agentk8sglue.referenceName" -}}
{{- include "clearml.name" . }}-agentk8sglue
{{- end }}
{{/*
Selector labels (agentk8sglue)
*/}}
{{- define "agentk8sglue.selectorLabels" -}}
app.kubernetes.io/name: {{ include "clearml.name" . }}
app.kubernetes.io/instance: {{ include "agentk8sglue.referenceName" . }}
{{- end }}
{{/*
Create the name of the service account to use
*/}}
{{- define "clearml.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "clearml.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
{{/*
Create secret to access docker registry
*/}}
{{- define "imagePullSecret" }}
{{- with .Values.imageCredentials }}
{{- printf "{\"auths\":{\"%s\":{\"username\":\"%s\",\"password\":\"%s\",\"email\":\"%s\",\"auth\":\"%s\"}}}" .registry .username .password .email (printf "%s:%s" .username .password | b64enc) | b64enc }}
{{- end }}
{{- end }}

View File

@@ -1,14 +1,21 @@
{{- if .Values.agentk8sglue.enabled }}
apiVersion: v1
kind: ConfigMap
metadata:
name: k8sagent-pod-template
name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pod-template
data:
template.yaml: |
apiVersion: v1
metadata:
namespace: {{ .Release.Namespace }}
spec:
{{- if .Values.imageCredentials.enabled }}
imagePullSecrets:
{{- if .Values.imageCredentials.existingSecret }}
- name: .Values.imageCredentials.existingSecret
{{- else }}
- name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-registry-key
{{- end }}
{{- end }}
serviceAccountName: {{ .Values.agentk8sglue.serviceAccountName }}
volumes:
{{- range .Values.agentk8sglue.podTemplate.volumes }}
@@ -28,21 +35,21 @@ data:
{{- end }}
env:
- name: CLEARML_API_HOST
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
value: {{.Values.agentk8sglue.apiServerUrlReference}}
- name: CLEARML_WEB_HOST
value: "http://{{ include "clearml.fullname" . }}-webserver"
value: {{.Values.agentk8sglue.webServerUrlReference}}
- name: CLEARML_FILES_HOST
value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}"
value: {{.Values.agentk8sglue.fileServerUrlReference}}
- name: CLEARML_API_ACCESS_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: apiserver_key
name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-conf
key: agentk8sglue_key
- name: CLEARML_API_SECRET_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: apiserver_secret
name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-conf
key: agentk8sglue_secret
{{- if .Values.agentk8sglue.podTemplate.env }}
{{ toYaml .Values.agentk8sglue.podTemplate.env | nindent 8 }}
{{- end }}
@@ -54,4 +61,3 @@ data:
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,105 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "agentk8sglue.referenceName" . }}
labels:
{{- include "clearml.labels" . | nindent 4 }}
spec:
replicas: {{ .Values.agentk8sglue.replicaCount }}
selector:
matchLabels:
{{- include "agentk8sglue.selectorLabels" . | nindent 6 }}
template:
metadata:
annotations:
checksum/config: {{ printf "%s%s" .Values.clearml .Values.agentk8sglue | sha256sum }}
labels:
{{- include "agentk8sglue.selectorLabels" . | nindent 8 }}
spec:
{{- if .Values.imageCredentials.enabled }}
imagePullSecrets:
{{- if .Values.imageCredentials.existingSecret }}
- name: .Values.imageCredentials.existingSecret
{{- else }}
- name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-registry-key
{{- end }}
{{- end }}
initContainers:
- name: init-k8s-glue
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
command:
- /bin/sh
- -c
- >
set -x;
while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.apiServerUrlReference}}/debug.ping" -o /dev/null) -ne 200 ] ; do
echo "waiting for apiserver" ;
sleep 5 ;
done;
while [[ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.fileServerUrlReference}}/" -o /dev/null) =~ 403|405 ]] ; do
echo "waiting for fileserver" ;
sleep 5 ;
done;
while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.webServerUrlReference}}/" -o /dev/null) -ne 200 ] ; do
echo "waiting for webserver" ;
sleep 5 ;
done
containers:
- name: k8s-glue
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
imagePullPolicy: IfNotPresent
command: ["/bin/bash", "-c", "export PATH=$PATH:$HOME/bin; source /root/.bashrc && /root/entrypoint.sh"]
volumeMounts:
- name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pod-template
mountPath: /root/template
{{ if .Values.clearml.clearmlConfig }}
- name: k8sagent-clearml-conf-volume
mountPath: /root/clearml.conf
subPath: clearml.conf
readOnly: true
{{- end }}
env:
- name: CLEARML_API_HOST
value: "{{.Values.agentk8sglue.apiServerUrlReference}}"
- name: CLEARML_WEB_HOST
value: "{{.Values.agentk8sglue.webServerUrlReference}}"
- name: CLEARML_FILES_HOST
value: "{{.Values.agentk8sglue.fileServerUrlReference}}"
- name: K8S_GLUE_MAX_PODS
value: "{{.Values.agentk8sglue.maxPods}}"
- name: K8S_GLUE_QUEUE
value: "{{.Values.agentk8sglue.queue}}"
- name: K8S_GLUE_EXTRA_ARGS
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml"
- name: K8S_DEFAULT_NAMESPACE
value: "{{ .Release.Namespace }}"
- name: CLEARML_API_ACCESS_KEY
valueFrom:
secretKeyRef:
name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-conf
key: agentk8sglue_key
- name: CLEARML_API_SECRET_KEY
valueFrom:
secretKeyRef:
name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-conf
key: agentk8sglue_secret
- name: CLEARML_WORKER_ID
value: "{{.Values.agentk8sglue.id}}"
- name: CLEARML_AGENT_UPDATE_REPO
value: ""
- name: FORCE_CLEARML_AGENT_REPO
value: ""
- name: CLEARML_DOCKER_IMAGE
value: "{{.Values.agentk8sglue.defaultContainerImage}}"
volumes:
- name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pod-template
configMap:
name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pod-template
{{ if .Values.clearml.clearmlConfig }}
- name: k8sagent-clearml-conf-volume
secret:
secretName: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-conf
items:
- key: clearml.conf
path: clearml.conf
{{ end }}

View File

@@ -1,8 +1,7 @@
{{- if .Values.agentk8sglue.enabled }}
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: k8sagent-pods-access
name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pods-access
rules:
- apiGroups:
- ""
@@ -13,7 +12,7 @@ rules:
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: k8sagent-pods-access
name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pods-access
subjects:
- kind: ServiceAccount
name: default
@@ -21,5 +20,4 @@ subjects:
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: k8sagent-pods-access
{{- end }}
name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pods-access

View File

@@ -0,0 +1,20 @@
apiVersion: v1
kind: Secret
metadata:
name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-conf
data:
agentk8sglue_key: {{ .Values.clearml.agentk8sglueKey | b64enc }}
agentk8sglue_secret: {{ .Values.clearml.agentk8sglueSecret | b64enc }}
clearml.conf: {{ .Values.clearml.clearmlConfig | b64enc }}
---
{{- if .Values.imageCredentials.enabled }}
{{- if not .Values.imageCredentials.existingSecret }}
apiVersion: v1
kind: Secret
metadata:
name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-registry-key
type: kubernetes.io/dockerconfigjson
data:
.dockerconfigjson: {{ template "imagePullSecret" . }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,88 @@
# -- Private image registry configuration
imageCredentials:
# -- Use private authentication mode
enabled: false
# -- If this is set, chart will not generate a secret but will use what is defined here
existingSecret: ""
# -- Registry name
registry: docker.io
# -- Registry username
username: someone
# -- Registry password
password: pwd
# -- Email
email: someone@host.com
# -- ClearMl generic configurations
clearml:
# -- Agent k8s Glue basic auth key
agentk8sglueKey: "ACCESSKEY"
# -- Agent k8s Glue basic auth secret
agentk8sglueSecret: "SECRETKEY"
# -- ClearML configuration file
clearmlConfig: |-
sdk {
}
# -- This agent will spawn queued experiments in new pods, a good use case is to combine this with
# GPU autoscaling nodes.
# https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue
agentk8sglue:
# -- Glue Agent image configuration
image:
repository: "allegroai/clearml-agent-k8s-base"
tag: "1.24-18"
# -- Glue Agent number of pods
replicaCount: 1
# -- Check certificates validity for evefry UrlReference below.
clearmlcheckCertificate: true
# -- Reference to Api server url
apiServerUrlReference: "https://api.clear.ml"
# -- Reference to File server url
fileServerUrlReference: "https://files.clear.ml"
# -- Reference to Web server url
webServerUrlReference: "https://app.clear.ml"
# -- serviceAccountName for pods spawned to consume ClearML Task
serviceAccountName: default
# -- maximum concurrent consume ClearML Task pod
maxPods: 10
# -- default container image for ClearML Task pod
defaultContainerImage: ubuntu:18.04
# -- ClearML queue this agent will consume
queue: default
# -- ClearML worker ID (must be unique across the entire ClearMLenvironment)
id: k8s-agent
# -- template for pods spawned to consume ClearML Task
podTemplate:
# -- volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments)
volumes: []
# - name: "yourvolume"
# path: "/yourpath"
# -- environment variables for pods spawned to consume ClearML Task (example in values.yaml comments)
env: []
# # to setup access to private repo, setup secret with git credentials:
# - name: CLEARML_AGENT_GIT_USER
# value: mygitusername
# - name: CLEARML_AGENT_GIT_PASS
# valueFrom:
# secretKeyRef:
# name: git-password
# key: git-password
# -- resources declaration for pods spawned to consume ClearML Task (example in values.yaml comments)
resources: {}
# limits:
# nvidia.com/gpu: 1
# -- tolerations setup for pods spawned to consume ClearML Task (example in values.yaml comments)
tolerations: []
# - key: "nvidia.com/gpu"
# operator: Exists
# effect: "NoSchedule"
# -- nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments)
nodeSelector: {}
# fleet: gpu-nodes

View File

@@ -2,7 +2,7 @@ apiVersion: v2
name: clearml-serving
description: ClearML Serving Helm Chart
type: application
version: 0.3.0
version: 0.4.0
appVersion: "0.9.0"
maintainers:
- name: valeriano-manassero

View File

@@ -1,6 +1,6 @@
# clearml-serving
![Version: 0.3.0](https://img.shields.io/badge/Version-0.3.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.9.0](https://img.shields.io/badge/AppVersion-0.9.0-informational?style=flat-square)
![Version: 0.4.0](https://img.shields.io/badge/Version-0.4.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.9.0](https://img.shields.io/badge/AppVersion-0.9.0-informational?style=flat-square)
ClearML Serving Helm Chart
@@ -23,6 +23,7 @@ ClearML Serving Helm Chart
| clearml.apiAccessKey | string | `"ClearML API Access Key"` | |
| clearml.apiHost | string | `"http://clearml-server-apiserver:8008"` | |
| clearml.apiSecretKey | string | `"ClearML API Secret Key"` | |
| clearml.defaultBaseServeUrl | string | `"http://127.0.0.1:8080/serve"` | |
| clearml.filesHost | string | `"http://clearml-server-fileserver:8081"` | |
| clearml.servingTaskId | string | `"ClearML Serving Task ID"` | |
| clearml.webHost | string | `"http://clearml-server-webserver:80"` | |

View File

@@ -35,7 +35,7 @@ spec:
- name: CLEARML_SERVING_POLL_FREQ
value: "1.0"
- name: CLEARML_DEFAULT_BASE_SERVE_URL
value: http://127.0.0.1:8080/serve
value: "{{ .Values.clearml.defaultBaseServeUrl }}"
- name: CLEARML_DEFAULT_TRITON_GRPC_ADDR
{{- if .Values.clearml_serving_triton.enabled }}
value: "clearml-serving-triton:8001"

View File

@@ -6,6 +6,7 @@ clearml:
apiHost: http://clearml-server-apiserver:8008
filesHost: http://clearml-server-fileserver:8081
webHost: http://clearml-server-webserver:80
defaultBaseServeUrl: http://127.0.0.1:8080/serve
servingTaskId: "ClearML Serving Task ID"
zookeeper:

View File

@@ -2,8 +2,8 @@ apiVersion: v2
name: clearml
description: MLOps platform
type: application
version: "3.10.4"
appVersion: "1.4.0"
version: "4.1.1"
appVersion: "1.6.0"
home: https://clear.ml
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
sources:

View File

@@ -1,6 +1,6 @@
# ClearML Ecosystem for Kubernetes
![Version: 3.10.4](https://img.shields.io/badge/Version-3.10.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.4.0](https://img.shields.io/badge/AppVersion-1.4.0-informational?style=flat-square)
![Version: 4.1.1](https://img.shields.io/badge/Version-4.1.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.6.0](https://img.shields.io/badge/AppVersion-1.6.0-informational?style=flat-square)
MLOps platform
@@ -129,94 +129,6 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| agentGroups.agent-group-cpu.affinity | object | `{}` | |
| agentGroups.agent-group-cpu.agentVersion | string | `""` | |
| agentGroups.agent-group-cpu.awsAccessKeyId | string | `nil` | |
| agentGroups.agent-group-cpu.awsDefaultRegion | string | `nil` | |
| agentGroups.agent-group-cpu.awsSecretAccessKey | string | `nil` | |
| agentGroups.agent-group-cpu.azureStorageAccount | string | `nil` | |
| agentGroups.agent-group-cpu.azureStorageKey | string | `nil` | |
| agentGroups.agent-group-cpu.clearmlAccessKey | string | `nil` | |
| agentGroups.agent-group-cpu.clearmlConfig | string | `"sdk {\n}"` | |
| agentGroups.agent-group-cpu.clearmlGitPassword | string | `nil` | |
| agentGroups.agent-group-cpu.clearmlGitUser | string | `nil` | |
| agentGroups.agent-group-cpu.clearmlSecretKey | string | `nil` | |
| agentGroups.agent-group-cpu.enabled | bool | `false` | |
| agentGroups.agent-group-cpu.extraEnvs | list | `[]` | |
| agentGroups.agent-group-cpu.image.pullPolicy | string | `"IfNotPresent"` | |
| agentGroups.agent-group-cpu.image.repository | string | `"ubuntu"` | |
| agentGroups.agent-group-cpu.image.tag | string | `"18.04"` | |
| agentGroups.agent-group-cpu.name | string | `"agent-group-cpu"` | |
| agentGroups.agent-group-cpu.nodeSelector | object | `{}` | |
| agentGroups.agent-group-cpu.nvidiaGpusPerAgent | int | `0` | |
| agentGroups.agent-group-cpu.podAnnotations | object | `{}` | |
| agentGroups.agent-group-cpu.queues | string | `"default"` | |
| agentGroups.agent-group-cpu.replicaCount | int | `1` | |
| agentGroups.agent-group-cpu.tolerations | list | `[]` | |
| agentGroups.agent-group-cpu.updateStrategy | string | `"Recreate"` | |
| agentGroups.agent-group-gpu.affinity | object | `{}` | |
| agentGroups.agent-group-gpu.agentVersion | string | `""` | |
| agentGroups.agent-group-gpu.awsAccessKeyId | string | `nil` | |
| agentGroups.agent-group-gpu.awsDefaultRegion | string | `nil` | |
| agentGroups.agent-group-gpu.awsSecretAccessKey | string | `nil` | |
| agentGroups.agent-group-gpu.azureStorageAccount | string | `nil` | |
| agentGroups.agent-group-gpu.azureStorageKey | string | `nil` | |
| agentGroups.agent-group-gpu.clearmlAccessKey | string | `nil` | |
| agentGroups.agent-group-gpu.clearmlConfig | string | `"sdk {\n}"` | |
| agentGroups.agent-group-gpu.clearmlGitPassword | string | `nil` | |
| agentGroups.agent-group-gpu.clearmlGitUser | string | `nil` | |
| agentGroups.agent-group-gpu.clearmlSecretKey | string | `nil` | |
| agentGroups.agent-group-gpu.enabled | bool | `false` | |
| agentGroups.agent-group-gpu.image.pullPolicy | string | `"IfNotPresent"` | |
| agentGroups.agent-group-gpu.image.repository | string | `"nvidia/cuda"` | |
| agentGroups.agent-group-gpu.image.tag | string | `"11.0-base-ubuntu18.04"` | |
| agentGroups.agent-group-gpu.name | string | `"agent-group-gpu"` | |
| agentGroups.agent-group-gpu.nodeSelector | object | `{}` | |
| agentGroups.agent-group-gpu.nvidiaGpusPerAgent | int | `1` | |
| agentGroups.agent-group-gpu.podAnnotations | object | `{}` | |
| agentGroups.agent-group-gpu.queues | string | `"default"` | |
| agentGroups.agent-group-gpu.replicaCount | int | `0` | |
| agentGroups.agent-group-gpu.tolerations | list | `[]` | |
| agentGroups.agent-group-gpu.updateStrategy | string | `"Recreate"` | |
| agentk8sglue.defaultDockerImage | string | `"nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04"` | |
| agentk8sglue.enabled | bool | `true` | |
| agentk8sglue.id | string | `"k8s-agent"` | |
| agentk8sglue.image.repository | string | `"allegroai/clearml-agent-k8s"` | |
| agentk8sglue.image.tag | string | `"base-1.21"` | |
| agentk8sglue.maxPods | int | `10` | |
| agentk8sglue.podTemplate.env | list | `[]` | |
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | |
| agentk8sglue.podTemplate.resources | object | `{}` | |
| agentk8sglue.podTemplate.tolerations | list | `[]` | |
| agentk8sglue.podTemplate.volumes | list | `[]` | |
| agentk8sglue.queue | string | `"default"` | |
| agentk8sglue.serviceAccountName | string | `"default"` | |
| agentservices.affinity | object | `{}` | |
| agentservices.agentVersion | string | `""` | |
| agentservices.awsAccessKeyId | string | `nil` | |
| agentservices.awsDefaultRegion | string | `nil` | |
| agentservices.awsSecretAccessKey | string | `nil` | |
| agentservices.azureStorageAccount | string | `nil` | |
| agentservices.azureStorageKey | string | `nil` | |
| agentservices.clearmlFilesHost | string | `nil` | |
| agentservices.clearmlGitPassword | string | `nil` | |
| agentservices.clearmlGitUser | string | `nil` | |
| agentservices.clearmlHostIp | string | `nil` | |
| agentservices.clearmlWebHost | string | `nil` | |
| agentservices.clearmlWorkerId | string | `"clearml-services"` | |
| agentservices.enabled | bool | `false` | |
| agentservices.extraEnvs | list | `[]` | |
| agentservices.googleCredentials | string | `nil` | |
| agentservices.image.pullPolicy | string | `"IfNotPresent"` | |
| agentservices.image.repository | string | `"allegroai/clearml-agent-services"` | |
| agentservices.image.tag | string | `"latest"` | |
| agentservices.nodeSelector | object | `{}` | |
| agentservices.podAnnotations | object | `{}` | |
| agentservices.replicaCount | int | `1` | |
| agentservices.resources | object | `{}` | |
| agentservices.storage.data.class | string | `""` | |
| agentservices.storage.data.size | string | `"50Gi"` | |
| agentservices.tolerations | list | `[]` | |
| apiserver.additionalConfigs | object | `{}` | additional configurations that can be used by api server; check examples in values.yaml file |
| apiserver.affinity | object | `{}` | |
| apiserver.authCookiesMaxAge | int | `864000` | Amount of seconds the authorization cookie will last in user browser |
@@ -224,7 +136,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| apiserver.extraEnvs | list | `[]` | |
| apiserver.image.pullPolicy | string | `"IfNotPresent"` | |
| apiserver.image.repository | string | `"allegroai/clearml"` | |
| apiserver.image.tag | string | `"1.4.0"` | |
| apiserver.image.tag | string | `"1.6.0"` | |
| apiserver.livenessDelay | int | `60` | |
| apiserver.nodeSelector | object | `{}` | |
| apiserver.podAnnotations | object | `{}` | |
@@ -238,7 +150,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| apiserver.service.port | int | `8008` | |
| apiserver.service.type | string | `"NodePort"` | This will set to service's spec.type field |
| apiserver.tolerations | list | `[]` | |
| clearml.defaultCompany | string | `"d1bd92a3b039400cbafc60a7a5b1e52b"` | |
| clearml | object | `{"defaultCompany":"d1bd92a3b039400cbafc60a7a5b1e52b"}` | ClearMl generic configurations |
| elasticsearch.clusterHealthCheckParams | string | `"wait_for_status=yellow&timeout=1s"` | |
| elasticsearch.clusterName | string | `"clearml-elastic"` | |
| elasticsearch.enabled | bool | `true` | |
@@ -284,7 +196,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| fileserver.extraEnvs | list | `[]` | |
| fileserver.image.pullPolicy | string | `"IfNotPresent"` | |
| fileserver.image.repository | string | `"allegroai/clearml"` | |
| fileserver.image.tag | string | `"1.4.0"` | |
| fileserver.image.tag | string | `"1.6.0"` | |
| fileserver.nodeSelector | object | `{}` | |
| fileserver.podAnnotations | object | `{}` | |
| fileserver.replicaCount | int | `1` | |
@@ -295,6 +207,13 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| fileserver.storage.data.class | string | `""` | |
| fileserver.storage.data.size | string | `"50Gi"` | |
| fileserver.tolerations | list | `[]` | |
| imageCredentials | object | `{"email":"someone@host.com","enabled":false,"existingSecret":"","password":"pwd","registry":"docker.io","username":"someone"}` | Private image registry configuration |
| imageCredentials.email | string | `"someone@host.com"` | Email |
| imageCredentials.enabled | bool | `false` | Use private authentication mode |
| imageCredentials.existingSecret | string | `""` | If this is set, chart will not generate a secret but will use what is defined here |
| imageCredentials.password | string | `"pwd"` | Registry password |
| imageCredentials.registry | string | `"docker.io"` | Registry name |
| imageCredentials.username | string | `"someone"` | Registry username |
| ingress.annotations | object | `{}` | |
| ingress.api.annotations | object | `{}` | |
| ingress.api.enabled | bool | `false` | |
@@ -343,7 +262,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| webserver.extraEnvs | list | `[]` | |
| webserver.image.pullPolicy | string | `"IfNotPresent"` | |
| webserver.image.repository | string | `"allegroai/clearml"` | |
| webserver.image.tag | string | `"1.4.0"` | |
| webserver.image.tag | string | `"1.6.0"` | |
| webserver.nodeSelector | object | `{}` | |
| webserver.podAnnotations | object | `{}` | |
| webserver.replicaCount | int | `1` | |

View File

@@ -144,13 +144,6 @@ Create the name of the Files service to use
{{/*
Return the proper Docker Image Registry Secret Names
*/}}
{{- define "clearml.imagePullSecrets" -}}
{{- if .Values.global }}
{{- if .Values.global.imagePullSecrets }}
imagePullSecrets:
{{- range .Values.global.imagePullSecrets }}
- name: {{ . }}
{{- define "imagePullSecret" }}
{{- printf "{\"auths\": {\"%s\": {\"auth\": \"%s\"}}}" .Values.imageCredentials.registry (printf "%s:%s" .Values.imageCredentials.username .Values.imageCredentials.password | b64enc) | b64enc }}
{{- end }}
{{- end -}}
{{- end -}}
{{- end -}}

View File

@@ -1,122 +0,0 @@
{{- range $key, $value := .Values.agentGroups }}
{{- with $value }}
{{- if .enabled }}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "clearml.fullname" $ }}-{{ .name }}-agent
labels:
{{- include "clearml.labels" $ | nindent 4 }}
spec:
replicas: {{ .replicaCount }}
strategy:
type: {{ .updateStrategy }}
selector:
matchLabels:
{{- include "clearml.selectorLabelsAgent" $ | nindent 6 }}
template:
metadata:
annotations:
checksum/secret: {{ include (print $.Template.BasePath "/secrets.yaml") $ | sha256sum }}
{{- with .podAnnotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "clearml.selectorLabelsAgent" $ | nindent 8 }}
spec:
volumes:
{{ if .clearmlConfig }}
- name: agent-clearml-conf-volume
secret:
secretName: {{ .name }}-conf
items:
- key: clearml.conf
path: clearml.conf
{{ end }}
initContainers:
- name: init-agent-{{ .name }}
image: "{{ .image.repository }}:{{ .image.tag | default $.Chart.AppVersion }}"
command:
- /bin/sh
- -c
- >
set -x;
while [ $(curl -sw '%{http_code}' "{{ include "clearml.serviceApi" $ }}/debug.ping" -o /dev/null) -ne 200 ] ; do
echo "waiting for apiserver" ;
sleep 5 ;
done
containers:
- name: {{ $.Chart.Name }}-{{ .name }}
image: "{{ .image.repository }}:{{ .image.tag }}"
imagePullPolicy: {{ .image.pullPolicy }}
securityContext:
privileged: true
resources:
limits:
nvidia.com/gpu:
{{ .nvidiaGpusPerAgent }}
env:
- name: CLEARML_API_HOST
value: {{ include "clearml.serviceApi" $ }}
- name: CLEARML_WEB_HOST
value: {{ include "clearml.serviceApp" $ }}
- name: CLEARML_FILES_HOST
value: {{ include "clearml.serviceFiles" $ }}
- name: CLEARML_AGENT_GIT_USER
value: {{ .clearmlGitUser}}
- name: CLEARML_AGENT_GIT_PASS
value: {{ .clearmlGitPassword}}
- name: AWS_ACCESS_KEY_ID
value: {{ .awsAccessKeyId}}
- name: AWS_SECRET_ACCESS_KEY
value: {{ .awsSecretAccessKey}}
- name: AWS_DEFAULT_REGION
value: {{ .awsDefaultRegion}}
- name: AZURE_STORAGE_ACCOUNT
value: {{ .azureStorageAccount}}
- name: AZURE_STORAGE_KEY
value: {{ .azureStorageKey}}
- name: CLEARML_API_ACCESS_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: tests_user_key
- name: CLEARML_API_SECRET_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: tests_user_secret
{{- if .extraEnvs }}
{{ toYaml .extraEnvs | nindent 10 }}
{{- end }}
command:
- /bin/sh
- -c
- "apt-get update ;
apt-get install -y curl python3-pip git;
python3 -m pip install -U pip ;
python3 -m pip install clearml-agent{{ .agentVersion}} ;
CLEARML_AGENT_K8S_HOST_MOUNT=/root/.clearml:/root/.clearml clearml-agent daemon --foreground --queue {{ .queues}}"
{{ if .clearmlConfig }}
volumeMounts:
- name: agent-clearml-conf-volume
mountPath: /root/clearml.conf
subPath: clearml.conf
readOnly: true
{{- end }}
{{- with .nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}

View File

@@ -1,64 +0,0 @@
{{- if .Values.agentk8sglue.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: "{{ include "clearml.fullname" . }}-k8sagent"
labels:
app: k8sagent
spec:
replicas: 1
selector:
matchLabels:
app: k8sagent
template:
metadata:
labels:
app: k8sagent
spec:
containers:
- name: k8s-glue-container
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
imagePullPolicy: IfNotPresent
command: ["/bin/bash", "-c", "export PATH=$PATH:$HOME/bin; source /root/.bashrc && /root/entrypoint.sh"]
volumeMounts:
- name: k8sagent-pod-template
mountPath: /root/template
env:
- name: CLEARML_API_HOST
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
- name: CLEARML_WEB_HOST
value: "http://{{ include "clearml.fullname" . }}-webserver"
- name: CLEARML_FILES_HOST
value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}"
- name: K8S_GLUE_MAX_PODS
value: "{{.Values.agentk8sglue.maxPods}}"
- name: K8S_GLUE_QUEUE
value: "{{.Values.agentk8sglue.queue}}"
- name: K8S_GLUE_EXTRA_ARGS
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml"
- name: K8S_DEFAULT_NAMESPACE
value: "{{ .Release.Namespace }}"
- name: CLEARML_API_ACCESS_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: apiserver_key
- name: CLEARML_API_SECRET_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: apiserver_secret
- name: CLEARML_WORKER_ID
value: "{{.Values.agentk8sglue.id}}"
- name: CLEARML_AGENT_UPDATE_REPO
value: ""
- name: FORCE_CLEARML_AGENT_REPO
value: ""
- name: CLEARML_DOCKER_IMAGE
value: "{{.Values.agentk8sglue.defaultDockerImage}}"
volumes:
- name: k8sagent-pod-template
configMap:
name: k8sagent-pod-template
{{- end }}

View File

@@ -1,106 +0,0 @@
{{- if .Values.agentservices.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "clearml.fullname" . }}-agentservices
labels:
{{- include "clearml.labels" . | nindent 4 }}
spec:
replicas: {{ .Values.agentservices.replicaCount }}
selector:
matchLabels:
{{- include "clearml.selectorLabelsAgentServices" . | nindent 6 }}
template:
metadata:
annotations:
checksum/secret: {{ include (print $.Template.BasePath "/secrets.yaml") . | sha256sum }}
{{- with .Values.agentservices.podAnnotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "clearml.selectorLabelsAgentServices" . | nindent 8 }}
spec:
volumes:
- name: agentservices-data
persistentVolumeClaim:
claimName: {{ include "clearml.fullname" . }}-agentservices-data
initContainers:
- name: init-agentservices
image: "{{ .Values.agentservices.image.repository }}:{{ .Values.agentservices.image.tag | default .Chart.AppVersion }}"
command:
- /bin/sh
- -c
- >
set -x;
while [ $(curl -sw '%{http_code}' "{{ include "clearml.serviceApi" $ }}/debug.ping" -o /dev/null) -ne 200 ] ; do
echo "waiting for apiserver" ;
sleep 5 ;
done
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.agentservices.image.repository }}:{{ .Values.agentservices.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.agentservices.image.pullPolicy }}
env:
- name: CLEARML_HOST_IP
value: {{ .Values.agentservices.clearmlHostIp }}
- name: CLEARML_API_HOST
value: {{ include "clearml.serviceApi" $ }}
- name: CLEARML_WEB_HOST
value: {{ .Values.agentservices.clearmlWebHost }}
- name: CLEARML_FILES_HOST
value: {{ .Values.agentservices.clearmlFilesHost }}
- name: CLEARML_AGENT_GIT_USER
value: {{ .Values.agentservices.clearmlGitUser }}
- name: CLEARML_AGENT_GIT_PASS
value: {{ .Values.agentservices.clearmlGitPassword }}
- name: CLEARML_AGENT_UPDATE_VERSION
value: {{ .Values.agentservices.agentVersion }}
- name: CLEARML_AGENT_DEFAULT_BASE_DOCKER
value: {{ .Values.agentservices.defaultBaseDocker }}
- name: AWS_ACCESS_KEY_ID
value: {{ .Values.agentservices.awsAccessKeyId }}
- name: AWS_SECRET_ACCESS_KEY
value: {{ .Values.agentservices.awsSecretAccessKey }}
- name: AWS_DEFAULT_REGION
value: {{ .Values.agentservices.awsDefaultRegion }}
- name: AZURE_STORAGE_ACCOUNT
value: {{ .Values.agentservices.azureStorageAccount }}
- name: AZURE_STORAGE_KEY
value: {{ .Values.agentservices.azureStorageKey }}
- name: GOOGLE_APPLICATION_CREDENTIALS
value: {{ .Values.agentservices.googleCredentials }}
- name: CLEARML_WORKER_ID
value: {{ .Values.agentservices.clearmlWorkerId }}
- name: CLEARML_API_ACCESS_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: tests_user_key
- name: CLEARML_API_SECRET_KEY
valueFrom:
secretKeyRef:
name: clearml-conf
key: tests_user_secret
{{- if .Values.agentservices.extraEnvs }}
{{ toYaml .Values.agentservices.extraEnvs | nindent 10 }}
{{- end }}
args:
- agentservices
volumeMounts:
- name: agentservices-data
mountPath: /root/.clearml
resources:
{{- toYaml .Values.agentservices.resources | nindent 12 }}
{{- with .Values.agentservices.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.agentservices.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.agentservices.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}

View File

@@ -19,7 +19,14 @@ spec:
labels:
{{- include "clearml.selectorLabelsApiServer" . | nindent 8 }}
spec:
{{- include "clearml.imagePullSecrets" . | indent 6 }}
{{- if .Values.imageCredentials.enabled }}
imagePullSecrets:
{{- if .Values.imageCredentials.existingSecret }}
- name: {{ .Values.imageCredentials.existingSecret }}
{{- else }}
- name: clearml-agent-registry-key
{{- end }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.apiserver.image.repository }}:{{ .Values.apiserver.image.tag | default .Chart.AppVersion }}"

View File

@@ -22,7 +22,14 @@ spec:
- name: fileserver-data
persistentVolumeClaim:
claimName: {{ include "clearml.fullname" . }}-fileserver-data
{{- include "clearml.imagePullSecrets" . | indent 6 }}
{{- if .Values.imageCredentials.enabled }}
imagePullSecrets:
{{- if .Values.imageCredentials.existingSecret }}
- name: {{ .Values.imageCredentials.existingSecret }}
{{- else }}
- name: clearml-agent-registry-key
{{- end }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.fileserver.image.repository }}:{{ .Values.fileserver.image.tag | default .Chart.AppVersion }}"

View File

@@ -18,7 +18,14 @@ spec:
labels:
{{- include "clearml.selectorLabelsWebServer" . | nindent 8 }}
spec:
{{- include "clearml.imagePullSecrets" . | indent 6 }}
{{- if .Values.imageCredentials.enabled }}
imagePullSecrets:
{{- if .Values.imageCredentials.existingSecret }}
- name: {{ .Values.imageCredentials.existingSecret }}
{{- else }}
- name: clearml-agent-registry-key
{{- end }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.webserver.image.repository }}:{{ .Values.webserver.image.tag | default .Chart.AppVersion }}"

View File

@@ -0,0 +1,9 @@
{{- if .Values.imageCredentials.enabled -}}
apiVersion: v1
kind: Secret
metadata:
name: clearml-agent-registry-key
type: kubernetes.io/dockerconfigjson
data:
.dockerconfigjson: {{ template "imagePullSecret" . }}
{{- end }}

View File

@@ -1,17 +0,0 @@
{{- if .Values.agentservices.enabled }}
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: {{ include "clearml.fullname" . }}-agentservices-data
labels:
{{- include "clearml.labels" . | nindent 4 }}
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: {{ .Values.agentservices.storage.data.size | quote }}
{{- if .Values.agentservices.storage.data.class -}}
storageClassName: {{ .Values.agentservices.storage.data.class | quote }}
{{- end -}}
{{- end }}

View File

@@ -10,7 +10,7 @@ spec:
resources:
requests:
storage: {{ .Values.fileserver.storage.data.size | quote }}
{{- if .Values.fileserver.storage.data.class -}}
{{- if .Values.fileserver.storage.data.class }}
storageClassName: {{ .Values.fileserver.storage.data.class | quote }}
{{- end -}}

View File

@@ -1,13 +0,0 @@
{{- range $key, $value := .Values.agentGroups }}
{{- with $value }}
---
{{ if .clearmlConfig }}
apiVersion: v1
kind: Secret
metadata:
name: {{ .name }}-conf
data:
clearml.conf: {{ .clearmlConfig | b64enc }}
{{ end }}
{{- end }}
{{- end }}

View File

@@ -1,6 +1,19 @@
# global:
# imagePullSecrets:
# - docker-cfg
# -- Private image registry configuration
imageCredentials:
# -- Use private authentication mode
enabled: false
# -- If this is set, chart will not generate a secret but will use what is defined here
existingSecret: ""
# -- Registry name
registry: docker.io
# -- Registry username
username: someone
# -- Registry password
password: pwd
# -- Email
email: someone@host.com
# -- ClearMl generic configurations
clearml:
defaultCompany: "d1bd92a3b039400cbafc60a7a5b1e52b"
ingress:
@@ -67,7 +80,7 @@ apiserver:
image:
repository: "allegroai/clearml"
pullPolicy: IfNotPresent
tag: "1.4.0"
tag: "1.6.0"
extraEnvs: []
@@ -136,7 +149,7 @@ fileserver:
image:
repository: "allegroai/clearml"
pullPolicy: IfNotPresent
tag: "1.4.0"
tag: "1.6.0"
extraEnvs: []
@@ -181,7 +194,7 @@ webserver:
image:
repository: "allegroai/clearml"
pullPolicy: IfNotPresent
tag: "1.4.0"
tag: "1.6.0"
podAnnotations: {}
@@ -205,164 +218,6 @@ webserver:
additionalConfigs: {}
agentservices:
enabled: false
clearmlHostIp: null
agentVersion: ""
clearmlWebHost: null
clearmlFilesHost: null
clearmlGitUser: null
clearmlGitPassword: null
awsAccessKeyId: null
awsSecretAccessKey: null
awsDefaultRegion: null
azureStorageAccount: null
azureStorageKey: null
googleCredentials: null
clearmlWorkerId: "clearml-services"
replicaCount: 1
image:
repository: "allegroai/clearml-agent-services"
pullPolicy: IfNotPresent
tag: "latest"
extraEnvs: []
podAnnotations: {}
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
nodeSelector: {}
tolerations: []
affinity: {}
storage:
data:
class: ""
size: 50Gi
agentGroups:
agent-group-cpu:
enabled: false
name: agent-group-cpu
replicaCount: 1
updateStrategy: Recreate
nvidiaGpusPerAgent: 0
agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
queues: "default" # multiple queues can be specified separated by a space (e.g. "important_jobs default")
clearmlGitUser: null
clearmlGitPassword: null
clearmlAccessKey: null
clearmlSecretKey: null
awsAccessKeyId: null
awsSecretAccessKey: null
awsDefaultRegion: null
azureStorageAccount: null
azureStorageKey: null
clearmlConfig: |-
sdk {
}
image:
repository: "ubuntu"
pullPolicy: IfNotPresent
tag: "18.04"
extraEnvs: []
podAnnotations: {}
nodeSelector: {}
tolerations: []
affinity: {}
agent-group-gpu:
enabled: false
name: agent-group-gpu
replicaCount: 0
updateStrategy: Recreate
nvidiaGpusPerAgent: 1
agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
queues: "default" # multiple queues can be specified separated by a space (e.g. "important_jobs default")
clearmlGitUser: null
clearmlGitPassword: null
clearmlAccessKey: null
clearmlSecretKey: null
awsAccessKeyId: null
awsSecretAccessKey: null
awsDefaultRegion: null
azureStorageAccount: null
azureStorageKey: null
clearmlConfig: |-
sdk {
}
image:
repository: "nvidia/cuda"
pullPolicy: IfNotPresent
tag: "11.0-base-ubuntu18.04"
podAnnotations: {}
nodeSelector: {}
tolerations: []
affinity: {}
# This agent will spawn queued experiments in new pods, a good use case is to combine this with
# GPU autoscaling nodes.
# https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue
agentk8sglue:
enabled: true
image:
repository: "allegroai/clearml-agent-k8s"
tag: "base-1.21"
serviceAccountName: default
maxPods: 10
defaultDockerImage: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04 # default docker image that is spawned as new pod
queue: default
id: k8s-agent
podTemplate:
volumes: []
# - name: "yourvolume"
# path: "/yourpath"
env: []
# # to setup access to private repo, setup secret with git credentials:
# - name: CLEARML_AGENT_GIT_USER
# value: mygitusername
# - name: CLEARML_AGENT_GIT_PASS
# valueFrom:
# secretKeyRef:
# name: git-password
# key: git-password
resources: {}
# limits:
# nvidia.com/gpu: 1
tolerations: []
# - key: "nvidia.com/gpu"
# operator: Exists
# effect: "NoSchedule"
nodeSelector: {}
# fleet: gpu-nodes
externalServices:
# -- Existing ElasticSearch Hostname to use if elasticsearch.enabled is false
elasticsearchHost: ""