mirror of
https://github.com/clearml/clearml-helm-charts
synced 2025-03-03 10:42:06 +00:00
Clearml agent enterprise features (#121)
* Added: enterprise features alignment * Changed: version bump * Fixed: trailing spaces * Fixed: comment starting space * Changed: owner-token feature * Fixed: secret reference name * Changed: owner-token enterprise reference
This commit is contained in:
parent
cb98ae9a19
commit
7041c62f44
@ -2,7 +2,7 @@ apiVersion: v2
|
||||
name: clearml-agent
|
||||
description: MLOps platform
|
||||
type: application
|
||||
version: "2.0.2"
|
||||
version: "3.0.0"
|
||||
appVersion: "1.24"
|
||||
kubeVersion: ">= 1.19.0-0 < 1.26.0-0"
|
||||
home: https://clear.ml
|
||||
|
@ -1,6 +1,6 @@
|
||||
# ClearML Kubernetes Agent
|
||||
|
||||
  
|
||||
  
|
||||
|
||||
MLOps platform
|
||||
|
||||
@ -30,25 +30,35 @@ Kubernetes: `>= 1.19.0-0 < 1.26.0-0`
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| agentk8sglue | object | `{"apiServerUrlReference":"https://api.clear.ml","clearmlcheckCertificate":true,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileServerUrlReference":"https://files.clear.ml","id":"k8s-agent","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"maxPods":10,"podTemplate":{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"queue":"default","replicaCount":1,"serviceAccountName":"default","webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
|
||||
| agentk8sglue | object | `{"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"queue":"default","replicaCount":1,"serviceExistingAccountName":"","volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
|
||||
| agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url |
|
||||
| agentk8sglue.basePodTemplate | object | `{"env":[],"fileMounts":[],"hostAliases":{},"initContainers":[],"labels":{},"nodeSelector":{},"resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | base template for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.basePodTemplate.env | list | `[]` | environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.fileMounts | list | `[]` | file definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.hostAliases | object | `{}` | hostAliases setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.initContainers | list | `[]` | initContainers definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.labels | object | `{}` | labels setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.nodeSelector | object | `{}` | nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.resources | object | `{}` | resources declaration for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.schedulerName | string | `""` | schedulerName setup for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.basePodTemplate.securityContext | object | `{}` | securityContext setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.tolerations | list | `[]` | tolerations setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.volumeMounts | list | `[]` | volume mounts definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.basePodTemplate.volumes | list | `[]` | volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.clearmlcheckCertificate | bool | `true` | Check certificates validity for evefry UrlReference below. |
|
||||
| agentk8sglue.containerCustomBashScript | string | `""` | Custom Bash script for the Task Pods ran by Glue Agent |
|
||||
| agentk8sglue.customBashScript | string | `""` | Custom Bash script for the Glue Agent |
|
||||
| agentk8sglue.debugMode | bool | `false` | Enable Debugging logs for Agent pod |
|
||||
| agentk8sglue.defaultContainerImage | string | `"ubuntu:18.04"` | default container image for ClearML Task pod |
|
||||
| agentk8sglue.extraEnvs | list | `[]` | Environment variables to be exposed in the agentk8sglue pods |
|
||||
| agentk8sglue.extraEnvs | list | `[]` | Extra Environment variables for Glue Agent |
|
||||
| agentk8sglue.fileMounts | list | `[]` | file definition for Glue Agent (example in values.yaml comments) |
|
||||
| agentk8sglue.fileServerUrlReference | string | `"https://files.clear.ml"` | Reference to File server url |
|
||||
| agentk8sglue.id | string | `"k8s-agent"` | ClearML worker ID (must be unique across the entire ClearMLenvironment) |
|
||||
| agentk8sglue.image | object | `{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"}` | Glue Agent image configuration |
|
||||
| agentk8sglue.maxPods | int | `10` | maximum concurrent consume ClearML Task pod |
|
||||
| agentk8sglue.podTemplate | object | `{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumeMounts":[],"volumes":[]}` | template for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.podTemplate.env | list | `[]` | environment variables for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.podTemplate.resources | object | `{}` | resources declaration for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.podTemplate.tolerations | list | `[]` | tolerations setup for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.podTemplate.volumeMounts | list | `[]` | volumeMounts definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.podTemplate.volumes | list | `[]` | volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments) |
|
||||
| agentk8sglue.queue | string | `"default"` | ClearML queue this agent will consume |
|
||||
| agentk8sglue.replicaCount | int | `1` | Glue Agent number of pods |
|
||||
| agentk8sglue.serviceAccountName | string | `"default"` | serviceAccountName for pods spawned to consume ClearML Task |
|
||||
| agentk8sglue.serviceExistingAccountName | string | `""` | if set, don't create a serviceAccountName but use defined existing one |
|
||||
| agentk8sglue.volumeMounts | list | `[]` | volume mounts definition for Glue Agent (example in values.yaml comments) |
|
||||
| agentk8sglue.volumes | list | `[]` | volumes definition for Glue Agent (example in values.yaml comments) |
|
||||
| agentk8sglue.webServerUrlReference | string | `"https://app.clear.ml"` | Reference to Web server url |
|
||||
| clearml | object | `{"agentk8sglueKey":"ACCESSKEY","agentk8sglueSecret":"SECRETKEY","clearmlConfig":"sdk {\n}","existingAgentk8sglueSecret":"","existingClearmlConfigSecret":""}` | ClearMl generic configurations |
|
||||
| clearml.agentk8sglueKey | string | `"ACCESSKEY"` | Agent k8s Glue basic auth key |
|
||||
@ -56,6 +66,19 @@ Kubernetes: `>= 1.19.0-0 < 1.26.0-0`
|
||||
| clearml.clearmlConfig | string | `"sdk {\n}"` | ClearML configuration file |
|
||||
| clearml.existingAgentk8sglueSecret | string | `""` | If this is set, chart will not generate a secret but will use what is defined here |
|
||||
| clearml.existingClearmlConfigSecret | string | `""` | If this is set, chart will not generate a secret but will use what is defined here |
|
||||
| enterpriseFeatures | object | `{"applyVaultEnvVars":true,"enabled":false,"maxPods":10,"monitoredResources":{"maxResources":0,"maxResourcesFieldName":"resources|limits|nvidia.com/gpu","minResourcesFieldName":"resources|limits|nvidia.com/gpu"},"queues":{"default":{"templateOverrides":{}}},"serviceAccountClusterAccess":false,"useOwnerToken":true}` | Enterprise features (work only with an Enterprise license) |
|
||||
| enterpriseFeatures.applyVaultEnvVars | bool | `true` | push env vars from Clear.ML Vault to task pods |
|
||||
| enterpriseFeatures.enabled | bool | `false` | Enable/Disable Enterprise features |
|
||||
| enterpriseFeatures.maxPods | int | `10` | maximum concurrent consume ClearML Task pod |
|
||||
| enterpriseFeatures.monitoredResources | object | `{"maxResources":0,"maxResourcesFieldName":"resources|limits|nvidia.com/gpu","minResourcesFieldName":"resources|limits|nvidia.com/gpu"}` | GPU resource general counters |
|
||||
| enterpriseFeatures.monitoredResources.maxResources | int | `0` | Maximum resources counter |
|
||||
| enterpriseFeatures.monitoredResources.maxResourcesFieldName | string | `"resources|limits|nvidia.com/gpu"` | Field name used by Agent to count maximum resources |
|
||||
| enterpriseFeatures.monitoredResources.minResourcesFieldName | string | `"resources|limits|nvidia.com/gpu"` | Field name used by Agent to count minimum resources |
|
||||
| enterpriseFeatures.queues | object | `{"default":{"templateOverrides":{}}}` | ClearML queues and related template OVERRIDES used this agent will consume |
|
||||
| enterpriseFeatures.queues.default | object | `{"templateOverrides":{}}` | name of the queue will be used for this template |
|
||||
| enterpriseFeatures.queues.default.templateOverrides | object | `{}` | overrides of the base template for this queue (must be declared even if empty!) |
|
||||
| enterpriseFeatures.serviceAccountClusterAccess | bool | `false` | service account access every namespace flag |
|
||||
| enterpriseFeatures.useOwnerToken | bool | `true` | Agent must use owner Token |
|
||||
| imageCredentials | object | `{"email":"someone@host.com","enabled":false,"existingSecret":"","password":"pwd","registry":"docker.io","username":"someone"}` | Private image registry configuration |
|
||||
| imageCredentials.email | string | `"someone@host.com"` | Email |
|
||||
| imageCredentials.enabled | bool | `false` | Use private authentication mode |
|
||||
@ -63,6 +86,15 @@ Kubernetes: `>= 1.19.0-0 < 1.26.0-0`
|
||||
| imageCredentials.password | string | `"pwd"` | Registry password |
|
||||
| imageCredentials.registry | string | `"docker.io"` | Registry name |
|
||||
| imageCredentials.username | string | `"someone"` | Registry username |
|
||||
| sessions | object | `{"dynamicSvcs":false,"externalIP":"0.0.0.0","maxServices":20,"portModeEnabled":false,"setInteractiveQueuesTag":true,"startingPort":30000,"svcAnnotations":{},"svcType":"NodePort"}` | Sessions internal service configuration |
|
||||
| sessions.dynamicSvcs | bool | `false` | Enable/Disable dynamic svc for sessions pods |
|
||||
| sessions.externalIP | string | `"0.0.0.0"` | External IP sessions clients can connect to |
|
||||
| sessions.maxServices | int | `20` | maximum number of NodePorts exposed |
|
||||
| sessions.portModeEnabled | bool | `false` | Enable/Disable sessions portmode WARNING: only one Agent deployment can have this set to true |
|
||||
| sessions.setInteractiveQueuesTag | bool | `true` | set interactive queue tags |
|
||||
| sessions.startingPort | int | `30000` | starting range of exposed NodePorts |
|
||||
| sessions.svcAnnotations | object | `{}` | specific annotations for session services |
|
||||
| sessions.svcType | string | `"NodePort"` | service type ("NodePort" or "ClusterIP" or "LoadBalancer") |
|
||||
|
||||
# Upgrading Chart
|
||||
|
||||
|
@ -2,32 +2,14 @@
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "clearml.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "clearml.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- .Release.Name | trunc 59 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "clearml.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 59 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
@ -50,29 +32,22 @@ app.kubernetes.io/name: {{ include "clearml.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Reference Name (agentk8sglue)
|
||||
*/}}
|
||||
{{- define "agentk8sglue.referenceName" -}}
|
||||
{{- include "clearml.fullname" . }}-agentk8sglue
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels (agentk8sglue)
|
||||
*/}}
|
||||
{{- define "agentk8sglue.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "clearml.name" . }}
|
||||
app.kubernetes.io/instance: {{ include "agentk8sglue.referenceName" . }}
|
||||
app.kubernetes.io/instance: {{ include "clearml.name" . }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "clearml.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "clearml.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- if .Values.agentk8sglue.serviceExistingAccountName }}
|
||||
{{- .Values.agentk8sglue.serviceExistingAccountName }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- include "clearml.name" . }}-sa
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
@ -84,3 +59,15 @@ Create secret to access docker registry
|
||||
{{- printf "{\"auths\":{\"%s\":{\"username\":\"%s\",\"password\":\"%s\",\"email\":\"%s\",\"auth\":\"%s\"}}}" .registry .username .password .email (printf "%s:%s" .username .password | b64enc) | b64enc }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
|
||||
{{/*
|
||||
Create a string composed by queue names
|
||||
*/}}
|
||||
{{- define "agentk8sglue.queues" -}}
|
||||
{{- $list := list }}
|
||||
{{- range $key, $value := .Values.agentk8sglue.queues }}
|
||||
{{- $list = append $list (printf "%s" $key) }}
|
||||
{{- end }}
|
||||
{{- join " " $list }}
|
||||
{{- end }}
|
||||
|
@ -1,8 +1,178 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pod-template
|
||||
name: {{ include "clearml.name" . }}-pt
|
||||
data:
|
||||
{{- if .Values.enterpriseFeatures.enabled }}
|
||||
template.yaml: |
|
||||
{{- range $key, $value := $.Values.agentk8sglue.queues }}
|
||||
{{ $key }}:
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
{{- if $value.templateOverrides.labels }}
|
||||
labels:
|
||||
{{- toYaml $value.templateOverrides.labels | nindent 10 }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.labels }}
|
||||
labels:
|
||||
{{- toYaml $.Values.agentk8sglue.basePodTemplate.labels | nindent 10 }}
|
||||
{{- end}}
|
||||
spec:
|
||||
{{- if $.Values.imageCredentials.enabled }}
|
||||
imagePullSecrets:
|
||||
{{- if $.Values.imageCredentials.existingSecret }}
|
||||
- name: $.Values.imageCredentials.existingSecret
|
||||
{{- else }}
|
||||
- name: {{ include "clearml.name" $ }}-ark
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if $value.templateOverrides.schedulerName }}
|
||||
schedulerName: {{ $value.templateOverrides.schedulerName }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.schedulerName }}
|
||||
schedulerName: {{ $.Values.agentk8sglue.basePodTemplate.schedulerName }}
|
||||
{{- end}}
|
||||
restartPolicy: Never
|
||||
{{- if $value.templateOverrides.securityContext }}
|
||||
securityContext:
|
||||
{{- toYaml $value.templateOverrides.securityContext | nindent 10 }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.securityContext }}
|
||||
securityContext:
|
||||
{{- toYaml $.Values.agentk8sglue.basePodTemplate.securityContext | nindent 10 }}
|
||||
{{- end}}
|
||||
{{- if $value.templateOverrides.hostAliases }}
|
||||
{{- with $value.templateOverrides.hostAliases }}
|
||||
hostAliases:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.hostAliases }}
|
||||
{{- with $.Values.agentk8sglue.basePodTemplate.hostAliases }}
|
||||
hostAliases:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
volumes:
|
||||
{{- if $value.templateOverrides.volumes }}
|
||||
{{- toYaml $value.templateOverrides.volumes | nindent 10 }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.volumes }}
|
||||
{{- toYaml $.Values.agentk8sglue.basePodTemplate.volumes | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- if $value.templateOverrides.fileMounts }}
|
||||
- name: filemounts
|
||||
secret:
|
||||
secretName: {{ include "clearml.name" $ }}-{{ $key }}-fm
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.fileMounts }}
|
||||
- name: filemounts
|
||||
secret:
|
||||
secretName: {{ include "clearml.name" $ }}-fm
|
||||
{{- end }}
|
||||
{{- if $value.templateOverrides.initContainers }}
|
||||
initContainers:
|
||||
{{- toYaml $value.templateOverrides.initContainers | nindent 10 }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.initContainers }}
|
||||
initContainers:
|
||||
{{- toYaml $.Values.agentk8sglue.basePodTemplate.initContainers | nindent 10 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- resources:
|
||||
{{- if $value.templateOverrides.resources }}
|
||||
{{- toYaml $value.templateOverrides.resources | nindent 12 }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.resources }}
|
||||
{{- toYaml $.Values.agentk8sglue.basePodTemplate.resources | nindent 12 }}
|
||||
{{- end}}
|
||||
ports:
|
||||
- containerPort: 10022
|
||||
volumeMounts:
|
||||
{{- if $value.templateOverrides.volumeMounts }}
|
||||
{{- toYaml $value.templateOverrides.volumeMounts | nindent 12 }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.volumeMounts }}
|
||||
{{- toYaml $.Values.agentk8sglue.basePodTemplate.volumeMounts | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- if $value.templateOverrides.fileMounts }}
|
||||
{{- range $value.templateOverrides.fileMounts }}
|
||||
- name: filemounts
|
||||
mountPath: "{{ .folderPath }}/{{ .name }}"
|
||||
subPath: "{{ .name }}"
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.fileMounts }}
|
||||
{{- range $.Values.agentk8sglue.basePodTemplate.fileMounts }}
|
||||
- name: filemounts
|
||||
mountPath: "{{ .folderPath }}/{{ .name }}"
|
||||
subPath: "{{ .name }}"
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
env:
|
||||
- name: CLEARML_API_HOST
|
||||
value: {{ $.Values.agentk8sglue.apiServerUrlReference }}
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: {{ $.Values.agentk8sglue.webServerUrlReference }}
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: {{ $.Values.agentk8sglue.fileServerUrlReference }}
|
||||
{{- if not $.Values.agentk8sglue.useOwnerToken }}
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
{{- if .Values.clearml.existingAgentk8sglueSecret }}
|
||||
name: {{ .Values.clearml.existingAgentk8sglueSecret }}
|
||||
{{- else }}
|
||||
name: {{ include "clearml.name" . }}-ac
|
||||
{{- end }}
|
||||
key: agentk8sglue_key
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
{{- if .Values.clearml.existingAgentk8sglueSecret }}
|
||||
name: {{ .Values.clearml.existingAgentk8sglueSecret }}
|
||||
{{- else }}
|
||||
name: {{ include "clearml.name" . }}-ac
|
||||
{{- end }}
|
||||
key: agentk8sglue_secret
|
||||
{{- end }}
|
||||
- name: PYTHONUNBUFFERED
|
||||
value: "x"
|
||||
{{- if not $.Values.agentk8sglue.clearmlcheckCertificate }}
|
||||
- name: CLEARML_API_HOST_VERIFY_CERT
|
||||
value: "false"
|
||||
{{- end }}
|
||||
{{- if $value.templateOverrides.env }}
|
||||
{{- toYaml $value.templateOverrides.env | nindent 12 }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.env }}
|
||||
{{- toYaml $.Values.agentk8sglue.basePodTemplate.env | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- if $value.templateOverrides.nodeSelector }}
|
||||
{{- with $value.templateOverrides.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.nodeSelector }}
|
||||
{{- with $.Values.agentk8sglue.basePodTemplate.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if $value.templateOverrides.tolerations }}
|
||||
{{- with $value.templateOverrides.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.tolerations }}
|
||||
{{- with $.Values.agentk8sglue.basePodTemplate.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
secrets.yaml: |
|
||||
{{- range $key, $value := $.Values.agentk8sglue.queues }}
|
||||
{{ $key }}:
|
||||
{{- if $value.templateOverrides.fileMounts }}
|
||||
- {{ include "clearml.name" $ }}-{{ $key }}-fm
|
||||
{{- else if $.Values.agentk8sglue.basePodTemplate.fileMounts }}
|
||||
- {{ include "clearml.name" $ }}-fm
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- else }}
|
||||
template.yaml: |
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
@ -13,20 +183,19 @@ data:
|
||||
{{- if .Values.imageCredentials.existingSecret }}
|
||||
- name: {{.Values.imageCredentials.existingSecret}}
|
||||
{{- else }}
|
||||
- name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-registry-key
|
||||
- name: name: {{ include "clearml.name" $ }}-ark
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ .Values.agentk8sglue.serviceAccountName }}
|
||||
{{- with .Values.agentk8sglue.podTemplate.volumes }}
|
||||
{{- with .Values.agentk8sglue.basePodTemplate.volumes }}
|
||||
volumes:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- resources:
|
||||
{{- toYaml .Values.agentk8sglue.podTemplate.resources | nindent 10 }}
|
||||
{{- toYaml .Values.agentk8sglue.basePodTemplate.resources | nindent 10 }}
|
||||
ports:
|
||||
- containerPort: 10022
|
||||
{{- with .Values.agentk8sglue.podTemplate.volumeMounts }}
|
||||
{{- with .Values.agentk8sglue.basePodTemplate.volumeMounts }}
|
||||
volumeMounts:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
@ -43,7 +212,7 @@ data:
|
||||
{{- if .Values.clearml.existingAgentk8sglueSecret }}
|
||||
name: {{ .Values.clearml.existingAgentk8sglueSecret }}
|
||||
{{- else }}
|
||||
name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-k8sglue
|
||||
name: {{ include "clearml.name" . }}-ac
|
||||
{{- end }}
|
||||
key: agentk8sglue_key
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
@ -52,17 +221,48 @@ data:
|
||||
{{- if .Values.clearml.existingAgentk8sglueSecret }}
|
||||
name: {{ .Values.clearml.existingAgentk8sglueSecret }}
|
||||
{{- else }}
|
||||
name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-k8sglue
|
||||
name: {{ include "clearml.name" . }}-ac
|
||||
{{- end }}
|
||||
key: agentk8sglue_secret
|
||||
{{- if .Values.agentk8sglue.podTemplate.env }}
|
||||
{{ toYaml .Values.agentk8sglue.podTemplate.env | nindent 8 }}
|
||||
{{- if .Values.agentk8sglue.basePodTemplate.env }}
|
||||
{{ toYaml .Values.agentk8sglue.basePodTemplate.env | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.podTemplate.nodeSelector}}
|
||||
{{- with .Values.agentk8sglue.basePodTemplate.nodeSelector}}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.podTemplate.tolerations }}
|
||||
{{- with .Values.agentk8sglue.basePodTemplate.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.sessions.portModeEnabled }}
|
||||
{{- range untilStep 1 ( ( add .Values.sessions.maxServices 1 ) | int ) 1 }}
|
||||
services-{{ . }}.yaml: |
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: clearml-session-{{ . }}
|
||||
labels:
|
||||
{{- include "clearml.labels" $ | nindent 8 }}
|
||||
{{- with $.Values.sessions.svcAnnotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
type: {{ $.Values.sessions.svcType }}
|
||||
ports:
|
||||
- targetPort: 10022
|
||||
{{- if eq $.Values.sessions.svcType "NodePort" }}
|
||||
port: 10022
|
||||
{{- else }}
|
||||
port: {{ add $.Values.sessions.startingPort . }}
|
||||
{{- end }}
|
||||
protocol: TCP
|
||||
{{- if eq $.Values.sessions.svcType "NodePort" }}
|
||||
nodePort: {{ add $.Values.sessions.startingPort . }}
|
||||
{{- end }}
|
||||
selector:
|
||||
ai-allegro-agent-serial: pod-{{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
@ -1,7 +1,7 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "agentk8sglue.referenceName" . }}
|
||||
name: {{ include "clearml.name" . }}
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
spec:
|
||||
@ -19,25 +19,60 @@ spec:
|
||||
{{- if .Values.imageCredentials.enabled }}
|
||||
imagePullSecrets:
|
||||
{{- if .Values.imageCredentials.existingSecret }}
|
||||
- name: "{{.Values.imageCredentials.existingSecret}}"
|
||||
- name: .Values.imageCredentials.existingSecret
|
||||
{{- else }}
|
||||
- name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-registry-key
|
||||
- name: {{ include "clearml.name" . }}-ark
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ include "clearml.serviceAccountName" . }}
|
||||
initContainers:
|
||||
- name: init-k8s-glue
|
||||
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- >
|
||||
set -x;
|
||||
while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.apiServerUrlReference}}/debug.ping" -o /dev/null) -ne 200 ] ; do
|
||||
echo "waiting for apiserver" ;
|
||||
sleep 5 ;
|
||||
done;
|
||||
while [[ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.fileServerUrlReference}}/" -o /dev/null) =~ 403|405 ]] ; do
|
||||
echo "waiting for fileserver" ;
|
||||
sleep 5 ;
|
||||
done;
|
||||
while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.webServerUrlReference}}/" -o /dev/null) -ne 200 ] ; do
|
||||
echo "waiting for webserver" ;
|
||||
sleep 5 ;
|
||||
done
|
||||
containers:
|
||||
- name: k8s-glue
|
||||
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
|
||||
imagePullPolicy: IfNotPresent
|
||||
command: ["/bin/bash", "-c", "export PATH=$PATH:$HOME/bin; source /root/.bashrc && /root/entrypoint.sh"]
|
||||
command:
|
||||
- /bin/bash
|
||||
- -c
|
||||
- >
|
||||
export PATH=$PATH:$HOME/bin;
|
||||
source /root/.bashrc && /root/entrypoint.sh
|
||||
volumeMounts:
|
||||
- name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pod-template
|
||||
- name: {{ include "clearml.name" . }}-pt
|
||||
mountPath: /root/template
|
||||
{{- if or .Values.clearml.clearmlConfig .Values.clearml.existingClearmlConfigSecret }}
|
||||
{{ if .Values.clearml.clearmlConfig }}
|
||||
- name: k8sagent-clearml-conf-volume
|
||||
mountPath: /root/clearml.conf
|
||||
subPath: clearml.conf
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
{{- if .Values.agentk8sglue.volumeMounts }}
|
||||
{{- toYaml .Values.agentk8sglue.volumeMounts | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- range .Values.agentk8sglue.fileMounts }}
|
||||
- name: filemounts
|
||||
mountPath: "{{ .folderPath }}/{{ .name }}"
|
||||
subPath: "{{ .name }}"
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
env:
|
||||
- name: CLEARML_API_HOST
|
||||
value: "{{.Values.agentk8sglue.apiServerUrlReference}}"
|
||||
@ -45,56 +80,104 @@ spec:
|
||||
value: "{{.Values.agentk8sglue.webServerUrlReference}}"
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: "{{.Values.agentk8sglue.fileServerUrlReference}}"
|
||||
- name: K8S_GLUE_MAX_PODS
|
||||
value: "{{.Values.agentk8sglue.maxPods}}"
|
||||
- name: K8S_GLUE_QUEUE
|
||||
value: "{{.Values.agentk8sglue.queue}}"
|
||||
{{- if not .Values.agentk8sglue.clearmlcheckCertificate }}
|
||||
- name: CLEARML_API_HOST_VERIFY_CERT
|
||||
value: "false"
|
||||
{{- end }}
|
||||
{{- if .Values.sessions.portModeEnabled }}
|
||||
- name: K8S_GLUE_EXTRA_ARGS
|
||||
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml"
|
||||
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml \
|
||||
--ports-mode --num-of-services {{ .Values.sessions.maxServices }} \
|
||||
--base-port {{ .Values.sessions.startingPort }} \
|
||||
--gateway-address {{ .Values.sessions.externalIP }}{{ if .Values.enterpriseFeatures.enabled }}{{ if .Values.enterpriseFeatures.useOwnerToken }} --use-owner-token{{ end }}{{ end }}"
|
||||
{{- if .Values.sessions.dynamicSvcs }}
|
||||
- name: CLEARML_K8S_GLUE_POD_POST_APPLY_CMD
|
||||
value: "kubectl -n {namespace} apply -f ~/template/services-{pod_number}.yaml ; kubectl -n {namespace} label svc clearml-session-{pod_number} service-for={pod_name}"
|
||||
- name: CLEARML_K8S_GLUE_POD_POST_DELETE_CMD
|
||||
value: "kubectl -n {namespace} delete svc -l service-for={pod_name}"
|
||||
{{- end }}
|
||||
{{- else}}
|
||||
- name: K8S_GLUE_EXTRA_ARGS
|
||||
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml \
|
||||
--max-pods {{.Values.enterpriseFeatures.maxPods}}{{ if .Values.enterpriseFeatures.enabled }}{{ if .Values.enterpriseFeatures.useOwnerToken }} --use-owner-token{{ end }}{{ end }}"
|
||||
{{- end }}
|
||||
- name: CLEARML_K8S_GLUE_LIMIT_POD_LABEL
|
||||
value: "ai-allegro-agent-serial=pod-{pod_number}"
|
||||
- name: CLEARML_K8S_SECRETS_LIST_FILE
|
||||
value: /root/template/secrets.yaml
|
||||
- name: K8S_DEFAULT_NAMESPACE
|
||||
value: "{{ .Release.Namespace }}"
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
{{- if .Values.clearml.existingAgentk8sglueSecret }}
|
||||
name: {{ .Values.clearml.existingAgentk8sglueSecret }}
|
||||
{{- else }}
|
||||
name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-k8sglue
|
||||
{{- end }}
|
||||
name: {{ include "clearml.name" . }}-ac
|
||||
key: agentk8sglue_key
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
{{- if .Values.clearml.existingAgentk8sglueSecret }}
|
||||
name: {{ .Values.clearml.existingAgentk8sglueSecret }}
|
||||
{{- else }}
|
||||
name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-k8sglue
|
||||
{{- end }}
|
||||
name: {{ include "clearml.name" . }}-ac
|
||||
key: agentk8sglue_secret
|
||||
- name: CLEARML_WORKER_ID
|
||||
value: "{{.Values.agentk8sglue.id}}"
|
||||
value: {{ include "clearml.name" . }}
|
||||
- name: CLEARML_AGENT_UPDATE_REPO
|
||||
value: ""
|
||||
- name: FORCE_CLEARML_AGENT_REPO
|
||||
value: ""
|
||||
value: ""
|
||||
- name: CLEARML_DOCKER_IMAGE
|
||||
value: "{{.Values.agentk8sglue.defaultContainerImage}}"
|
||||
{{ if .Values.agentk8sglue.customBashScript }}
|
||||
- name: CLEARML_K8S_GLUE_EXTRA_BASH_SCRIPT
|
||||
value: "{{.Values.agentk8sglue.customBashScript}}"
|
||||
{{- end }}
|
||||
{{ if .Values.agentk8sglue.containerCustomBashScript }}
|
||||
- name: CLEARML_K8S_GLUE_POD_BASH_SCRIPT
|
||||
value: "{{.Values.agentk8sglue.containerCustomBashScript}}"
|
||||
{{- end }}
|
||||
{{- if .Values.agentk8sglue.debugMode }}
|
||||
- name: "CLEARML_K8S_GLUE_DEBUG"
|
||||
value: "1"
|
||||
{{- end }}
|
||||
{{- if .Values.agentk8sglue.extraEnvs }}
|
||||
{{ toYaml .Values.agentk8sglue.extraEnvs | nindent 10 }}
|
||||
{{- end }}
|
||||
{{- if .Values.sessions.portModeEnabled }}
|
||||
{{- if .Values.sessions.setInteractiveQueuesTag }}
|
||||
- name: "CLEARML_K8S_GLUE_SET_QUEUE_SYSTEM_TAGS"
|
||||
value: "interactive"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.enterpriseFeatures.enabled }}
|
||||
- name: K8S_GLUE_QUEUE
|
||||
value: {{ include "agentk8sglue.queues" . | quote }}
|
||||
- name: CLEARML_K8S_GLUE_APPLY_VAULT_ENV_VARS
|
||||
value: {{ .Values.enterpriseFeatures.applyVaultEnvVars | quote }}
|
||||
- name: "CLEARML_K8S_GLUE_POD_MIN_RES_FIELD"
|
||||
value: {{.Values.enterpriseFeatures.monitoredResources.minResourcesFieldName}}
|
||||
- name: "CLEARML_K8S_GLUE_MAX_RESOURCES"
|
||||
value: "{{.Values.agentk8sglue.monitoredResources.maxResources}}"
|
||||
- name: "CLEARML_K8S_GLUE_POD_MAX_RES_FIELD"
|
||||
value: {{.Values.enterpriseFeatures.monitoredResources.maxResourcesFieldName}}
|
||||
{{- else }}
|
||||
- name: K8S_GLUE_QUEUE
|
||||
value: {{ .Values.agentk8sglue.queue }}
|
||||
{{- end }}
|
||||
volumes:
|
||||
- name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pod-template
|
||||
configMap:
|
||||
name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pod-template
|
||||
{{- if or .Values.clearml.clearmlConfig .Values.clearml.existingClearmlConfigSecret }}
|
||||
- name: {{ include "clearml.name" . }}-pt
|
||||
configMap:
|
||||
name: {{ include "clearml.name" . }}-pt
|
||||
{{ if .Values.clearml.clearmlConfig }}
|
||||
- name: k8sagent-clearml-conf-volume
|
||||
secret:
|
||||
{{- if .Values.clearml.existingClearmlConfigSecret }}
|
||||
secretName: {{ .Values.clearml.existingClearmlConfigSecret }}
|
||||
{{- else }}
|
||||
secretName: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-conf
|
||||
{{- end }}
|
||||
secretName: {{ include "clearml.name" . }}-ac
|
||||
items:
|
||||
- key: clearml.conf
|
||||
path: clearml.conf
|
||||
{{ end }}
|
||||
{{- if .Values.agentk8sglue.volumes }}
|
||||
{{- toYaml .Values.agentk8sglue.volumes | nindent 8 }}
|
||||
{{- end }}
|
||||
{{ if .Values.agentk8sglue.fileMounts }}
|
||||
- name: filemounts
|
||||
secret:
|
||||
secretName: {{ include "clearml.name" . }}-afm
|
||||
{{- end }}
|
||||
|
@ -1,23 +1,72 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
{{- if not .Values.agentk8sglue.serviceExistingAccountName }}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pods-access
|
||||
name: {{ include "clearml.serviceAccountName" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
{{- end }}
|
||||
{{- if .Values.enterpriseFeatures.serviceAccountClusterAccess }}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: {{ include "clearml.name" . }}-kpa
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- pods
|
||||
- secrets
|
||||
- services
|
||||
verbs: ["get", "list", "watch", "create", "patch", "delete"]
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- namespaces
|
||||
verbs: ["list"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: {{ include "clearml.name" . }}-kpa
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: {{ include "clearml.serviceAccountName" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: {{ include "clearml.name" . }}-kpa
|
||||
{{- else }}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: {{ include "clearml.name" . }}-kpa
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- pods
|
||||
- secrets
|
||||
- services
|
||||
verbs: ["get", "list", "watch", "create", "patch", "delete"]
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- namespaces
|
||||
verbs: ["list"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pods-access
|
||||
name: {{ include "clearml.name" . }}-kpa
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: default
|
||||
name: {{ include "clearml.serviceAccountName" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: {{ include "agentk8sglue.referenceName" . }}-k8sagent-pods-access
|
||||
name: {{ include "clearml.name" . }}-kpa
|
||||
{{- end }}
|
||||
|
@ -1,28 +1,18 @@
|
||||
{{- if not .Values.clearml.existingAgentk8sglueSecret }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-k8sglue
|
||||
name: {{ include "clearml.name" . }}-ac
|
||||
data:
|
||||
agentk8sglue_key: {{ .Values.clearml.agentk8sglueKey | b64enc }}
|
||||
agentk8sglue_secret: {{ .Values.clearml.agentk8sglueSecret | b64enc }}
|
||||
{{- end }}
|
||||
---
|
||||
{{- if not .Values.clearml.existingClearmlConfigSecret }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-conf
|
||||
data:
|
||||
clearml.conf: {{ .Values.clearml.clearmlConfig | b64enc }}
|
||||
---
|
||||
{{- end }}
|
||||
{{- if .Values.imageCredentials.enabled }}
|
||||
{{- if not .Values.imageCredentials.existingSecret }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "agentk8sglue.referenceName" . }}-clearml-agent-registry-key
|
||||
name: {{ include "clearml.name" . }}-ark
|
||||
type: kubernetes.io/dockerconfigjson
|
||||
data:
|
||||
.dockerconfigjson: {{ template "imagePullSecret" . }}
|
||||
|
37
charts/clearml-agent/templates/service-secret.yaml
Normal file
37
charts/clearml-agent/templates/service-secret.yaml
Normal file
@ -0,0 +1,37 @@
|
||||
{{ if .Values.agentk8sglue.fileMounts }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "clearml.name" . }}-afm
|
||||
data:
|
||||
{{- range .Values.agentk8sglue.fileMounts }}
|
||||
{{ .name }}: {{ .fileContent | b64enc }}
|
||||
{{- end }}
|
||||
{{ end }}
|
||||
---
|
||||
{{- if .Values.enterpriseFeatures.enabled }}
|
||||
{{ if .Values.agentk8sglue.basePodTemplate.fileMounts }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "clearml.name" . }}-fm
|
||||
data:
|
||||
{{- range .Values.agentk8sglue.basePodTemplate.fileMounts }}
|
||||
{{ .name }}: {{ .fileContent | b64enc }}
|
||||
{{- end }}
|
||||
{{ end }}
|
||||
---
|
||||
{{- range $key, $value := $.Values.agentk8sglue.queues }}
|
||||
{{ if .templateOverrides.fileMounts }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "clearml.name" $ }}-{{ $key }}-fm
|
||||
data:
|
||||
{{- range .templateOverrides.fileMounts }}
|
||||
{{ .name }}: {{ .fileContent | b64enc }}
|
||||
{{- end }}
|
||||
{{ end }}
|
||||
---
|
||||
{{- end }}
|
||||
{{- end }}
|
32
charts/clearml-agent/templates/service-sessions.yaml
Normal file
32
charts/clearml-agent/templates/service-sessions.yaml
Normal file
@ -0,0 +1,32 @@
|
||||
{{- if .Values.sessions.portModeEnabled }}
|
||||
{{- if not .Values.sessions.dynamicSvcs }}
|
||||
{{- range untilStep 1 ( ( add .Values.sessions.maxServices 1 ) | int ) 1 }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: clearml-session-{{ . }}
|
||||
labels:
|
||||
{{- include "clearml.labels" $ | nindent 4 }}
|
||||
{{- with $.Values.sessions.svcAnnotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
type: {{ $.Values.sessions.svcType }}
|
||||
ports:
|
||||
- targetPort: 10022
|
||||
{{- if eq $.Values.sessions.svcType "NodePort" }}
|
||||
port: 10022
|
||||
{{- else }}
|
||||
port: {{ add $.Values.sessions.startingPort . }}
|
||||
{{- end }}
|
||||
protocol: TCP
|
||||
{{- if eq $.Values.sessions.svcType "NodePort" }}
|
||||
nodePort: {{ add $.Values.sessions.startingPort . }}
|
||||
{{- end }}
|
||||
selector:
|
||||
ai.allegro.agent.serial: pod-{{ . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
@ -41,9 +41,15 @@ agentk8sglue:
|
||||
# -- Glue Agent number of pods
|
||||
replicaCount: 1
|
||||
|
||||
# -- if set, don't create a serviceAccountName but use defined existing one
|
||||
serviceExistingAccountName: ""
|
||||
|
||||
# -- Check certificates validity for evefry UrlReference below.
|
||||
clearmlcheckCertificate: true
|
||||
|
||||
# -- Enable Debugging logs for Agent pod
|
||||
debugMode: false
|
||||
|
||||
# -- Reference to Api server url
|
||||
apiServerUrlReference: "https://api.clear.ml"
|
||||
# -- Reference to File server url
|
||||
@ -51,32 +57,78 @@ agentk8sglue:
|
||||
# -- Reference to Web server url
|
||||
webServerUrlReference: "https://app.clear.ml"
|
||||
|
||||
# -- serviceAccountName for pods spawned to consume ClearML Task
|
||||
serviceAccountName: default
|
||||
# -- maximum concurrent consume ClearML Task pod
|
||||
maxPods: 10
|
||||
# -- default container image for ClearML Task pod
|
||||
defaultContainerImage: ubuntu:18.04
|
||||
# -- ClearML queue this agent will consume
|
||||
queue: default
|
||||
|
||||
# -- ClearML worker ID (must be unique across the entire ClearMLenvironment)
|
||||
id: k8s-agent
|
||||
|
||||
# -- Environment variables to be exposed in the agentk8sglue pods
|
||||
# -- Custom Bash script for the Glue Agent
|
||||
customBashScript: ""
|
||||
# -- Custom Bash script for the Task Pods ran by Glue Agent
|
||||
containerCustomBashScript: ""
|
||||
# -- Extra Environment variables for Glue Agent
|
||||
extraEnvs: []
|
||||
# - name: PYTHONPATH
|
||||
# value: "somepath"
|
||||
|
||||
# -- template for pods spawned to consume ClearML Task
|
||||
podTemplate:
|
||||
# -- volumes definition for Glue Agent (example in values.yaml comments)
|
||||
volumes: []
|
||||
# - name: "yourvolume"
|
||||
# nfs:
|
||||
# server: 192.168.0.1
|
||||
# path: /var/nfs/mount
|
||||
# -- volume mounts definition for Glue Agent (example in values.yaml comments)
|
||||
volumeMounts: []
|
||||
# - name: yourvolume
|
||||
# mountPath: /yourpath
|
||||
# subPath: userfolder
|
||||
|
||||
# -- file definition for Glue Agent (example in values.yaml comments)
|
||||
fileMounts: []
|
||||
# - name: "integration.py"
|
||||
# folderPath: "/mnt/python"
|
||||
# fileContent: |-
|
||||
# def get_template(*args, **kwargs):
|
||||
# print("args: {}".format(args))
|
||||
# print("kwargs: {}".format(kwargs))
|
||||
# return {
|
||||
# "template": {
|
||||
# }
|
||||
# }
|
||||
|
||||
# -- base template for pods spawned to consume ClearML Task
|
||||
basePodTemplate:
|
||||
# -- initContainers definition for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
initContainers: []
|
||||
# - name: volume-dirs-init-cntr
|
||||
# image: busybox:1.35
|
||||
# command:
|
||||
# - /bin/bash
|
||||
# - -c
|
||||
# - >
|
||||
# /bin/echo "this is an init";
|
||||
# -- labels setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
labels: {}
|
||||
# schedulerName: scheduler
|
||||
# -- schedulerName setup for pods spawned to consume ClearML Task
|
||||
schedulerName: ""
|
||||
# -- volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
volumes: []
|
||||
# - name: "yourvolume"
|
||||
# persistentVolumeClaim:
|
||||
# claimName: "yourvolume"
|
||||
# -- volumeMounts definition for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
# nfs:
|
||||
# server: 192.168.0.1
|
||||
# path: /var/nfs/mount
|
||||
# -- volume mounts definition for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
volumeMounts: []
|
||||
# - name: "yourvolume"
|
||||
# mountPath: "/yourpath"
|
||||
# - name: yourvolume
|
||||
# mountPath: /yourpath
|
||||
# subPath: userfolder
|
||||
# -- file definition for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
fileMounts: []
|
||||
# - name: "mounted-file.txt"
|
||||
# folderPath: "/mnt/"
|
||||
# fileContent: |-
|
||||
# this is a test file
|
||||
# with test content
|
||||
# -- environment variables for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
env: []
|
||||
# # to setup access to private repo, setup secret with git credentials:
|
||||
@ -87,6 +139,10 @@ agentk8sglue:
|
||||
# secretKeyRef:
|
||||
# name: git-password
|
||||
# key: git-password
|
||||
# - name: CURL_CA_BUNDLE
|
||||
# value: ""
|
||||
# - name: PYTHONWARNINGS
|
||||
# value: "=\"ignore:Unverified HTTPS request\""
|
||||
# -- resources declaration for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
resources: {}
|
||||
# limits:
|
||||
@ -99,3 +155,66 @@ agentk8sglue:
|
||||
# -- nodeSelector setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
nodeSelector: {}
|
||||
# fleet: gpu-nodes
|
||||
# -- securityContext setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
securityContext: {}
|
||||
# runAsUser: 1000
|
||||
# -- hostAliases setup for pods spawned to consume ClearML Task (example in values.yaml comments)
|
||||
hostAliases: {}
|
||||
# - ip: "127.0.0.1"
|
||||
# hostnames:
|
||||
# - "foo.local"
|
||||
# - "bar.local"
|
||||
|
||||
# -- Sessions internal service configuration
|
||||
sessions:
|
||||
# -- Enable/Disable sessions portmode WARNING: only one Agent deployment can have this set to true
|
||||
portModeEnabled: false
|
||||
# -- Enable/Disable dynamic svc for sessions pods
|
||||
dynamicSvcs: false
|
||||
# -- specific annotations for session services
|
||||
svcAnnotations: {}
|
||||
# -- service type ("NodePort" or "ClusterIP" or "LoadBalancer")
|
||||
svcType: "NodePort"
|
||||
# -- External IP sessions clients can connect to
|
||||
externalIP: 0.0.0.0
|
||||
# -- starting range of exposed NodePorts
|
||||
startingPort: 30000
|
||||
# -- maximum number of NodePorts exposed
|
||||
maxServices: 20
|
||||
# -- set interactive queue tags
|
||||
setInteractiveQueuesTag: true
|
||||
|
||||
# -- Enterprise features (work only with an Enterprise license)
|
||||
enterpriseFeatures:
|
||||
# -- Enable/Disable Enterprise features
|
||||
enabled: false
|
||||
# -- service account access every namespace flag
|
||||
serviceAccountClusterAccess: false
|
||||
# -- push env vars from Clear.ML Vault to task pods
|
||||
applyVaultEnvVars: true
|
||||
# -- GPU resource general counters
|
||||
monitoredResources:
|
||||
# -- Field name used by Agent to count minimum resources
|
||||
minResourcesFieldName: "resources|limits|nvidia.com/gpu"
|
||||
# -- Maximum resources counter
|
||||
maxResources: 0
|
||||
# -- Field name used by Agent to count maximum resources
|
||||
maxResourcesFieldName: "resources|limits|nvidia.com/gpu"
|
||||
# -- maximum concurrent consume ClearML Task pod
|
||||
maxPods: 10
|
||||
# -- Agent must use owner Token
|
||||
useOwnerToken: true
|
||||
# -- ClearML queues and related template OVERRIDES used this agent will consume
|
||||
queues:
|
||||
# -- name of the queue will be used for this template
|
||||
default:
|
||||
# -- overrides of the base template for this queue (must be declared even if empty!)
|
||||
templateOverrides: {}
|
||||
## -- name of the queue will be used for this template
|
||||
# default-gpu:
|
||||
# # -- overrides of the base template for this queue
|
||||
# templateOverrides:
|
||||
# # -- resources declaration for pods spawned to consume ClearML Task
|
||||
# resources:
|
||||
# limits:
|
||||
# nvidia.com/gpu: 1
|
||||
|
Loading…
Reference in New Issue
Block a user