mirror of
https://github.com/clearml/clearml-helm-charts
synced 2025-04-17 01:31:13 +00:00
Improvements k8sagent (#54)
This commit is contained in:
parent
018348bc1d
commit
fa3739b643
@ -2,7 +2,7 @@ apiVersion: v2
|
|||||||
name: clearml
|
name: clearml
|
||||||
description: MLOps platform
|
description: MLOps platform
|
||||||
type: application
|
type: application
|
||||||
version: "3.5.1"
|
version: "3.6.0"
|
||||||
appVersion: "1.2.0"
|
appVersion: "1.2.0"
|
||||||
home: https://clear.ml
|
home: https://clear.ml
|
||||||
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
|
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# ClearML Ecosystem for Kubernetes
|
# ClearML Ecosystem for Kubernetes
|
||||||
|
|
||||||
  
|
  
|
||||||
|
|
||||||
MLOps platform
|
MLOps platform
|
||||||
|
|
||||||
@ -163,16 +163,19 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
|||||||
| agentGroups.agent-group-gpu.replicaCount | int | `0` | |
|
| agentGroups.agent-group-gpu.replicaCount | int | `0` | |
|
||||||
| agentGroups.agent-group-gpu.tolerations | list | `[]` | |
|
| agentGroups.agent-group-gpu.tolerations | list | `[]` | |
|
||||||
| agentGroups.agent-group-gpu.updateStrategy | string | `"Recreate"` | |
|
| agentGroups.agent-group-gpu.updateStrategy | string | `"Recreate"` | |
|
||||||
| agentk8sglue.defaultDockerImage | string | `"nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu18.04"` | |
|
| agentk8sglue.defaultDockerImage | string | `"nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04"` | |
|
||||||
| agentk8sglue.enabled | bool | `false` | |
|
| agentk8sglue.enabled | bool | `false` | |
|
||||||
| agentk8sglue.id | string | `"k8s-agent"` | |
|
| agentk8sglue.id | string | `"k8s-agent"` | |
|
||||||
| agentk8sglue.image.repository | string | `"allegroai/clearml-agent-k8s"` | |
|
| agentk8sglue.image.repository | string | `"allegroai/clearml-agent-k8s"` | |
|
||||||
| agentk8sglue.image.tag | string | `"aws-latest-1.21"` | |
|
| agentk8sglue.image.tag | string | `"aws-latest-1.21"` | |
|
||||||
| agentk8sglue.maxPods | int | `10` | |
|
| agentk8sglue.maxPods | int | `10` | |
|
||||||
|
| agentk8sglue.podTemplate.env | list | `[]` | |
|
||||||
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | |
|
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | |
|
||||||
| agentk8sglue.podTemplate.resources | object | `{}` | |
|
| agentk8sglue.podTemplate.resources | object | `{}` | |
|
||||||
| agentk8sglue.podTemplate.tolerations | object | `{}` | |
|
| agentk8sglue.podTemplate.tolerations | list | `[]` | |
|
||||||
|
| agentk8sglue.podTemplate.volumes | list | `[]` | |
|
||||||
| agentk8sglue.queue | string | `"aws-instances"` | |
|
| agentk8sglue.queue | string | `"aws-instances"` | |
|
||||||
|
| agentk8sglue.serviceAccountName | string | `"default"` | |
|
||||||
| agentservices.affinity | object | `{}` | |
|
| agentservices.affinity | object | `{}` | |
|
||||||
| agentservices.agentVersion | string | `""` | |
|
| agentservices.agentVersion | string | `""` | |
|
||||||
| agentservices.awsAccessKeyId | string | `nil` | |
|
| agentservices.awsAccessKeyId | string | `nil` | |
|
||||||
|
@ -9,9 +9,23 @@ data:
|
|||||||
metadata:
|
metadata:
|
||||||
namespace: {{ .Release.namespace }}
|
namespace: {{ .Release.namespace }}
|
||||||
spec:
|
spec:
|
||||||
|
serviceAccountName: {{ .Values.agentk8sglue.serviceAccountName }}
|
||||||
|
volumes:
|
||||||
|
{{- range .Values.agentk8sglue.podTemplate.volumes }}
|
||||||
|
- name: {{ .name }}
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .name }}
|
||||||
|
{{- end }}
|
||||||
containers:
|
containers:
|
||||||
- resources:
|
- resources:
|
||||||
{{- toYaml .Values.agentk8sglue.podTemplate.resources | nindent 10 }}
|
{{- toYaml .Values.agentk8sglue.podTemplate.resources | nindent 10 }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 10022
|
||||||
|
volumeMounts:
|
||||||
|
{{- range .Values.agentk8sglue.podTemplate.volumes }}
|
||||||
|
- mountPath: {{ .path }}
|
||||||
|
name: {{ .name }}
|
||||||
|
{{- end }}
|
||||||
env:
|
env:
|
||||||
- name: CLEARML_API_HOST
|
- name: CLEARML_API_HOST
|
||||||
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
|
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
|
||||||
@ -29,6 +43,7 @@ data:
|
|||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: clearml-conf
|
name: clearml-conf
|
||||||
key: apiserver_secret
|
key: apiserver_secret
|
||||||
|
{{- toYaml .Values.agentk8sglue.podTemplate.env | nindent 8 }}
|
||||||
tolerations:
|
tolerations:
|
||||||
{{- toYaml .Values.agentk8sglue.podTemplate.tolerations | nindent 8 }}
|
{{- toYaml .Values.agentk8sglue.podTemplate.tolerations | nindent 8 }}
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
|
@ -8,7 +8,7 @@ rules:
|
|||||||
- ""
|
- ""
|
||||||
resources:
|
resources:
|
||||||
- pods
|
- pods
|
||||||
verbs: ["get", "list", "watch", "create", "patch"]
|
verbs: ["get", "list", "watch", "create", "patch", "delete"]
|
||||||
---
|
---
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: RoleBinding
|
kind: RoleBinding
|
||||||
|
@ -302,15 +302,28 @@ agentk8sglue:
|
|||||||
image:
|
image:
|
||||||
repository: "allegroai/clearml-agent-k8s"
|
repository: "allegroai/clearml-agent-k8s"
|
||||||
tag: "aws-latest-1.21"
|
tag: "aws-latest-1.21"
|
||||||
|
serviceAccountName: default
|
||||||
maxPods: 10
|
maxPods: 10
|
||||||
defaultDockerImage: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu18.04 # default docker image that is spawned as new pod
|
defaultDockerImage: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04 # default docker image that is spawned as new pod
|
||||||
queue: aws-instances # create this queue manually in the UI first for it to work
|
queue: aws-instances # create this queue manually in the UI first for it to work
|
||||||
id: k8s-agent
|
id: k8s-agent
|
||||||
podTemplate:
|
podTemplate:
|
||||||
|
volumes: []
|
||||||
|
# - name: "yourvolume"
|
||||||
|
# path: "/yourpath"
|
||||||
|
env: []
|
||||||
|
# # to setup access to private repo, setup secret with git credentials:
|
||||||
|
# - name: CLEARML_AGENT_GIT_USER
|
||||||
|
# value: mygitusername
|
||||||
|
# - name: CLEARML_AGENT_GIT_PASS
|
||||||
|
# valueFrom:
|
||||||
|
# secretKeyRef:
|
||||||
|
# name: git-password
|
||||||
|
# key: git-password
|
||||||
resources: {}
|
resources: {}
|
||||||
# limits:
|
# limits:
|
||||||
# nvidia.com/gpu: 1
|
# nvidia.com/gpu: 1
|
||||||
tolerations: {}
|
tolerations: []
|
||||||
# - key: "nvidia.com/gpu"
|
# - key: "nvidia.com/gpu"
|
||||||
# operator: Exists
|
# operator: Exists
|
||||||
# effect: "NoSchedule"
|
# effect: "NoSchedule"
|
||||||
|
Loading…
Reference in New Issue
Block a user