fix: faulty service values references in k8s agent (#50)

* add k8s glue deployment

* more docs

* bump

* disabled by default

* run helm-docs

* fix service references

* fix readme

* add values file where k8sagent enabled

* empty files

* newline

* fix linter

Co-authored-by: Valeriano Manassero <14011549+valeriano-manassero@users.noreply.github.com>
This commit is contained in:
Niels ten Boom 2022-01-21 16:15:09 +01:00 committed by GitHub
parent cd7f22f7d8
commit 9c15a8a348
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 23 additions and 7 deletions

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: clearml name: clearml
description: MLOps platform description: MLOps platform
type: application type: application
version: "3.4.0" version: "3.4.1"
appVersion: "1.1.1" appVersion: "1.1.1"
home: https://clear.ml home: https://clear.ml
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg

View File

@ -1,6 +1,6 @@
# ClearML Ecosystem for Kubernetes # ClearML Ecosystem for Kubernetes
![Version: 3.4.0](https://img.shields.io/badge/Version-3.4.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.1.1](https://img.shields.io/badge/AppVersion-1.1.1-informational?style=flat-square) ![Version: 3.4.1](https://img.shields.io/badge/Version-3.4.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.1.1](https://img.shields.io/badge/AppVersion-1.1.1-informational?style=flat-square)
MLOps platform MLOps platform
@ -170,6 +170,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
| agentk8sglue.image.tag | string | `"aws-latest-1.21"` | | | agentk8sglue.image.tag | string | `"aws-latest-1.21"` | |
| agentk8sglue.maxPods | int | `10` | | | agentk8sglue.maxPods | int | `10` | |
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | | | agentk8sglue.podTemplate.nodeSelector | object | `{}` | |
| agentk8sglue.podTemplate.resources | object | `{}` | |
| agentk8sglue.podTemplate.tolerations | object | `{}` | | | agentk8sglue.podTemplate.tolerations | object | `{}` | |
| agentk8sglue.queue | string | `"aws-instances"` | | | agentk8sglue.queue | string | `"aws-instances"` | |
| agentservices.affinity | object | `{}` | | | agentservices.affinity | object | `{}` | |

View File

@ -0,0 +1,7 @@
Place additional values files in this directory so that the CI also installs and tests the chart with those configurations.
See the chart-testing documentation for details: https://github.com/helm/chart-testing/blob/main/doc/ct_install.md
```
"Charts may have multiple custom values files matching the glob pattern '*-values.yaml' in a directory named 'ci' in the root of the chart's directory. The chart is installed and tested for each of these files. If no custom values file is present, the chart is installed and tested with defaults."
```

View File

@ -0,0 +1 @@
# Intentionally empty so that the chart is also tested with the default values.yaml

View File

@ -0,0 +1,2 @@
agentk8sglue:
enabled: true

View File

@ -10,13 +10,15 @@ data:
namespace: {{ .Release.namespace }} namespace: {{ .Release.namespace }}
spec: spec:
containers: containers:
- env: - resources:
{{- toYaml .Values.agentk8sglue.podTemplate.resources | nindent 10 }}
env:
- name: CLEARML_API_HOST - name: CLEARML_API_HOST
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.clearml.apiserver.service.port }}" value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
- name: CLEARML_WEB_HOST - name: CLEARML_WEB_HOST
value: "http://{{ include "clearml.fullname" . }}-webserver" value: "http://{{ include "clearml.fullname" . }}-webserver"
- name: CLEARML_FILES_HOST - name: CLEARML_FILES_HOST
value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.clearml.fileserver.service.port }}" value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}"
- name: CLEARML_API_ACCESS_KEY - name: CLEARML_API_ACCESS_KEY
valueFrom: valueFrom:
secretKeyRef: secretKeyRef:

View File

@ -25,11 +25,11 @@ spec:
mountPath: /root/template mountPath: /root/template
env: env:
- name: CLEARML_API_HOST - name: CLEARML_API_HOST
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.clearml.apiserver.service.port }}" value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
- name: CLEARML_WEB_HOST - name: CLEARML_WEB_HOST
value: "http://{{ include "clearml.fullname" . }}-webserver" value: "http://{{ include "clearml.fullname" . }}-webserver"
- name: CLEARML_FILES_HOST - name: CLEARML_FILES_HOST
value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.clearml.fileserver.service.port }}" value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}"
- name: K8S_GLUE_MAX_PODS - name: K8S_GLUE_MAX_PODS
value: "{{.Values.agentk8sglue.maxPods}}" value: "{{.Values.agentk8sglue.maxPods}}"
- name: K8S_GLUE_QUEUE - name: K8S_GLUE_QUEUE

View File

@ -307,6 +307,9 @@ agentk8sglue:
queue: aws-instances # create this queue manually in the UI first for it to work queue: aws-instances # create this queue manually in the UI first for it to work
id: k8s-agent id: k8s-agent
podTemplate: podTemplate:
resources: {}
# limits:
# nvidia.com/gpu: 1
tolerations: {} tolerations: {}
# - key: "nvidia.com/gpu" # - key: "nvidia.com/gpu"
# operator: Exists # operator: Exists