mirror of
https://github.com/clearml/clearml-helm-charts
synced 2025-04-17 01:31:13 +00:00
Compare commits
12 Commits
clearml-3.
...
clearml-se
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9cf2868738 | ||
|
|
8098fd82df | ||
|
|
4422cf433d | ||
|
|
10296ac979 | ||
|
|
06070a5c20 | ||
|
|
5972fd8e5f | ||
|
|
7a7bd930f8 | ||
|
|
25dfbd12d6 | ||
|
|
d7c3b9d5d9 | ||
|
|
e16060f2ad | ||
|
|
27a666d2ae | ||
|
|
d7bef0ff9d |
2
.github/helm-docs.sh
vendored
2
.github/helm-docs.sh
vendored
@@ -1,7 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
CHART_DIRS="$(git diff --find-renames --name-only "$(git rev-parse --abbrev-ref HEAD)" remotes/origin/main -- 'charts' | grep '[cC]hart.yaml' | sed -e 's#/[Cc]hart.yaml##g')"
|
||||
HELM_DOCS_VERSION="1.7.0"
|
||||
HELM_DOCS_VERSION="1.10.0"
|
||||
|
||||
curl --silent --show-error --fail --location --output /tmp/helm-docs.tar.gz https://github.com/norwoodj/helm-docs/releases/download/v"${HELM_DOCS_VERSION}"/helm-docs_"${HELM_DOCS_VERSION}"_Linux_x86_64.tar.gz
|
||||
tar -xf /tmp/helm-docs.tar.gz helm-docs
|
||||
|
||||
23
charts/clearml-serving/.helmignore
Normal file
23
charts/clearml-serving/.helmignore
Normal file
@@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
16
charts/clearml-serving/Chart.yaml
Normal file
16
charts/clearml-serving/Chart.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v2
|
||||
name: clearml-serving
|
||||
description: ClearML Serving Helm Chart
|
||||
type: application
|
||||
version: 0.3.0
|
||||
appVersion: "0.9.0"
|
||||
maintainers:
|
||||
- name: valeriano-manassero
|
||||
url: https://github.com/valeriano-manassero
|
||||
- name: stefano-cherchi
|
||||
url: https://github.com/stefano-cherchi
|
||||
keywords:
|
||||
- clearml
|
||||
- "machine learning"
|
||||
- mlops
|
||||
- "model serving"
|
||||
70
charts/clearml-serving/README.md
Normal file
70
charts/clearml-serving/README.md
Normal file
@@ -0,0 +1,70 @@
|
||||
# clearml-serving
|
||||
|
||||
  
|
||||
|
||||
ClearML Serving Helm Chart
|
||||
|
||||
## Maintainers
|
||||
|
||||
| Name | Email | Url |
|
||||
| ---- | ------ | --- |
|
||||
| valeriano-manassero | | <https://github.com/valeriano-manassero> |
|
||||
| stefano-cherchi | | <https://github.com/stefano-cherchi> |
|
||||
|
||||
## Values
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| alertmanager.affinity | object | `{}` | |
|
||||
| alertmanager.image | string | `"prom/alertmanager:v0.23.0"` | |
|
||||
| alertmanager.nodeSelector | object | `{}` | |
|
||||
| alertmanager.resources | object | `{}` | |
|
||||
| alertmanager.tolerations | list | `[]` | |
|
||||
| clearml.apiAccessKey | string | `"ClearML API Access Key"` | |
|
||||
| clearml.apiHost | string | `"http://clearml-server-apiserver:8008"` | |
|
||||
| clearml.apiSecretKey | string | `"ClearML API Secret Key"` | |
|
||||
| clearml.filesHost | string | `"http://clearml-server-fileserver:8081"` | |
|
||||
| clearml.servingTaskId | string | `"ClearML Serving Task ID"` | |
|
||||
| clearml.webHost | string | `"http://clearml-server-webserver:80"` | |
|
||||
| clearml_serving_inference.affinity | object | `{}` | |
|
||||
| clearml_serving_inference.extraPythonPackages | list | `[]` | Extra Python Packages to be installed in running pods |
|
||||
| clearml_serving_inference.image | string | `"allegroai/clearml-serving-inference"` | |
|
||||
| clearml_serving_inference.nodeSelector | object | `{}` | |
|
||||
| clearml_serving_inference.resources | object | `{}` | |
|
||||
| clearml_serving_inference.tolerations | list | `[]` | |
|
||||
| clearml_serving_statistics.affinity | object | `{}` | |
|
||||
| clearml_serving_statistics.extraPythonPackages | list | `[]` | Extra Python Packages to be installed in running pods |
|
||||
| clearml_serving_statistics.image | string | `"allegroai/clearml-serving-statistics"` | |
|
||||
| clearml_serving_statistics.nodeSelector | object | `{}` | |
|
||||
| clearml_serving_statistics.resources | object | `{}` | |
|
||||
| clearml_serving_statistics.tolerations | list | `[]` | |
|
||||
| clearml_serving_triton.affinity | object | `{}` | |
|
||||
| clearml_serving_triton.enabled | bool | `true` | |
|
||||
| clearml_serving_triton.extraPythonPackages | list | `[]` | Extra Python Packages to be installed in running pods |
|
||||
| clearml_serving_triton.image | string | `"allegroai/clearml-serving-triton"` | |
|
||||
| clearml_serving_triton.nodeSelector | object | `{}` | |
|
||||
| clearml_serving_triton.resources | object | `{}` | |
|
||||
| clearml_serving_triton.tolerations | list | `[]` | |
|
||||
| grafana.affinity | object | `{}` | |
|
||||
| grafana.image | string | `"grafana/grafana:8.4.4-ubuntu"` | |
|
||||
| grafana.nodeSelector | object | `{}` | |
|
||||
| grafana.resources | object | `{}` | |
|
||||
| grafana.tolerations | list | `[]` | |
|
||||
| kafka.affinity | object | `{}` | |
|
||||
| kafka.image | string | `"bitnami/kafka:3.1.0"` | |
|
||||
| kafka.nodeSelector | object | `{}` | |
|
||||
| kafka.resources | object | `{}` | |
|
||||
| kafka.tolerations | list | `[]` | |
|
||||
| prometheus.affinity | object | `{}` | |
|
||||
| prometheus.image | string | `"prom/prometheus:v2.34.0"` | |
|
||||
| prometheus.nodeSelector | object | `{}` | |
|
||||
| prometheus.resources | object | `{}` | |
|
||||
| prometheus.tolerations | list | `[]` | |
|
||||
| zookeeper.affinity | object | `{}` | |
|
||||
| zookeeper.image | string | `"bitnami/zookeeper:3.7.0"` | |
|
||||
| zookeeper.nodeSelector | object | `{}` | |
|
||||
| zookeeper.resources | object | `{}` | |
|
||||
| zookeeper.tolerations | list | `[]` | |
|
||||
|
||||
----------------------------------------------
|
||||
Autogenerated from chart metadata using [helm-docs v1.10.0](https://github.com/norwoodj/helm-docs/releases/v1.10.0)
|
||||
62
charts/clearml-serving/templates/_helpers.tpl
Normal file
62
charts/clearml-serving/templates/_helpers.tpl
Normal file
@@ -0,0 +1,62 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "clearml-serving.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "clearml-serving.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "clearml-serving.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "clearml-serving.labels" -}}
|
||||
helm.sh/chart: {{ include "clearml-serving.chart" . }}
|
||||
{{ include "clearml-serving.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "clearml-serving.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "clearml-serving.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "clearml-serving.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "clearml-serving.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,28 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: alertmanager
|
||||
name: alertmanager
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: alertmanager
|
||||
strategy: {}
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: alertmanager
|
||||
spec:
|
||||
containers:
|
||||
- image: {{ .Values.alertmanager.image }}
|
||||
name: clearml-serving-alertmanager
|
||||
ports:
|
||||
- containerPort: 9093
|
||||
resources: {}
|
||||
restartPolicy: Always
|
||||
status: {}
|
||||
16
charts/clearml-serving/templates/alertmanager-service.yaml
Normal file
16
charts/clearml-serving/templates/alertmanager-service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: alertmanager
|
||||
name: clearml-serving-alertmanager
|
||||
spec:
|
||||
ports:
|
||||
- name: "9093"
|
||||
port: 9093
|
||||
targetPort: 9093
|
||||
selector:
|
||||
clearml.serving.service: alertmanager
|
||||
status:
|
||||
loadBalancer: {}
|
||||
@@ -0,0 +1,13 @@
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
name: clearml-serving-backend
|
||||
spec:
|
||||
ingress:
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
podSelector:
|
||||
matchLabels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
@@ -0,0 +1,63 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: clearml-serving-inference
|
||||
name: clearml-serving-inference
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: clearml-serving-inference
|
||||
strategy: {}
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: clearml-serving-inference
|
||||
spec:
|
||||
containers:
|
||||
- env:
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
value: "{{ .Values.clearml.apiAccessKey }}"
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
value: "{{ .Values.clearml.apiSecretKey }}"
|
||||
- name: CLEARML_API_HOST
|
||||
value: "{{ .Values.clearml.apiHost }}"
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: "{{ .Values.clearml.filesHost }}"
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: "{{ .Values.clearml.webHost }}"
|
||||
- name: CLEARML_DEFAULT_KAFKA_SERVE_URL
|
||||
value: clearml-serving-kafka:9092
|
||||
- name: CLEARML_SERVING_POLL_FREQ
|
||||
value: "1.0"
|
||||
- name: CLEARML_DEFAULT_BASE_SERVE_URL
|
||||
value: http://127.0.0.1:8080/serve
|
||||
- name: CLEARML_DEFAULT_TRITON_GRPC_ADDR
|
||||
{{- if .Values.clearml_serving_triton.enabled }}
|
||||
value: "clearml-serving-triton:8001"
|
||||
{{- else }}
|
||||
value: ""
|
||||
{{- end }}
|
||||
- name: CLEARML_SERVING_NUM_PROCESS
|
||||
value: "2"
|
||||
- name: CLEARML_SERVING_PORT
|
||||
value: "8080"
|
||||
- name: CLEARML_SERVING_TASK_ID
|
||||
value: "{{ .Values.clearml.servingTaskId }}"
|
||||
- name: CLEARML_USE_GUNICORN
|
||||
value: "true"
|
||||
{{- if .Values.clearml_serving_inference.extraPythonPackages }}
|
||||
- name: EXTRA_PYTHON_PACKAGES
|
||||
value: '{{ join " " .Values.clearml_serving_inference.extraPythonPackages }}'
|
||||
{{- end }}
|
||||
image: "{{ .Values.clearml_serving_inference.image }}:{{ .Chart.AppVersion }}"
|
||||
name: clearml-serving-inference
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
resources: {}
|
||||
restartPolicy: Always
|
||||
status: {}
|
||||
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: clearml-serving-inference
|
||||
name: clearml-serving-inference
|
||||
spec:
|
||||
ports:
|
||||
- name: "8080"
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
selector:
|
||||
clearml.serving.service: clearml-serving-inference
|
||||
status:
|
||||
loadBalancer: {}
|
||||
@@ -0,0 +1,49 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: clearml-serving-statistics
|
||||
name: clearml-serving-statistics
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: clearml-serving-statistics
|
||||
strategy: {}
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: clearml-serving-statistics
|
||||
spec:
|
||||
containers:
|
||||
- env:
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
value: "{{ .Values.clearml.apiAccessKey }}"
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
value: "{{ .Values.clearml.apiSecretKey }}"
|
||||
- name: CLEARML_API_HOST
|
||||
value: "{{ .Values.clearml.apiHost }}"
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: "{{ .Values.clearml.filesHost }}"
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: "{{ .Values.clearml.webHost }}"
|
||||
- name: CLEARML_DEFAULT_KAFKA_SERVE_URL
|
||||
value: clearml-serving-kafka:9092
|
||||
- name: CLEARML_SERVING_POLL_FREQ
|
||||
value: "1.0"
|
||||
- name: CLEARML_SERVING_TASK_ID
|
||||
value: "{{ .Values.clearml.servingTaskId }}"
|
||||
{{- if .Values.clearml_serving_statistics.extraPythonPackages }}
|
||||
- name: EXTRA_PYTHON_PACKAGES
|
||||
value: '{{ join " " .Values.clearml_serving_statistics.extraPythonPackages }}'
|
||||
{{- end }}
|
||||
image: "{{ .Values.clearml_serving_statistics.image }}:{{ .Chart.AppVersion }}"
|
||||
name: clearml-serving-statistics
|
||||
ports:
|
||||
- containerPort: 9999
|
||||
resources: {}
|
||||
restartPolicy: Always
|
||||
status: {}
|
||||
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: clearml-serving-statistics
|
||||
name: clearml-serving-statistics
|
||||
spec:
|
||||
ports:
|
||||
- name: "9999"
|
||||
port: 9999
|
||||
targetPort: 9999
|
||||
selector:
|
||||
clearml.serving.service: clearml-serving-statistics
|
||||
status:
|
||||
loadBalancer: {}
|
||||
@@ -0,0 +1,52 @@
|
||||
{{ if .Values.clearml_serving_triton.enabled }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: clearml-serving-triton
|
||||
name: clearml-serving-triton
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: clearml-serving-triton
|
||||
strategy: {}
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: clearml-serving-triton
|
||||
spec:
|
||||
containers:
|
||||
- env:
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
value: "{{ .Values.clearml.apiAccessKey }}"
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
value: "{{ .Values.clearml.apiSecretKey }}"
|
||||
- name: CLEARML_API_HOST
|
||||
value: "{{ .Values.clearml.apiHost }}"
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: "{{ .Values.clearml.filesHost }}"
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: "{{ .Values.clearml.webHost }}"
|
||||
- name: CLEARML_SERVING_TASK_ID
|
||||
value: "{{ .Values.clearml.servingTaskId }}"
|
||||
- name: CLEARML_TRITON_POLL_FREQ
|
||||
value: "1.0"
|
||||
- name: CLEARML_TRITON_METRIC_FREQ
|
||||
value: "1.0"
|
||||
{{- if .Values.clearml_serving_triton.extraPythonPackages }}
|
||||
- name: EXTRA_PYTHON_PACKAGES
|
||||
value: '{{ join " " .Values.clearml_serving_triton.extraPythonPackages }}'
|
||||
{{- end }}
|
||||
image: "{{ .Values.clearml_serving_triton.image }}:{{ .Chart.AppVersion }}"
|
||||
name: clearml-serving-triton
|
||||
ports:
|
||||
- containerPort: 8001
|
||||
resources: {}
|
||||
restartPolicy: Always
|
||||
status: {}
|
||||
{{ end }}
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
{{ if .Values.clearml_serving_triton.enabled }}
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: clearml-serving-triton
|
||||
name: clearml-serving-triton
|
||||
spec:
|
||||
ports:
|
||||
- name: "8001"
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
selector:
|
||||
clearml.serving.service: clearml-serving-triton
|
||||
status:
|
||||
loadBalancer: {}
|
||||
{{ end }}
|
||||
14
charts/clearml-serving/templates/grafana-config-secret.yaml
Normal file
14
charts/clearml-serving/templates/grafana-config-secret.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: grafana-config
|
||||
stringData:
|
||||
datasource.yaml: |-
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
# Access mode - proxy (server in the UI) or direct (browser in the UI).
|
||||
access: proxy
|
||||
url: http://clearml-serving-prometheus:9090
|
||||
36
charts/clearml-serving/templates/grafana-deployment.yaml
Normal file
36
charts/clearml-serving/templates/grafana-deployment.yaml
Normal file
@@ -0,0 +1,36 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: grafana
|
||||
name: grafana
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: grafana
|
||||
strategy:
|
||||
type: Recreate
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: grafana
|
||||
spec:
|
||||
containers:
|
||||
- image: {{ .Values.grafana.image }}
|
||||
name: clearml-serving-grafana
|
||||
ports:
|
||||
- containerPort: 3000
|
||||
resources: {}
|
||||
volumeMounts:
|
||||
- mountPath: /etc/grafana/provisioning/datasources/
|
||||
name: grafana-conf
|
||||
restartPolicy: Always
|
||||
volumes:
|
||||
- name: grafana-conf
|
||||
secret:
|
||||
secretName: grafana-config
|
||||
status: {}
|
||||
16
charts/clearml-serving/templates/grafana-service.yaml
Normal file
16
charts/clearml-serving/templates/grafana-service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: grafana
|
||||
name: clearml-serving-grafana
|
||||
spec:
|
||||
ports:
|
||||
- name: "3000"
|
||||
port: 3000
|
||||
targetPort: 3000
|
||||
selector:
|
||||
clearml.serving.service: grafana
|
||||
status:
|
||||
loadBalancer: {}
|
||||
41
charts/clearml-serving/templates/kafka-deployment.yaml
Normal file
41
charts/clearml-serving/templates/kafka-deployment.yaml
Normal file
@@ -0,0 +1,41 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: kafka
|
||||
name: kafka
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: kafka
|
||||
strategy: {}
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: kafka
|
||||
spec:
|
||||
containers:
|
||||
- env:
|
||||
- name: ALLOW_PLAINTEXT_LISTENER
|
||||
value: "yes"
|
||||
- name: KAFKA_BROKER_ID
|
||||
value: "1"
|
||||
- name: KAFKA_CFG_ADVERTISED_LISTENERS
|
||||
value: PLAINTEXT://clearml-serving-kafka:9092
|
||||
- name: KAFKA_CFG_LISTENERS
|
||||
value: PLAINTEXT://0.0.0.0:9092
|
||||
- name: KAFKA_CFG_ZOOKEEPER_CONNECT
|
||||
value: clearml-serving-zookeeper:2181
|
||||
- name: KAFKA_CREATE_TOPICS
|
||||
value: '"topic_test:1:1"'
|
||||
image: {{ .Values.kafka.image }}
|
||||
name: clearml-serving-kafka
|
||||
ports:
|
||||
- containerPort: 9092
|
||||
resources: {}
|
||||
restartPolicy: Always
|
||||
status: {}
|
||||
16
charts/clearml-serving/templates/kafka-service.yaml
Normal file
16
charts/clearml-serving/templates/kafka-service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: kafka
|
||||
name: clearml-serving-kafka
|
||||
spec:
|
||||
ports:
|
||||
- name: "9092"
|
||||
port: 9092
|
||||
targetPort: 9092
|
||||
selector:
|
||||
clearml.serving.service: kafka
|
||||
status:
|
||||
loadBalancer: {}
|
||||
@@ -0,0 +1,28 @@
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: prometheus-config
|
||||
stringData:
|
||||
prometheus.yml: |-
|
||||
global:
|
||||
scrape_interval: "15s" # By default, scrape targets every 15 seconds.
|
||||
evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
|
||||
external_labels:
|
||||
monitor: 'clearml-serving'
|
||||
|
||||
scrape_configs:
|
||||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||
- job_name: 'prometheus'
|
||||
|
||||
scrape_interval: 5s
|
||||
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||
- job_name: 'clearml-inference-stats'
|
||||
|
||||
scrape_interval: 5s
|
||||
|
||||
static_configs:
|
||||
- targets: ['clearml-serving-statistics:9999']
|
||||
43
charts/clearml-serving/templates/prometheus-deployment.yaml
Normal file
43
charts/clearml-serving/templates/prometheus-deployment.yaml
Normal file
@@ -0,0 +1,43 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: prometheus
|
||||
name: prometheus
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: prometheus
|
||||
strategy:
|
||||
type: Recreate
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: prometheus
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --config.file=/mnt/prometheus.yml
|
||||
- --storage.tsdb.path=/prometheus
|
||||
- --web.console.libraries=/etc/prometheus/console_libraries
|
||||
- --web.console.templates=/etc/prometheus/consoles
|
||||
- --storage.tsdb.retention.time=200h
|
||||
- --web.enable-lifecycle
|
||||
image: {{ .Values.prometheus.image }}
|
||||
name: clearml-serving-prometheus
|
||||
ports:
|
||||
- containerPort: 9090
|
||||
resources: {}
|
||||
volumeMounts:
|
||||
- mountPath: /mnt
|
||||
name: prometheus-conf
|
||||
restartPolicy: Always
|
||||
volumes:
|
||||
- name: prometheus-conf
|
||||
secret:
|
||||
secretName: prometheus-config
|
||||
status: {}
|
||||
16
charts/clearml-serving/templates/prometheus-service.yaml
Normal file
16
charts/clearml-serving/templates/prometheus-service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: prometheus
|
||||
name: clearml-serving-prometheus
|
||||
spec:
|
||||
ports:
|
||||
- name: "9090"
|
||||
port: 9090
|
||||
targetPort: 9090
|
||||
selector:
|
||||
clearml.serving.service: prometheus
|
||||
status:
|
||||
loadBalancer: {}
|
||||
31
charts/clearml-serving/templates/zookeeper-deployment.yaml
Normal file
31
charts/clearml-serving/templates/zookeeper-deployment.yaml
Normal file
@@ -0,0 +1,31 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: zookeeper
|
||||
name: zookeeper
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: zookeeper
|
||||
strategy: {}
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: zookeeper
|
||||
spec:
|
||||
containers:
|
||||
- env:
|
||||
- name: ALLOW_ANONYMOUS_LOGIN
|
||||
value: "yes"
|
||||
image: {{ .Values.zookeeper.image }}
|
||||
name: clearml-serving-zookeeper
|
||||
ports:
|
||||
- containerPort: 2181
|
||||
resources: {}
|
||||
restartPolicy: Always
|
||||
status: {}
|
||||
16
charts/clearml-serving/templates/zookeeper-service.yaml
Normal file
16
charts/clearml-serving/templates/zookeeper-service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: zookeeper
|
||||
name: clearml-serving-zookeeper
|
||||
spec:
|
||||
ports:
|
||||
- name: "2181"
|
||||
port: 2181
|
||||
targetPort: 2181
|
||||
selector:
|
||||
clearml.serving.service: zookeeper
|
||||
status:
|
||||
loadBalancer: {}
|
||||
78
charts/clearml-serving/values.yaml
Normal file
78
charts/clearml-serving/values.yaml
Normal file
@@ -0,0 +1,78 @@
|
||||
# Default values for clearml-serving.
|
||||
|
||||
clearml:
|
||||
apiAccessKey: "ClearML API Access Key"
|
||||
apiSecretKey: "ClearML API Secret Key"
|
||||
apiHost: http://clearml-server-apiserver:8008
|
||||
filesHost: http://clearml-server-fileserver:8081
|
||||
webHost: http://clearml-server-webserver:80
|
||||
servingTaskId: "ClearML Serving Task ID"
|
||||
|
||||
zookeeper:
|
||||
image: bitnami/zookeeper:3.7.0
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
|
||||
kafka:
|
||||
image: bitnami/kafka:3.1.0
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:v2.34.0
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:8.4.4-ubuntu
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
|
||||
alertmanager:
|
||||
image: prom/alertmanager:v0.23.0
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
|
||||
clearml_serving_statistics:
|
||||
image: allegroai/clearml-serving-statistics
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
# -- Extra Python Packages to be installed in running pods
|
||||
extraPythonPackages: []
|
||||
# - numpy==1.22.4
|
||||
# - pandas==1.4.2
|
||||
|
||||
clearml_serving_inference:
|
||||
image: allegroai/clearml-serving-inference
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
# -- Extra Python Packages to be installed in running pods
|
||||
extraPythonPackages: []
|
||||
# - numpy==1.22.4
|
||||
# - pandas==1.4.2
|
||||
|
||||
clearml_serving_triton:
|
||||
enabled: true
|
||||
image: allegroai/clearml-serving-triton
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
# -- Extra Python Packages to be installed in running pods
|
||||
extraPythonPackages: []
|
||||
# - numpy==1.22.4
|
||||
# - pandas==1.4.2
|
||||
@@ -2,8 +2,8 @@ apiVersion: v2
|
||||
name: clearml
|
||||
description: MLOps platform
|
||||
type: application
|
||||
version: "3.7.0"
|
||||
appVersion: "1.2.0"
|
||||
version: "3.10.2"
|
||||
appVersion: "1.4.0"
|
||||
home: https://clear.ml
|
||||
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
|
||||
sources:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# ClearML Ecosystem for Kubernetes
|
||||
|
||||
  
|
||||
  
|
||||
|
||||
MLOps platform
|
||||
|
||||
@@ -10,7 +10,7 @@ MLOps platform
|
||||
|
||||
| Name | Email | Url |
|
||||
| ---- | ------ | --- |
|
||||
| valeriano-manassero | | https://github.com/valeriano-manassero |
|
||||
| valeriano-manassero | | <https://github.com/valeriano-manassero> |
|
||||
|
||||
## Introduction
|
||||
|
||||
@@ -31,9 +31,7 @@ For development/evaluation it's possible to use [kind](https://kind.sigs.k8s.io)
|
||||
After installation, the following commands will create a complete ClearML installation:
|
||||
|
||||
```
|
||||
mkdir -pm 777 /tmp/clearml-kind
|
||||
|
||||
cat <<EOF > /tmp/clearml-kind.yaml
|
||||
cat <<EOF | kind create cluster --config=-
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
@@ -62,8 +60,6 @@ nodes:
|
||||
containerPath: /var/local-path-provisioner
|
||||
EOF
|
||||
|
||||
kind create cluster --config /tmp/clearml-kind.yaml
|
||||
|
||||
helm install clearml allegroai/clearml
|
||||
```
|
||||
|
||||
@@ -89,6 +85,24 @@ This will create 3 ingress rules:
|
||||
|
||||
Just pointing the domain records to the IP where ingress controller is responding will complete the deployment process.
|
||||
|
||||
## Upgrades/ Values upgrades
|
||||
|
||||
Updating to the latest version of this chart can be done in two steps:
|
||||
|
||||
```
|
||||
helm repo update
|
||||
helm upgrade clearml allegroai/clearml
|
||||
```
|
||||
|
||||
Changing values on existing installation can be done with:
|
||||
|
||||
```
|
||||
helm upgrade clearml allegroai/clearml --version <CURRENT CHART VERSION> -f custom_values.yaml
|
||||
```
|
||||
|
||||
Please note: when updating only values, always set the chart version explicitly to avoid an unintended chart upgrade.
|
||||
Keeping version upgrades and values updates as separate procedures is good practice, since it separates potential concerns.
|
||||
|
||||
## Additional Configuration for ClearML Server
|
||||
|
||||
You can also configure the **clearml-server** for:
|
||||
@@ -127,7 +141,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-cpu.clearmlGitPassword | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.clearmlGitUser | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.clearmlSecretKey | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.enabled | bool | `true` | |
|
||||
| agentGroups.agent-group-cpu.enabled | bool | `false` | |
|
||||
| agentGroups.agent-group-cpu.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| agentGroups.agent-group-cpu.image.repository | string | `"ubuntu"` | |
|
||||
| agentGroups.agent-group-cpu.image.tag | string | `"18.04"` | |
|
||||
@@ -151,7 +165,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-gpu.clearmlGitPassword | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.clearmlGitUser | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.clearmlSecretKey | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.enabled | bool | `true` | |
|
||||
| agentGroups.agent-group-gpu.enabled | bool | `false` | |
|
||||
| agentGroups.agent-group-gpu.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| agentGroups.agent-group-gpu.image.repository | string | `"nvidia/cuda"` | |
|
||||
| agentGroups.agent-group-gpu.image.tag | string | `"11.0-base-ubuntu18.04"` | |
|
||||
@@ -164,17 +178,17 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-gpu.tolerations | list | `[]` | |
|
||||
| agentGroups.agent-group-gpu.updateStrategy | string | `"Recreate"` | |
|
||||
| agentk8sglue.defaultDockerImage | string | `"nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04"` | |
|
||||
| agentk8sglue.enabled | bool | `false` | |
|
||||
| agentk8sglue.enabled | bool | `true` | |
|
||||
| agentk8sglue.id | string | `"k8s-agent"` | |
|
||||
| agentk8sglue.image.repository | string | `"allegroai/clearml-agent-k8s"` | |
|
||||
| agentk8sglue.image.tag | string | `"aws-latest-1.21"` | |
|
||||
| agentk8sglue.image.tag | string | `"latest"` | |
|
||||
| agentk8sglue.maxPods | int | `10` | |
|
||||
| agentk8sglue.podTemplate.env | list | `[]` | |
|
||||
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | |
|
||||
| agentk8sglue.podTemplate.resources | object | `{}` | |
|
||||
| agentk8sglue.podTemplate.tolerations | list | `[]` | |
|
||||
| agentk8sglue.podTemplate.volumes | list | `[]` | |
|
||||
| agentk8sglue.queue | string | `"aws-instances"` | |
|
||||
| agentk8sglue.queue | string | `"default"` | |
|
||||
| agentk8sglue.serviceAccountName | string | `"default"` | |
|
||||
| agentservices.affinity | object | `{}` | |
|
||||
| agentservices.agentVersion | string | `""` | |
|
||||
@@ -199,17 +213,17 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentservices.podAnnotations | object | `{}` | |
|
||||
| agentservices.replicaCount | int | `1` | |
|
||||
| agentservices.resources | object | `{}` | |
|
||||
| agentservices.storage.data.class | string | `"standard"` | |
|
||||
| agentservices.storage.data.class | string | `""` | |
|
||||
| agentservices.storage.data.size | string | `"50Gi"` | |
|
||||
| agentservices.tolerations | list | `[]` | |
|
||||
| apiserver.additionalConfigs | object | `{}` | |
|
||||
| apiserver.additionalConfigs | object | `{}` | additional configurations that can be used by api server; check examples in values.yaml file |
|
||||
| apiserver.affinity | object | `{}` | |
|
||||
| apiserver.authCookiesMaxAge | int | `864000` | Amount of seconds the authorization cookie will last in user browser |
|
||||
| apiserver.configDir | string | `"/opt/clearml/config"` | |
|
||||
| apiserver.extraEnvs | list | `[]` | |
|
||||
| apiserver.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| apiserver.image.repository | string | `"allegroai/clearml"` | |
|
||||
| apiserver.image.tag | string | `"1.2.0"` | |
|
||||
| apiserver.image.tag | string | `"1.4.0"` | |
|
||||
| apiserver.livenessDelay | int | `60` | |
|
||||
| apiserver.nodeSelector | object | `{}` | |
|
||||
| apiserver.podAnnotations | object | `{}` | |
|
||||
@@ -269,7 +283,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| fileserver.extraEnvs | list | `[]` | |
|
||||
| fileserver.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| fileserver.image.repository | string | `"allegroai/clearml"` | |
|
||||
| fileserver.image.tag | string | `"1.2.0"` | |
|
||||
| fileserver.image.tag | string | `"1.4.0"` | |
|
||||
| fileserver.nodeSelector | object | `{}` | |
|
||||
| fileserver.podAnnotations | object | `{}` | |
|
||||
| fileserver.replicaCount | int | `1` | |
|
||||
@@ -277,7 +291,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| fileserver.service.nodePort | int | `30081` | If service.type set to NodePort, this will be set to service's nodePort field. If service.type is set to others, this field will be ignored |
|
||||
| fileserver.service.port | int | `8081` | |
|
||||
| fileserver.service.type | string | `"NodePort"` | This will set to service's spec.type field |
|
||||
| fileserver.storage.data.class | string | `"standard"` | |
|
||||
| fileserver.storage.data.class | string | `""` | |
|
||||
| fileserver.storage.data.size | string | `"50Gi"` | |
|
||||
| fileserver.tolerations | list | `[]` | |
|
||||
| ingress.annotations | object | `{}` | |
|
||||
@@ -328,7 +342,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| webserver.extraEnvs | list | `[]` | |
|
||||
| webserver.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| webserver.image.repository | string | `"allegroai/clearml"` | |
|
||||
| webserver.image.tag | string | `"1.2.0"` | |
|
||||
| webserver.image.tag | string | `"1.4.0"` | |
|
||||
| webserver.nodeSelector | object | `{}` | |
|
||||
| webserver.podAnnotations | object | `{}` | |
|
||||
| webserver.replicaCount | int | `1` | |
|
||||
|
||||
@@ -28,9 +28,7 @@ For development/evaluation it's possible to use [kind](https://kind.sigs.k8s.io)
|
||||
After installation, the following commands will create a complete ClearML installation:
|
||||
|
||||
```
|
||||
mkdir -pm 777 /tmp/clearml-kind
|
||||
|
||||
cat <<EOF > /tmp/clearml-kind.yaml
|
||||
cat <<EOF | kind create cluster --config=-
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
@@ -59,8 +57,6 @@ nodes:
|
||||
containerPath: /var/local-path-provisioner
|
||||
EOF
|
||||
|
||||
kind create cluster --config /tmp/clearml-kind.yaml
|
||||
|
||||
helm install clearml allegroai/clearml
|
||||
```
|
||||
|
||||
@@ -86,6 +82,24 @@ This will create 3 ingress rules:
|
||||
|
||||
Just pointing the domain records to the IP where ingress controller is responding will complete the deployment process.
|
||||
|
||||
## Upgrades/ Values upgrades
|
||||
|
||||
Updating to the latest version of this chart can be done in two steps:
|
||||
|
||||
```
|
||||
helm repo update
|
||||
helm upgrade clearml allegroai/clearml
|
||||
```
|
||||
|
||||
Changing values on an existing installation can be done with:
|
||||
|
||||
```
|
||||
helm upgrade clearml allegroai/clearml --version <CURRENT CHART VERSION> -f custom_values.yaml
|
||||
```
|
||||
|
||||
Please note: when updating values only, always set an explicit chart version to avoid an unintended chart upgrade.
|
||||
Keeping separate update procedures for chart versions and values is a good practice to separate potential concerns.
|
||||
|
||||
## Additional Configuration for ClearML Server
|
||||
|
||||
You can also configure the **clearml-server** for:
|
||||
|
||||
@@ -7,7 +7,7 @@ data:
|
||||
template.yaml: |
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
namespace: {{ .Release.namespace }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
spec:
|
||||
serviceAccountName: {{ .Values.agentk8sglue.serviceAccountName }}
|
||||
volumes:
|
||||
@@ -43,10 +43,15 @@ data:
|
||||
secretKeyRef:
|
||||
name: clearml-conf
|
||||
key: apiserver_secret
|
||||
{{- toYaml .Values.agentk8sglue.podTemplate.env | nindent 8 }}
|
||||
tolerations:
|
||||
{{- toYaml .Values.agentk8sglue.podTemplate.tolerations | nindent 8 }}
|
||||
{{- if .Values.agentk8sglue.podTemplate.env }}
|
||||
{{ toYaml .Values.agentk8sglue.podTemplate.env | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.podTemplate.nodeSelector}}
|
||||
nodeSelector:
|
||||
{{- toYaml .Values.agentk8sglue.podTemplate.nodeSelector | nindent 8 }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.agentk8sglue.podTemplate.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
|
||||
@@ -35,7 +35,9 @@ spec:
|
||||
- name: K8S_GLUE_QUEUE
|
||||
value: "{{.Values.agentk8sglue.queue}}"
|
||||
- name: K8S_GLUE_EXTRA_ARGS
|
||||
value: "--template-yaml /root/template/template.yaml"
|
||||
value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml"
|
||||
- name: K8S_DEFAULT_NAMESPACE
|
||||
value: "{{ .Release.Namespace }}"
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
|
||||
@@ -11,5 +11,7 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.agentservices.storage.data.size | quote }}
|
||||
{{- if .Values.agentservices.storage.data.class -}}
|
||||
storageClassName: {{ .Values.agentservices.storage.data.class | quote }}
|
||||
{{- end -}}
|
||||
{{- end }}
|
||||
|
||||
@@ -10,4 +10,7 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.fileserver.storage.data.size | quote }}
|
||||
storageClassName: {{ .Values.fileserver.storage.data.class | quote }}
|
||||
{{- if .Values.fileserver.storage.data.class -}}
|
||||
storageClassName: {{ .Values.fileserver.storage.data.class | quote }}
|
||||
{{- end -}}
|
||||
|
||||
@@ -17,7 +17,7 @@ metadata:
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: default
|
||||
namespace: {{ .Release.namespace }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
|
||||
@@ -67,7 +67,7 @@ apiserver:
|
||||
image:
|
||||
repository: "allegroai/clearml"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "1.2.0"
|
||||
tag: "1.4.0"
|
||||
|
||||
extraEnvs: []
|
||||
|
||||
@@ -91,6 +91,7 @@ apiserver:
|
||||
|
||||
affinity: {}
|
||||
|
||||
# -- additional configurations that can be used by api server; check examples in values.yaml file
|
||||
additionalConfigs: {}
|
||||
# services.conf: |
|
||||
# tasks {
|
||||
@@ -101,6 +102,25 @@ apiserver:
|
||||
# watch_interval_sec: 900
|
||||
# }
|
||||
# }
|
||||
# apiserver.conf: |
|
||||
# auth {
|
||||
# fixed_users {
|
||||
# enabled: true
|
||||
# pass_hashed: false
|
||||
# users: [
|
||||
# {
|
||||
# username: "jane"
|
||||
# password: "12345678"
|
||||
# name: "Jane Doe"
|
||||
# },
|
||||
# {
|
||||
# username: "john"
|
||||
# password: "12345678"
|
||||
# name: "John Doe"
|
||||
# },
|
||||
# ]
|
||||
# }
|
||||
# }
|
||||
|
||||
fileserver:
|
||||
service:
|
||||
@@ -116,7 +136,7 @@ fileserver:
|
||||
image:
|
||||
repository: "allegroai/clearml"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "1.2.0"
|
||||
tag: "1.4.0"
|
||||
|
||||
extraEnvs: []
|
||||
|
||||
@@ -142,7 +162,7 @@ fileserver:
|
||||
|
||||
storage:
|
||||
data:
|
||||
class: "standard"
|
||||
class: ""
|
||||
size: 50Gi
|
||||
|
||||
webserver:
|
||||
@@ -161,7 +181,7 @@ webserver:
|
||||
image:
|
||||
repository: "allegroai/clearml"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "1.2.0"
|
||||
tag: "1.4.0"
|
||||
|
||||
podAnnotations: {}
|
||||
|
||||
@@ -232,12 +252,12 @@ agentservices:
|
||||
|
||||
storage:
|
||||
data:
|
||||
class: "standard"
|
||||
class: ""
|
||||
size: 50Gi
|
||||
|
||||
agentGroups:
|
||||
agent-group-cpu:
|
||||
enabled: true
|
||||
enabled: false
|
||||
name: agent-group-cpu
|
||||
replicaCount: 1
|
||||
updateStrategy: Recreate
|
||||
@@ -271,7 +291,7 @@ agentGroups:
|
||||
affinity: {}
|
||||
|
||||
agent-group-gpu:
|
||||
enabled: true
|
||||
enabled: false
|
||||
name: agent-group-gpu
|
||||
replicaCount: 0
|
||||
updateStrategy: Recreate
|
||||
@@ -308,14 +328,14 @@ agentGroups:
|
||||
# GPU autoscaling nodes.
|
||||
# https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue
|
||||
agentk8sglue:
|
||||
enabled: false
|
||||
enabled: true
|
||||
image:
|
||||
repository: "allegroai/clearml-agent-k8s"
|
||||
tag: "aws-latest-1.21"
|
||||
tag: "latest"
|
||||
serviceAccountName: default
|
||||
maxPods: 10
|
||||
defaultDockerImage: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04 # default docker image that is spawned as new pod
|
||||
queue: aws-instances # create this queue manually in the UI first for it to work
|
||||
queue: default
|
||||
id: k8s-agent
|
||||
podTemplate:
|
||||
volumes: []
|
||||
|
||||
Reference in New Issue
Block a user