mirror of
https://github.com/clearml/clearml-helm-charts
synced 2025-04-17 01:31:13 +00:00
Compare commits
19 Commits
clearml-3.
...
clearml-se
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8098fd82df | ||
|
|
4422cf433d | ||
|
|
10296ac979 | ||
|
|
06070a5c20 | ||
|
|
5972fd8e5f | ||
|
|
7a7bd930f8 | ||
|
|
25dfbd12d6 | ||
|
|
d7c3b9d5d9 | ||
|
|
e16060f2ad | ||
|
|
27a666d2ae | ||
|
|
d7bef0ff9d | ||
|
|
049e609ce0 | ||
|
|
fa3739b643 | ||
|
|
018348bc1d | ||
|
|
57b85cbfce | ||
|
|
9c15a8a348 | ||
|
|
cd7f22f7d8 | ||
|
|
078e394e24 | ||
|
|
70b07c637a |
2
.github/helm-docs.sh
vendored
2
.github/helm-docs.sh
vendored
@@ -1,7 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
CHART_DIRS="$(git diff --find-renames --name-only "$(git rev-parse --abbrev-ref HEAD)" remotes/origin/main -- 'charts' | grep '[cC]hart.yaml' | sed -e 's#/[Cc]hart.yaml##g')"
|
||||
HELM_DOCS_VERSION="1.5.0"
|
||||
HELM_DOCS_VERSION="1.10.0"
|
||||
|
||||
curl --silent --show-error --fail --location --output /tmp/helm-docs.tar.gz https://github.com/norwoodj/helm-docs/releases/download/v"${HELM_DOCS_VERSION}"/helm-docs_"${HELM_DOCS_VERSION}"_Linux_x86_64.tar.gz
|
||||
tar -xf /tmp/helm-docs.tar.gz helm-docs
|
||||
|
||||
23
charts/clearml-serving/.helmignore
Normal file
23
charts/clearml-serving/.helmignore
Normal file
@@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
16
charts/clearml-serving/Chart.yaml
Normal file
16
charts/clearml-serving/Chart.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v2
|
||||
name: clearml-serving
|
||||
description: ClearML Serving Helm Chart
|
||||
type: application
|
||||
version: 0.2.0
|
||||
appVersion: "0.9.0"
|
||||
maintainers:
|
||||
- name: valeriano-manassero
|
||||
url: https://github.com/valeriano-manassero
|
||||
- name: stefano-cherchi
|
||||
url: https://github.com/stefano-cherchi
|
||||
keywords:
|
||||
- clearml
|
||||
- "machine learning"
|
||||
- mlops
|
||||
- "model serving"
|
||||
62
charts/clearml-serving/README.md
Normal file
62
charts/clearml-serving/README.md
Normal file
@@ -0,0 +1,62 @@
|
||||
# clearml-serving
|
||||
|
||||
  
|
||||
|
||||
ClearML Serving Helm Chart
|
||||
|
||||
## Maintainers
|
||||
|
||||
| Name | Email | Url |
|
||||
| ---- | ------ | --- |
|
||||
| valeriano-manassero | | <https://github.com/valeriano-manassero> |
|
||||
| stefano-cherchi | | <https://github.com/stefano-cherchi> |
|
||||
|
||||
## Values
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| alertmanager.affinity | object | `{}` | |
|
||||
| alertmanager.image | string | `"prom/alertmanager:v0.23.0"` | |
|
||||
| alertmanager.nodeSelector | object | `{}` | |
|
||||
| alertmanager.resources | object | `{}` | |
|
||||
| alertmanager.tolerations | list | `[]` | |
|
||||
| clearml.apiAccessKey | string | `"ClearML API Access Key"` | |
|
||||
| clearml.apiHost | string | `"http://clearml-server-apiserver:8008"` | |
|
||||
| clearml.apiSecretKey | string | `"ClearML API Secret Key"` | |
|
||||
| clearml.extraPythonPackages | list | `[]` | Extra Python Packages to be installed in running pods |
|
||||
| clearml.filesHost | string | `"http://clearml-server-fileserver:8081"` | |
|
||||
| clearml.servingTaskId | string | `"ClearML Serving Task ID"` | |
|
||||
| clearml.webHost | string | `"http://clearml-server-webserver:80"` | |
|
||||
| clearml_serving_inference.affinity | object | `{}` | |
|
||||
| clearml_serving_inference.image | string | `"allegroai/clearml-serving-inference"` | |
|
||||
| clearml_serving_inference.nodeSelector | object | `{}` | |
|
||||
| clearml_serving_inference.resources | object | `{}` | |
|
||||
| clearml_serving_inference.tolerations | list | `[]` | |
|
||||
| clearml_serving_statistics.affinity | object | `{}` | |
|
||||
| clearml_serving_statistics.image | string | `"allegroai/clearml-serving-statistics"` | |
|
||||
| clearml_serving_statistics.nodeSelector | object | `{}` | |
|
||||
| clearml_serving_statistics.resources | object | `{}` | |
|
||||
| clearml_serving_statistics.tolerations | list | `[]` | |
|
||||
| grafana.affinity | object | `{}` | |
|
||||
| grafana.image | string | `"grafana/grafana:8.4.4-ubuntu"` | |
|
||||
| grafana.nodeSelector | object | `{}` | |
|
||||
| grafana.resources | object | `{}` | |
|
||||
| grafana.tolerations | list | `[]` | |
|
||||
| kafka.affinity | object | `{}` | |
|
||||
| kafka.image | string | `"bitnami/kafka:3.1.0"` | |
|
||||
| kafka.nodeSelector | object | `{}` | |
|
||||
| kafka.resources | object | `{}` | |
|
||||
| kafka.tolerations | list | `[]` | |
|
||||
| prometheus.affinity | object | `{}` | |
|
||||
| prometheus.image | string | `"prom/prometheus:v2.34.0"` | |
|
||||
| prometheus.nodeSelector | object | `{}` | |
|
||||
| prometheus.resources | object | `{}` | |
|
||||
| prometheus.tolerations | list | `[]` | |
|
||||
| zookeeper.affinity | object | `{}` | |
|
||||
| zookeeper.image | string | `"bitnami/zookeeper:3.7.0"` | |
|
||||
| zookeeper.nodeSelector | object | `{}` | |
|
||||
| zookeeper.resources | object | `{}` | |
|
||||
| zookeeper.tolerations | list | `[]` | |
|
||||
|
||||
----------------------------------------------
|
||||
Autogenerated from chart metadata using [helm-docs v1.10.0](https://github.com/norwoodj/helm-docs/releases/v1.10.0)
|
||||
62
charts/clearml-serving/templates/_helpers.tpl
Normal file
62
charts/clearml-serving/templates/_helpers.tpl
Normal file
@@ -0,0 +1,62 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "clearml-serving.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "clearml-serving.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "clearml-serving.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "clearml-serving.labels" -}}
|
||||
helm.sh/chart: {{ include "clearml-serving.chart" . }}
|
||||
{{ include "clearml-serving.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "clearml-serving.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "clearml-serving.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "clearml-serving.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "clearml-serving.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,28 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: alertmanager
|
||||
name: alertmanager
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: alertmanager
|
||||
strategy: {}
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: alertmanager
|
||||
spec:
|
||||
containers:
|
||||
- image: {{ .Values.alertmanager.image }}
|
||||
name: clearml-serving-alertmanager
|
||||
ports:
|
||||
- containerPort: 9093
|
||||
resources: {}
|
||||
restartPolicy: Always
|
||||
status: {}
|
||||
16
charts/clearml-serving/templates/alertmanager-service.yaml
Normal file
16
charts/clearml-serving/templates/alertmanager-service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: alertmanager
|
||||
name: clearml-serving-alertmanager
|
||||
spec:
|
||||
ports:
|
||||
- name: "9093"
|
||||
port: 9093
|
||||
targetPort: 9093
|
||||
selector:
|
||||
clearml.serving.service: alertmanager
|
||||
status:
|
||||
loadBalancer: {}
|
||||
@@ -0,0 +1,13 @@
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
name: clearml-serving-backend
|
||||
spec:
|
||||
ingress:
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
podSelector:
|
||||
matchLabels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
@@ -0,0 +1,59 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: clearml-serving-inference
|
||||
name: clearml-serving-inference
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: clearml-serving-inference
|
||||
strategy: {}
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: clearml-serving-inference
|
||||
spec:
|
||||
containers:
|
||||
- env:
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
value: "{{ .Values.clearml.apiAccessKey }}"
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
value: "{{ .Values.clearml.apiSecretKey }}"
|
||||
- name: CLEARML_API_HOST
|
||||
value: "{{ .Values.clearml.apiHost }}"
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: "{{ .Values.clearml.filesHost }}"
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: "{{ .Values.clearml.webHost }}"
|
||||
- name: CLEARML_DEFAULT_KAFKA_SERVE_URL
|
||||
value: clearml-serving-kafka:9092
|
||||
- name: CLEARML_SERVING_POLL_FREQ
|
||||
value: "1.0"
|
||||
- name: CLEARML_DEFAULT_BASE_SERVE_URL
|
||||
value: http://127.0.0.1:8080/serve
|
||||
- name: CLEARML_DEFAULT_TRITON_GRPC_ADDR
|
||||
value: ""
|
||||
- name: CLEARML_SERVING_NUM_PROCESS
|
||||
value: "2"
|
||||
- name: CLEARML_SERVING_PORT
|
||||
value: "8080"
|
||||
- name: CLEARML_SERVING_TASK_ID
|
||||
value: "{{ .Values.clearml.servingTaskId }}"
|
||||
- name: CLEARML_USE_GUNICORN
|
||||
value: "true"
|
||||
{{- if .Values.clearml.extraPythonPackages }}
|
||||
- name: EXTRA_PYTHON_PACKAGES
|
||||
value: '{{ join " " .Values.clearml.extraPythonPackages }}'
|
||||
{{- end }}
|
||||
image: "{{ .Values.clearml_serving_inference.image }}:{{ .Chart.AppVersion }}"
|
||||
name: clearml-serving-inference
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
resources: {}
|
||||
restartPolicy: Always
|
||||
status: {}
|
||||
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: clearml-serving-inference
|
||||
name: clearml-serving-inference
|
||||
spec:
|
||||
ports:
|
||||
- name: "8080"
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
selector:
|
||||
clearml.serving.service: clearml-serving-inference
|
||||
status:
|
||||
loadBalancer: {}
|
||||
@@ -0,0 +1,49 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: clearml-serving-statistics
|
||||
name: clearml-serving-statistics
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: clearml-serving-statistics
|
||||
strategy: {}
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: clearml-serving-statistics
|
||||
spec:
|
||||
containers:
|
||||
- env:
|
||||
- name: CLEARML_API_ACCESS_KEY
|
||||
value: "{{ .Values.clearml.apiAccessKey }}"
|
||||
- name: CLEARML_API_SECRET_KEY
|
||||
value: "{{ .Values.clearml.apiSecretKey }}"
|
||||
- name: CLEARML_API_HOST
|
||||
value: "{{ .Values.clearml.apiHost }}"
|
||||
- name: CLEARML_FILES_HOST
|
||||
value: "{{ .Values.clearml.filesHost }}"
|
||||
- name: CLEARML_WEB_HOST
|
||||
value: "{{ .Values.clearml.webHost }}"
|
||||
- name: CLEARML_DEFAULT_KAFKA_SERVE_URL
|
||||
value: clearml-serving-kafka:9092
|
||||
- name: CLEARML_SERVING_POLL_FREQ
|
||||
value: "1.0"
|
||||
- name: CLEARML_SERVING_TASK_ID
|
||||
value: "{{ .Values.clearml.servingTaskId }}"
|
||||
{{- if .Values.clearml.extraPythonPackages }}
|
||||
- name: EXTRA_PYTHON_PACKAGES
|
||||
value: '{{ join " " .Values.clearml.extraPythonPackages }}'
|
||||
{{- end }}
|
||||
image: "{{ .Values.clearml_serving_statistics.image }}:{{ .Chart.AppVersion }}"
|
||||
name: clearml-serving-statistics
|
||||
ports:
|
||||
- containerPort: 9999
|
||||
resources: {}
|
||||
restartPolicy: Always
|
||||
status: {}
|
||||
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: clearml-serving-statistics
|
||||
name: clearml-serving-statistics
|
||||
spec:
|
||||
ports:
|
||||
- name: "9999"
|
||||
port: 9999
|
||||
targetPort: 9999
|
||||
selector:
|
||||
clearml.serving.service: clearml-serving-statistics
|
||||
status:
|
||||
loadBalancer: {}
|
||||
14
charts/clearml-serving/templates/grafana-config-secret.yaml
Normal file
14
charts/clearml-serving/templates/grafana-config-secret.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: grafana-config
|
||||
stringData:
|
||||
datasource.yaml: |-
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
# Access mode - proxy (server in the UI) or direct (browser in the UI).
|
||||
access: proxy
|
||||
url: http://clearml-serving-prometheus:9090
|
||||
36
charts/clearml-serving/templates/grafana-deployment.yaml
Normal file
36
charts/clearml-serving/templates/grafana-deployment.yaml
Normal file
@@ -0,0 +1,36 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: grafana
|
||||
name: grafana
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: grafana
|
||||
strategy:
|
||||
type: Recreate
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: grafana
|
||||
spec:
|
||||
containers:
|
||||
- image: {{ .Values.grafana.image }}
|
||||
name: clearml-serving-grafana
|
||||
ports:
|
||||
- containerPort: 3000
|
||||
resources: {}
|
||||
volumeMounts:
|
||||
- mountPath: /etc/grafana/provisioning/datasources/
|
||||
name: grafana-conf
|
||||
restartPolicy: Always
|
||||
volumes:
|
||||
- name: grafana-conf
|
||||
secret:
|
||||
secretName: grafana-config
|
||||
status: {}
|
||||
16
charts/clearml-serving/templates/grafana-service.yaml
Normal file
16
charts/clearml-serving/templates/grafana-service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: grafana
|
||||
name: clearml-serving-grafana
|
||||
spec:
|
||||
ports:
|
||||
- name: "3000"
|
||||
port: 3000
|
||||
targetPort: 3000
|
||||
selector:
|
||||
clearml.serving.service: grafana
|
||||
status:
|
||||
loadBalancer: {}
|
||||
41
charts/clearml-serving/templates/kafka-deployment.yaml
Normal file
41
charts/clearml-serving/templates/kafka-deployment.yaml
Normal file
@@ -0,0 +1,41 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: kafka
|
||||
name: kafka
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: kafka
|
||||
strategy: {}
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: kafka
|
||||
spec:
|
||||
containers:
|
||||
- env:
|
||||
- name: ALLOW_PLAINTEXT_LISTENER
|
||||
value: "yes"
|
||||
- name: KAFKA_BROKER_ID
|
||||
value: "1"
|
||||
- name: KAFKA_CFG_ADVERTISED_LISTENERS
|
||||
value: PLAINTEXT://clearml-serving-kafka:9092
|
||||
- name: KAFKA_CFG_LISTENERS
|
||||
value: PLAINTEXT://0.0.0.0:9092
|
||||
- name: KAFKA_CFG_ZOOKEEPER_CONNECT
|
||||
value: clearml-serving-zookeeper:2181
|
||||
- name: KAFKA_CREATE_TOPICS
|
||||
value: '"topic_test:1:1"'
|
||||
image: {{ .Values.kafka.image }}
|
||||
name: clearml-serving-kafka
|
||||
ports:
|
||||
- containerPort: 9092
|
||||
resources: {}
|
||||
restartPolicy: Always
|
||||
status: {}
|
||||
16
charts/clearml-serving/templates/kafka-service.yaml
Normal file
16
charts/clearml-serving/templates/kafka-service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: kafka
|
||||
name: clearml-serving-kafka
|
||||
spec:
|
||||
ports:
|
||||
- name: "9092"
|
||||
port: 9092
|
||||
targetPort: 9092
|
||||
selector:
|
||||
clearml.serving.service: kafka
|
||||
status:
|
||||
loadBalancer: {}
|
||||
@@ -0,0 +1,28 @@
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: prometheus-config
|
||||
stringData:
|
||||
prometheus.yml: |-
|
||||
global:
|
||||
scrape_interval: "15s" # By default, scrape targets every 15 seconds.
|
||||
evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
|
||||
external_labels:
|
||||
monitor: 'clearml-serving'
|
||||
|
||||
scrape_configs:
|
||||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||
- job_name: 'prometheus'
|
||||
|
||||
scrape_interval: 5s
|
||||
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||
- job_name: 'clearml-inference-stats'
|
||||
|
||||
scrape_interval: 5s
|
||||
|
||||
static_configs:
|
||||
- targets: ['clearml-serving-statistics:9999']
|
||||
43
charts/clearml-serving/templates/prometheus-deployment.yaml
Normal file
43
charts/clearml-serving/templates/prometheus-deployment.yaml
Normal file
@@ -0,0 +1,43 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: prometheus
|
||||
name: prometheus
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: prometheus
|
||||
strategy:
|
||||
type: Recreate
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: prometheus
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
- --config.file=/mnt/prometheus.yml
|
||||
- --storage.tsdb.path=/prometheus
|
||||
- --web.console.libraries=/etc/prometheus/console_libraries
|
||||
- --web.console.templates=/etc/prometheus/consoles
|
||||
- --storage.tsdb.retention.time=200h
|
||||
- --web.enable-lifecycle
|
||||
image: {{ .Values.prometheus.image }}
|
||||
name: clearml-serving-prometheus
|
||||
ports:
|
||||
- containerPort: 9090
|
||||
resources: {}
|
||||
volumeMounts:
|
||||
- mountPath: /mnt
|
||||
name: prometheus-conf
|
||||
restartPolicy: Always
|
||||
volumes:
|
||||
- name: prometheus-conf
|
||||
secret:
|
||||
secretName: prometheus-config
|
||||
status: {}
|
||||
16
charts/clearml-serving/templates/prometheus-service.yaml
Normal file
16
charts/clearml-serving/templates/prometheus-service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: prometheus
|
||||
name: clearml-serving-prometheus
|
||||
spec:
|
||||
ports:
|
||||
- name: "9090"
|
||||
port: 9090
|
||||
targetPort: 9090
|
||||
selector:
|
||||
clearml.serving.service: prometheus
|
||||
status:
|
||||
loadBalancer: {}
|
||||
31
charts/clearml-serving/templates/zookeeper-deployment.yaml
Normal file
31
charts/clearml-serving/templates/zookeeper-deployment.yaml
Normal file
@@ -0,0 +1,31 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: zookeeper
|
||||
name: zookeeper
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
clearml.serving.service: zookeeper
|
||||
strategy: {}
|
||||
template:
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.network/clearml-serving-backend: "true"
|
||||
clearml.serving.service: zookeeper
|
||||
spec:
|
||||
containers:
|
||||
- env:
|
||||
- name: ALLOW_ANONYMOUS_LOGIN
|
||||
value: "yes"
|
||||
image: {{ .Values.zookeeper.image }}
|
||||
name: clearml-serving-zookeeper
|
||||
ports:
|
||||
- containerPort: 2181
|
||||
resources: {}
|
||||
restartPolicy: Always
|
||||
status: {}
|
||||
16
charts/clearml-serving/templates/zookeeper-service.yaml
Normal file
16
charts/clearml-serving/templates/zookeeper-service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations: {}
|
||||
labels:
|
||||
clearml.serving.service: zookeeper
|
||||
name: clearml-serving-zookeeper
|
||||
spec:
|
||||
ports:
|
||||
- name: "2181"
|
||||
port: 2181
|
||||
targetPort: 2181
|
||||
selector:
|
||||
clearml.serving.service: zookeeper
|
||||
status:
|
||||
loadBalancer: {}
|
||||
63
charts/clearml-serving/values.yaml
Normal file
63
charts/clearml-serving/values.yaml
Normal file
@@ -0,0 +1,63 @@
|
||||
# Default values for clearml-serving.
|
||||
|
||||
clearml:
|
||||
apiAccessKey: "ClearML API Access Key"
|
||||
apiSecretKey: "ClearML API Secret Key"
|
||||
apiHost: http://clearml-server-apiserver:8008
|
||||
filesHost: http://clearml-server-fileserver:8081
|
||||
webHost: http://clearml-server-webserver:80
|
||||
servingTaskId: "ClearML Serving Task ID"
|
||||
|
||||
# -- Extra Python Packages to be installed in running pods
|
||||
extraPythonPackages: []
|
||||
# - numpy==1.22.4
|
||||
# - pandas==1.4.2
|
||||
|
||||
zookeeper:
|
||||
image: bitnami/zookeeper:3.7.0
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
|
||||
kafka:
|
||||
image: bitnami/kafka:3.1.0
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:v2.34.0
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:8.4.4-ubuntu
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
|
||||
alertmanager:
|
||||
image: prom/alertmanager:v0.23.0
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
|
||||
clearml_serving_statistics:
|
||||
image: allegroai/clearml-serving-statistics
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
|
||||
clearml_serving_inference:
|
||||
image: allegroai/clearml-serving-inference
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
affinity: {}
|
||||
resources: {}
|
||||
@@ -7,6 +7,6 @@ dependencies:
|
||||
version: 10.3.4
|
||||
- name: elasticsearch
|
||||
repository: https://helm.elastic.co
|
||||
version: 7.10.1
|
||||
digest: sha256:aefd3992b2ab085161e4cca35c6f73dd33f8d19272a9405b5ee4e8c2a0e79bba
|
||||
generated: "2021-01-05T14:26:33.629164+01:00"
|
||||
version: 7.16.2
|
||||
digest: sha256:ac733cb02d50e8398c1d2832988333896f1c7b765c19a0f1eea5e9b24bdb8207
|
||||
generated: "2022-01-05T07:52:34.913745+01:00"
|
||||
|
||||
@@ -2,8 +2,8 @@ apiVersion: v2
|
||||
name: clearml
|
||||
description: MLOps platform
|
||||
type: application
|
||||
version: "3.2.2"
|
||||
appVersion: "1.1.1"
|
||||
version: "3.10.2"
|
||||
appVersion: "1.4.0"
|
||||
home: https://clear.ml
|
||||
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg
|
||||
sources:
|
||||
@@ -18,14 +18,14 @@ keywords:
|
||||
- mlops
|
||||
dependencies:
|
||||
- name: redis
|
||||
version: "~10.9.0"
|
||||
version: "10.9.0"
|
||||
repository: "https://charts.bitnami.com/bitnami"
|
||||
condition: redis.enabled
|
||||
- name: mongodb
|
||||
version: "~10.3.2"
|
||||
version: "10.3.4"
|
||||
repository: "https://charts.bitnami.com/bitnami"
|
||||
condition: mongodb.enabled
|
||||
- name: elasticsearch
|
||||
version: "~7.10.1"
|
||||
version: "7.16.2"
|
||||
repository: "https://helm.elastic.co"
|
||||
condition: elasticsearch.enabled
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# ClearML Ecosystem for Kubernetes
|
||||
|
||||
  
|
||||
  
|
||||
|
||||
MLOps platform
|
||||
|
||||
@@ -10,7 +10,7 @@ MLOps platform
|
||||
|
||||
| Name | Email | Url |
|
||||
| ---- | ------ | --- |
|
||||
| valeriano-manassero | | https://github.com/valeriano-manassero |
|
||||
| valeriano-manassero | | <https://github.com/valeriano-manassero> |
|
||||
|
||||
## Introduction
|
||||
|
||||
@@ -31,9 +31,7 @@ For development/evaluation it's possible to use [kind](https://kind.sigs.k8s.io)
|
||||
After installation, the following commands will create a complete ClearML installation:
|
||||
|
||||
```
|
||||
mkdir -pm 777 /tmp/clearml-kind
|
||||
|
||||
cat <<EOF > /tmp/clearml-kind.yaml
|
||||
cat <<EOF | kind create cluster --config=-
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
@@ -62,8 +60,6 @@ nodes:
|
||||
containerPath: /var/local-path-provisioner
|
||||
EOF
|
||||
|
||||
kind create cluster --config /tmp/clearml-kind.yaml
|
||||
|
||||
helm install clearml allegroai/clearml
|
||||
```
|
||||
|
||||
@@ -89,6 +85,24 @@ This will create 3 ingress rules:
|
||||
|
||||
Just pointing the domain records to the IP where the ingress controller is responding will complete the deployment process.
|
||||
|
||||
## Upgrades / Values upgrades
|
||||
|
||||
Updating to the latest version of this chart can be done in two steps:
|
||||
|
||||
```
|
||||
helm repo update
|
||||
helm upgrade clearml allegroai/clearml
|
||||
```
|
||||
|
||||
Changing values on existing installation can be done with:
|
||||
|
||||
```
|
||||
helm upgrade clearml allegroai/clearml --version <CURRENT CHART VERSION> -f custom_values.yaml
|
||||
```
|
||||
|
||||
Please note: when updating only values, always set an explicit chart version to avoid an unintended chart upgrade.
|
||||
Keeping separate update procedures for chart version and values is a good practice to separate potential concerns.
|
||||
|
||||
## Additional Configuration for ClearML Server
|
||||
|
||||
You can also configure the **clearml-server** for:
|
||||
@@ -107,9 +121,9 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
|
||||
| Repository | Name | Version |
|
||||
|------------|------|---------|
|
||||
| https://charts.bitnami.com/bitnami | mongodb | ~10.3.2 |
|
||||
| https://charts.bitnami.com/bitnami | redis | ~10.9.0 |
|
||||
| https://helm.elastic.co | elasticsearch | ~7.10.1 |
|
||||
| https://charts.bitnami.com/bitnami | mongodb | 10.3.4 |
|
||||
| https://charts.bitnami.com/bitnami | redis | 10.9.0 |
|
||||
| https://helm.elastic.co | elasticsearch | 7.16.2 |
|
||||
|
||||
## Values
|
||||
|
||||
@@ -127,7 +141,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-cpu.clearmlGitPassword | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.clearmlGitUser | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.clearmlSecretKey | string | `nil` | |
|
||||
| agentGroups.agent-group-cpu.enabled | bool | `true` | |
|
||||
| agentGroups.agent-group-cpu.enabled | bool | `false` | |
|
||||
| agentGroups.agent-group-cpu.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| agentGroups.agent-group-cpu.image.repository | string | `"ubuntu"` | |
|
||||
| agentGroups.agent-group-cpu.image.tag | string | `"18.04"` | |
|
||||
@@ -151,7 +165,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-gpu.clearmlGitPassword | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.clearmlGitUser | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.clearmlSecretKey | string | `nil` | |
|
||||
| agentGroups.agent-group-gpu.enabled | bool | `true` | |
|
||||
| agentGroups.agent-group-gpu.enabled | bool | `false` | |
|
||||
| agentGroups.agent-group-gpu.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| agentGroups.agent-group-gpu.image.repository | string | `"nvidia/cuda"` | |
|
||||
| agentGroups.agent-group-gpu.image.tag | string | `"11.0-base-ubuntu18.04"` | |
|
||||
@@ -163,6 +177,19 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentGroups.agent-group-gpu.replicaCount | int | `0` | |
|
||||
| agentGroups.agent-group-gpu.tolerations | list | `[]` | |
|
||||
| agentGroups.agent-group-gpu.updateStrategy | string | `"Recreate"` | |
|
||||
| agentk8sglue.defaultDockerImage | string | `"nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04"` | |
|
||||
| agentk8sglue.enabled | bool | `true` | |
|
||||
| agentk8sglue.id | string | `"k8s-agent"` | |
|
||||
| agentk8sglue.image.repository | string | `"allegroai/clearml-agent-k8s"` | |
|
||||
| agentk8sglue.image.tag | string | `"latest"` | |
|
||||
| agentk8sglue.maxPods | int | `10` | |
|
||||
| agentk8sglue.podTemplate.env | list | `[]` | |
|
||||
| agentk8sglue.podTemplate.nodeSelector | object | `{}` | |
|
||||
| agentk8sglue.podTemplate.resources | object | `{}` | |
|
||||
| agentk8sglue.podTemplate.tolerations | list | `[]` | |
|
||||
| agentk8sglue.podTemplate.volumes | list | `[]` | |
|
||||
| agentk8sglue.queue | string | `"default"` | |
|
||||
| agentk8sglue.serviceAccountName | string | `"default"` | |
|
||||
| agentservices.affinity | object | `{}` | |
|
||||
| agentservices.agentVersion | string | `""` | |
|
||||
| agentservices.awsAccessKeyId | string | `nil` | |
|
||||
@@ -186,17 +213,17 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| agentservices.podAnnotations | object | `{}` | |
|
||||
| agentservices.replicaCount | int | `1` | |
|
||||
| agentservices.resources | object | `{}` | |
|
||||
| agentservices.storage.data.class | string | `"standard"` | |
|
||||
| agentservices.storage.data.class | string | `""` | |
|
||||
| agentservices.storage.data.size | string | `"50Gi"` | |
|
||||
| agentservices.tolerations | list | `[]` | |
|
||||
| apiserver.additionalConfigs | object | `{}` | |
|
||||
| apiserver.additionalConfigs | object | `{}` | additional configurations that can be used by api server; check examples in values.yaml file |
|
||||
| apiserver.affinity | object | `{}` | |
|
||||
| apiserver.authCookiesMaxAge | int | `864000` | Amount of seconds the authorization cookie will last in user browser |
|
||||
| apiserver.configDir | string | `"/opt/clearml/config"` | |
|
||||
| apiserver.extraEnvs | list | `[]` | |
|
||||
| apiserver.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| apiserver.image.repository | string | `"allegroai/clearml"` | |
|
||||
| apiserver.image.tag | string | `"1.1.1"` | |
|
||||
| apiserver.image.tag | string | `"1.4.0"` | |
|
||||
| apiserver.livenessDelay | int | `60` | |
|
||||
| apiserver.nodeSelector | object | `{}` | |
|
||||
| apiserver.podAnnotations | object | `{}` | |
|
||||
@@ -256,7 +283,7 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| fileserver.extraEnvs | list | `[]` | |
|
||||
| fileserver.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| fileserver.image.repository | string | `"allegroai/clearml"` | |
|
||||
| fileserver.image.tag | string | `"1.1.1"` | |
|
||||
| fileserver.image.tag | string | `"1.4.0"` | |
|
||||
| fileserver.nodeSelector | object | `{}` | |
|
||||
| fileserver.podAnnotations | object | `{}` | |
|
||||
| fileserver.replicaCount | int | `1` | |
|
||||
@@ -264,16 +291,24 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| fileserver.service.nodePort | int | `30081` | If service.type set to NodePort, this will be set to service's nodePort field. If service.type is set to others, this field will be ignored |
|
||||
| fileserver.service.port | int | `8081` | |
|
||||
| fileserver.service.type | string | `"NodePort"` | This will set to service's spec.type field |
|
||||
| fileserver.storage.data.class | string | `"standard"` | |
|
||||
| fileserver.storage.data.class | string | `""` | |
|
||||
| fileserver.storage.data.size | string | `"50Gi"` | |
|
||||
| fileserver.tolerations | list | `[]` | |
|
||||
| ingress.annotations | object | `{}` | |
|
||||
| ingress.api.annotations | object | `{}` | |
|
||||
| ingress.api.enabled | bool | `false` | |
|
||||
| ingress.api.hostName | string | `"api.clearml.127-0-0-1.nip.io"` | |
|
||||
| ingress.api.path | string | `"/"` | |
|
||||
| ingress.api.tlsSecretName | string | `""` | |
|
||||
| ingress.app.annotations | object | `{}` | |
|
||||
| ingress.app.enabled | bool | `false` | |
|
||||
| ingress.app.hostName | string | `"app.clearml.127-0-0-1.nip.io"` | |
|
||||
| ingress.app.path | string | `"/"` | |
|
||||
| ingress.app.tlsSecretName | string | `""` | |
|
||||
| ingress.enabled | bool | `false` | |
|
||||
| ingress.files.annotations | object | `{}` | |
|
||||
| ingress.files.enabled | bool | `false` | |
|
||||
| ingress.files.hostName | string | `"files.clearml.127-0-0-1.nip.io"` | |
|
||||
| ingress.files.path | string | `"/"` | |
|
||||
| ingress.files.tlsSecretName | string | `""` | |
|
||||
| ingress.name | string | `"clearml-server-ingress"` | |
|
||||
| mongodb.architecture | string | `"standalone"` | |
|
||||
@@ -302,11 +337,12 @@ For detailed instructions, see the [Optional Configuration](https://github.com/a
|
||||
| secret.credentials.tests.accessKey | string | `"ENP39EQM4SLACGD5FXB7"` | Set for tests_user_key field |
|
||||
| secret.credentials.tests.secretKey | string | `"lPcm0imbcBZ8mwgO7tpadutiS3gnJD05x9j7afwXPS35IKbpiQ"` | Set for tests_user_secret field |
|
||||
| secret.httpSession | string | `"9Tw20RbhJ1bLBiHEOWXvhplKGUbTgLzAtwFN2oLQvWwS0uRpD5"` | Set for http_session field |
|
||||
| webserver.additionalConfigs | object | `{}` | |
|
||||
| webserver.affinity | object | `{}` | |
|
||||
| webserver.extraEnvs | list | `[]` | |
|
||||
| webserver.image.pullPolicy | string | `"IfNotPresent"` | |
|
||||
| webserver.image.repository | string | `"allegroai/clearml"` | |
|
||||
| webserver.image.tag | string | `"1.1.1"` | |
|
||||
| webserver.image.tag | string | `"1.4.0"` | |
|
||||
| webserver.nodeSelector | object | `{}` | |
|
||||
| webserver.podAnnotations | object | `{}` | |
|
||||
| webserver.replicaCount | int | `1` | |
|
||||
|
||||
@@ -28,9 +28,7 @@ For development/evaluation it's possible to use [kind](https://kind.sigs.k8s.io)
|
||||
After installation, the following commands will create a complete ClearML installation:
|
||||
|
||||
```
|
||||
mkdir -pm 777 /tmp/clearml-kind
|
||||
|
||||
cat <<EOF > /tmp/clearml-kind.yaml
|
||||
cat <<EOF | kind create cluster --config=-
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
@@ -59,8 +57,6 @@ nodes:
|
||||
containerPath: /var/local-path-provisioner
|
||||
EOF
|
||||
|
||||
kind create cluster --config /tmp/clearml-kind.yaml
|
||||
|
||||
helm install clearml allegroai/clearml
|
||||
```
|
||||
|
||||
@@ -86,6 +82,24 @@ This will create 3 ingress rules:
|
||||
|
||||
Just pointing the domain records to the IP where ingress controller is responding will complete the deployment process.
|
||||
|
||||
## Upgrades/ Values upgrades
|
||||
|
||||
Updating to latest version of this chart can be done in two steps:
|
||||
|
||||
```
|
||||
helm repo update
|
||||
helm upgrade clearml allegroai/clearml
|
||||
```
|
||||
|
||||
Changing values on existing installation can be done with:
|
||||
|
||||
```
|
||||
helm upgrade clearml allegroai/clearml --version <CURRENT CHART VERSION> -f custom_values.yaml
|
||||
```
|
||||
|
||||
Please note: when changing only values, always set an explicit chart version to avoid an unintended chart upgrade.
|
||||
Keeping version upgrades and values changes as separate procedures is good practice, because it keeps the two concerns apart.
|
||||
|
||||
## Additional Configuration for ClearML Server
|
||||
|
||||
You can also configure the **clearml-server** for:
|
||||
|
||||
Binary file not shown.
BIN
charts/clearml/charts/elasticsearch-7.16.2.tgz
Normal file
BIN
charts/clearml/charts/elasticsearch-7.16.2.tgz
Normal file
Binary file not shown.
7
charts/clearml/ci/README.md
Normal file
7
charts/clearml/ci/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
Place values files with different values in this directory to ensure these cases are tested by the CI as well.
|
||||
|
||||
https://github.com/helm/chart-testing/blob/main/doc/ct_install.md
|
||||
|
||||
```
|
||||
"Charts may have multiple custom values files matching the glob pattern '*-values.yaml' in a directory named 'ci' in the root of the chart's directory. The chart is installed and tested for each of these files. If no custom values file is present, the chart is installed and tested with defaults."
|
||||
```
|
||||
1
charts/clearml/ci/default-values.yaml
Normal file
1
charts/clearml/ci/default-values.yaml
Normal file
@@ -0,0 +1 @@
|
||||
# empty so default values.yaml gets tested
|
||||
2
charts/clearml/ci/k8sagent-values.yaml
Normal file
2
charts/clearml/ci/k8sagent-values.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
agentk8sglue:
|
||||
enabled: true
|
||||
@@ -140,3 +140,17 @@ Create the name of the Files service to use
|
||||
{{- printf "%s%s%s%s" "http://" (include "clearml.fullname" .) "-fileserver:" (.Values.fileserver.service.port | toString) }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
Return the proper Docker Image Registry Secret Names.
Renders a pod-level `imagePullSecrets:` list from .Values.global.imagePullSecrets
(a list of Secret names). Renders nothing when `global` or the list is unset or empty.
Each name is quoted so that numeric- or boolean-looking names stay YAML strings.
Callers pipe the result through `indent` to place it under the pod `spec:`.
*/}}
{{- define "clearml.imagePullSecrets" -}}
{{- if .Values.global }}
{{- if .Values.global.imagePullSecrets }}
imagePullSecrets:
{{- range .Values.global.imagePullSecrets }}
  - name: {{ . | quote }}
{{- end }}
{{- end -}}
{{- end -}}
{{- end -}}
|
||||
|
||||
57
charts/clearml/templates/configmap-agentk8s-template.yaml
Normal file
57
charts/clearml/templates/configmap-agentk8s-template.yaml
Normal file
@@ -0,0 +1,57 @@
|
||||
{{/*
ConfigMap `k8sagent-pod-template`, created only when agentk8sglue.enabled is set.
Its single key `template.yaml` is a pod template built from .Values.agentk8sglue:
  - serviceAccountName       -> spec.serviceAccountName
  - podTemplate.volumes      -> one PersistentVolumeClaim volume per entry (claim name = entry `name`),
                                mounted in the container at the entry's `path`
  - podTemplate.resources    -> container resources
  - podTemplate.env          -> appended after the built-in CLEARML_* variables
  - podTemplate.nodeSelector / podTemplate.tolerations -> emitted only when non-empty
The API credentials are read from the `clearml-conf` Secret.
The agent Deployment mounts this ConfigMap at /root/template and passes
/root/template/template.yaml through `--template-yaml`.
*/}}
{{- if .Values.agentk8sglue.enabled }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: k8sagent-pod-template
data:
  template.yaml: |
    apiVersion: v1
    metadata:
      namespace: {{ .Release.Namespace }}
    spec:
      serviceAccountName: {{ .Values.agentk8sglue.serviceAccountName }}
      volumes:
      {{- range .Values.agentk8sglue.podTemplate.volumes }}
        - name: {{ .name }}
          persistentVolumeClaim:
            claimName: {{ .name }}
      {{- end }}
      containers:
      - resources:
          {{- toYaml .Values.agentk8sglue.podTemplate.resources | nindent 10 }}
        ports:
        - containerPort: 10022
        volumeMounts:
        {{- range .Values.agentk8sglue.podTemplate.volumes }}
          - mountPath: {{ .path }}
            name: {{ .name }}
        {{- end }}
        env:
        - name: CLEARML_API_HOST
          value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
        - name: CLEARML_WEB_HOST
          value: "http://{{ include "clearml.fullname" . }}-webserver"
        - name: CLEARML_FILES_HOST
          value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}"
        - name: CLEARML_API_ACCESS_KEY
          valueFrom:
            secretKeyRef:
              name: clearml-conf
              key: apiserver_key
        - name: CLEARML_API_SECRET_KEY
          valueFrom:
            secretKeyRef:
              name: clearml-conf
              key: apiserver_secret
        {{- if .Values.agentk8sglue.podTemplate.env }}
        {{ toYaml .Values.agentk8sglue.podTemplate.env | nindent 8 }}
        {{- end }}
      {{- with .Values.agentk8sglue.podTemplate.nodeSelector}}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.agentk8sglue.podTemplate.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
|
||||
13
charts/clearml/templates/configmap-webserver.yaml
Executable file
13
charts/clearml/templates/configmap-webserver.yaml
Executable file
@@ -0,0 +1,13 @@
|
||||
{{/*
ConfigMap built from .Values.webserver.additionalConfigs, created only when that map is non-empty.
Every key of the map becomes one data entry whose value is rendered as a literal block.
The webserver Deployment mounts this ConfigMap at /opt/clearml/config.
*/}}
{{- if .Values.webserver.additionalConfigs -}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: "{{ include "clearml.fullname" . }}-webserver-configmap"
  labels:
    {{- include "clearml.labels" . | nindent 4 }}
data:
  {{- range $key, $val := .Values.webserver.additionalConfigs }}
  {{ $key }}: |
    {{- $val | nindent 4 }}
  {{- end }}
{{- end -}}
|
||||
64
charts/clearml/templates/deployment-agentk8s.yaml
Normal file
64
charts/clearml/templates/deployment-agentk8s.yaml
Normal file
@@ -0,0 +1,64 @@
|
||||
{{/*
Deployment of the k8s glue agent, created only when agentk8sglue.enabled is set.
The container receives the in-cluster API/web/file server URLs, the glue settings
(max pods, queue, worker id, default docker image) from .Values.agentk8sglue, and the
API credentials from the `clearml-conf` Secret. The `k8sagent-pod-template` ConfigMap is
mounted at /root/template and its template.yaml is handed to the agent via `--template-yaml`.
Like the other Deployments of this chart, the pod honours .Values.global.imagePullSecrets.
*/}}
{{- if .Values.agentk8sglue.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: "{{ include "clearml.fullname" . }}-k8sagent"
  labels:
    app: k8sagent
spec:
  replicas: 1
  selector:
    matchLabels:
      app: k8sagent
  template:
    metadata:
      labels:
        app: k8sagent
    spec:
      {{- include "clearml.imagePullSecrets" . | indent 6 }}
      containers:
      - name: k8s-glue-container
        image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
        imagePullPolicy: Always
        command: ["/bin/bash", "-c", "export PATH=$PATH:$HOME/bin; source /root/.bashrc && /root/entrypoint.sh"]
        volumeMounts:
          - name: k8sagent-pod-template
            mountPath: /root/template
        env:
          - name: CLEARML_API_HOST
            value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
          - name: CLEARML_WEB_HOST
            value: "http://{{ include "clearml.fullname" . }}-webserver"
          - name: CLEARML_FILES_HOST
            value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}"
          - name: K8S_GLUE_MAX_PODS
            value: "{{.Values.agentk8sglue.maxPods}}"
          - name: K8S_GLUE_QUEUE
            value: "{{.Values.agentk8sglue.queue}}"
          # Spawn pods in the release namespace, using the mounted pod template.
          - name: K8S_GLUE_EXTRA_ARGS
            value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml"
          - name: K8S_DEFAULT_NAMESPACE
            value: "{{ .Release.Namespace }}"
          - name: CLEARML_API_ACCESS_KEY
            valueFrom:
              secretKeyRef:
                name: clearml-conf
                key: apiserver_key
          - name: CLEARML_API_SECRET_KEY
            valueFrom:
              secretKeyRef:
                name: clearml-conf
                key: apiserver_secret
          - name: CLEARML_WORKER_ID
            value: "{{.Values.agentk8sglue.id}}"
          - name: CLEARML_AGENT_UPDATE_REPO
            value: ""
          - name: FORCE_CLEARML_AGENT_REPO
            value: ""
          - name: CLEARML_DOCKER_IMAGE
            value: "{{.Values.agentk8sglue.defaultDockerImage}}"
      volumes:
        - name: k8sagent-pod-template
          configMap:
            name: k8sagent-pod-template
{{- end }}
|
||||
|
||||
@@ -19,6 +19,7 @@ spec:
|
||||
labels:
|
||||
{{- include "clearml.selectorLabelsApiServer" . | nindent 8 }}
|
||||
spec:
|
||||
{{- include "clearml.imagePullSecrets" . | indent 6 }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
image: "{{ .Values.apiserver.image.repository }}:{{ .Values.apiserver.image.tag | default .Chart.AppVersion }}"
|
||||
|
||||
@@ -22,6 +22,7 @@ spec:
|
||||
- name: fileserver-data
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ include "clearml.fullname" . }}-fileserver-data
|
||||
{{- include "clearml.imagePullSecrets" . | indent 6 }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
image: "{{ .Values.fileserver.image.repository }}:{{ .Values.fileserver.image.tag | default .Chart.AppVersion }}"
|
||||
|
||||
@@ -18,6 +18,7 @@ spec:
|
||||
labels:
|
||||
{{- include "clearml.selectorLabelsWebServer" . | nindent 8 }}
|
||||
spec:
|
||||
{{- include "clearml.imagePullSecrets" . | indent 6 }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
image: "{{ .Values.webserver.image.repository }}:{{ .Values.webserver.image.tag | default .Chart.AppVersion }}"
|
||||
@@ -38,6 +39,11 @@ spec:
|
||||
- curl
|
||||
- -X OPTIONS
|
||||
- http://0.0.0.0:80/
|
||||
{{- if .Values.webserver.additionalConfigs }}
|
||||
volumeMounts:
|
||||
- name: webserver-config
|
||||
mountPath: /opt/clearml/config
|
||||
{{- end }}
|
||||
env:
|
||||
- name: NGINX_APISERVER_ADDRESS
|
||||
value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}"
|
||||
@@ -50,6 +56,12 @@ spec:
|
||||
- webserver
|
||||
resources:
|
||||
{{- toYaml .Values.webserver.resources | nindent 12 }}
|
||||
{{- if .Values.webserver.additionalConfigs }}
|
||||
volumes:
|
||||
- name: webserver-config
|
||||
configMap:
|
||||
name: "{{ include "clearml.fullname" . }}-webserver-configmap"
|
||||
{{- end }}
|
||||
{{- with .Values.webserver.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
@@ -61,4 +73,4 @@ spec:
|
||||
{{- with .Values.webserver.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -1,4 +1,4 @@
|
||||
{{- if .Values.ingress.enabled -}}
|
||||
{{- if .Values.ingress.api.enabled -}}
|
||||
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
@@ -11,10 +11,13 @@ metadata:
|
||||
name: {{ include "clearml.fullname" . }}-api
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- /*
Effective annotations = shared ingress.annotations overridden by ingress.api.annotations.
mergeOverwrite mutates its destination, so merge into a deep copy: merging straight into
.Values.ingress.annotations would leak the api-specific annotations into the other ingresses.
*/ -}}
  {{- $annotations := deepCopy .Values.ingress.annotations }}
  {{- if .Values.ingress.api.annotations }}
  {{- $annotations = mergeOverwrite $annotations .Values.ingress.api.annotations }}
  {{- end }}
  annotations:
    {{- toYaml $annotations | nindent 4 }}
|
||||
|
||||
spec:
|
||||
{{- if .Values.ingress.api.tlsSecretName }}
|
||||
tls:
|
||||
@@ -26,7 +29,7 @@ spec:
|
||||
- host: {{ .Values.ingress.api.hostName }}
|
||||
http:
|
||||
paths:
|
||||
- path: "/"
|
||||
- path: {{ .Values.ingress.api.path }}
|
||||
{{ if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
|
||||
pathType: Prefix
|
||||
backend:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
{{- if .Values.ingress.enabled -}}
|
||||
{{- if .Values.ingress.app.enabled -}}
|
||||
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
@@ -11,10 +11,12 @@ metadata:
|
||||
name: {{ include "clearml.fullname" . }}-app
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- /*
Effective annotations = shared ingress.annotations overridden by ingress.app.annotations.
mergeOverwrite mutates its destination, so merge into a deep copy: merging straight into
.Values.ingress.annotations would leak the app-specific annotations into the other ingresses.
*/ -}}
  {{- $annotations := deepCopy .Values.ingress.annotations }}
  {{- if .Values.ingress.app.annotations }}
  {{- $annotations = mergeOverwrite $annotations .Values.ingress.app.annotations }}
  {{- end }}
  annotations:
    {{- toYaml $annotations | nindent 4 }}
|
||||
spec:
|
||||
{{- if .Values.ingress.app.tlsSecretName }}
|
||||
tls:
|
||||
@@ -26,7 +28,7 @@ spec:
|
||||
- host: {{ .Values.ingress.app.hostName }}
|
||||
http:
|
||||
paths:
|
||||
- path: "/"
|
||||
- path: {{ .Values.ingress.app.path }}
|
||||
{{ if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
|
||||
pathType: Prefix
|
||||
backend:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
{{- if .Values.ingress.enabled -}}
|
||||
{{- if .Values.ingress.files.enabled -}}
|
||||
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
@@ -11,10 +11,12 @@ metadata:
|
||||
name: {{ include "clearml.fullname" . }}-files
|
||||
labels:
|
||||
{{- include "clearml.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- /*
Effective annotations = shared ingress.annotations overridden by ingress.files.annotations.
mergeOverwrite mutates its destination, so merge into a deep copy: merging straight into
.Values.ingress.annotations would leak the files-specific annotations into the other ingresses.
*/ -}}
  {{- $annotations := deepCopy .Values.ingress.annotations }}
  {{- if .Values.ingress.files.annotations }}
  {{- $annotations = mergeOverwrite $annotations .Values.ingress.files.annotations }}
  {{- end }}
  annotations:
    {{- toYaml $annotations | nindent 4 }}
|
||||
spec:
|
||||
{{- if .Values.ingress.files.tlsSecretName }}
|
||||
tls:
|
||||
@@ -26,7 +28,7 @@ spec:
|
||||
- host: {{ .Values.ingress.files.hostName }}
|
||||
http:
|
||||
paths:
|
||||
- path: "/"
|
||||
- path: {{ .Values.ingress.files.path }}
|
||||
{{ if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }}
|
||||
pathType: Prefix
|
||||
backend:
|
||||
|
||||
@@ -11,5 +11,7 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.agentservices.storage.data.size | quote }}
|
||||
{{- /*
Emit storageClassName only when a class is configured; otherwise the field is omitted.
Only left-trim the actions: a right-trim here would glue `storageClassName:` onto the
preceding `storage:` line and produce invalid YAML.
*/ -}}
{{- if .Values.agentservices.storage.data.class }}
  storageClassName: {{ .Values.agentservices.storage.data.class | quote }}
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
@@ -10,4 +10,7 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.fileserver.storage.data.size | quote }}
|
||||
storageClassName: {{ .Values.fileserver.storage.data.class | quote }}
|
||||
{{- /*
Emit storageClassName only when a class is configured; otherwise the field is omitted.
Only left-trim the actions: a right-trim here would glue `storageClassName:` onto the
preceding `storage:` line and produce invalid YAML.
*/ -}}
{{- if .Values.fileserver.storage.data.class }}
  storageClassName: {{ .Values.fileserver.storage.data.class | quote }}
{{- end }}
|
||||
|
||||
25
charts/clearml/templates/rbac-agentk8s.yaml
Normal file
25
charts/clearml/templates/rbac-agentk8s.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
{{/*
Role and RoleBinding `k8sagent-pods-access`, created only when agentk8sglue.enabled is set.
The Role allows get/list/watch/create/patch/delete on pods (core API group); the RoleBinding
grants it to the `default` ServiceAccount of the release namespace.
*/}}
{{- if .Values.agentk8sglue.enabled }}
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: k8sagent-pods-access
rules:
  - apiGroups:
      - ""
    resources:
      - pods
    verbs: ["get", "list", "watch", "create", "patch", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: k8sagent-pods-access
subjects:
  # NOTE(review): bound to the fixed `default` ServiceAccount rather than
  # agentk8sglue.serviceAccountName - confirm this is intended.
  - kind: ServiceAccount
    name: default
    namespace: {{ .Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: k8sagent-pods-access
{{- end }}
|
||||
88
charts/clearml/values.yaml
Normal file → Executable file
88
charts/clearml/values.yaml
Normal file → Executable file
@@ -1,18 +1,29 @@
|
||||
# global:
|
||||
# imagePullSecrets:
|
||||
# - docker-cfg
|
||||
clearml:
|
||||
defaultCompany: "d1bd92a3b039400cbafc60a7a5b1e52b"
|
||||
ingress:
|
||||
enabled: false
|
||||
name: clearml-server-ingress
|
||||
annotations: {}
|
||||
app:
|
||||
enabled: false
|
||||
hostName: "app.clearml.127-0-0-1.nip.io"
|
||||
tlsSecretName: ""
|
||||
annotations: {}
|
||||
path: "/"
|
||||
api:
|
||||
enabled: false
|
||||
hostName: "api.clearml.127-0-0-1.nip.io"
|
||||
tlsSecretName: ""
|
||||
annotations: {}
|
||||
path: "/"
|
||||
files:
|
||||
enabled: false
|
||||
hostName: "files.clearml.127-0-0-1.nip.io"
|
||||
tlsSecretName: ""
|
||||
annotations: {}
|
||||
path: "/"
|
||||
|
||||
secret:
|
||||
# -- Set for http_session field
|
||||
@@ -56,7 +67,7 @@ apiserver:
|
||||
image:
|
||||
repository: "allegroai/clearml"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "1.1.1"
|
||||
tag: "1.4.0"
|
||||
|
||||
extraEnvs: []
|
||||
|
||||
@@ -80,6 +91,7 @@ apiserver:
|
||||
|
||||
affinity: {}
|
||||
|
||||
# -- additional configurations that can be used by api server; check examples in values.yaml file
|
||||
additionalConfigs: {}
|
||||
# services.conf: |
|
||||
# tasks {
|
||||
@@ -90,6 +102,25 @@ apiserver:
|
||||
# watch_interval_sec: 900
|
||||
# }
|
||||
# }
|
||||
# apiserver.conf: |
|
||||
# auth {
|
||||
# fixed_users {
|
||||
# enabled: true
|
||||
# pass_hashed: false
|
||||
# users: [
|
||||
# {
|
||||
# username: "jane"
|
||||
# password: "12345678"
|
||||
# name: "Jane Doe"
|
||||
# },
|
||||
# {
|
||||
# username: "john"
|
||||
# password: "12345678"
|
||||
# name: "John Doe"
|
||||
# },
|
||||
# ]
|
||||
# }
|
||||
# }
|
||||
|
||||
fileserver:
|
||||
service:
|
||||
@@ -105,7 +136,7 @@ fileserver:
|
||||
image:
|
||||
repository: "allegroai/clearml"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "1.1.1"
|
||||
tag: "1.4.0"
|
||||
|
||||
extraEnvs: []
|
||||
|
||||
@@ -131,7 +162,7 @@ fileserver:
|
||||
|
||||
storage:
|
||||
data:
|
||||
class: "standard"
|
||||
class: ""
|
||||
size: 50Gi
|
||||
|
||||
webserver:
|
||||
@@ -150,7 +181,7 @@ webserver:
|
||||
image:
|
||||
repository: "allegroai/clearml"
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "1.1.1"
|
||||
tag: "1.4.0"
|
||||
|
||||
podAnnotations: {}
|
||||
|
||||
@@ -172,6 +203,8 @@ webserver:
|
||||
|
||||
affinity: {}
|
||||
|
||||
additionalConfigs: {}
|
||||
|
||||
agentservices:
|
||||
enabled: false
|
||||
clearmlHostIp: null
|
||||
@@ -219,12 +252,12 @@ agentservices:
|
||||
|
||||
storage:
|
||||
data:
|
||||
class: "standard"
|
||||
class: ""
|
||||
size: 50Gi
|
||||
|
||||
agentGroups:
|
||||
agent-group-cpu:
|
||||
enabled: true
|
||||
enabled: false
|
||||
name: agent-group-cpu
|
||||
replicaCount: 1
|
||||
updateStrategy: Recreate
|
||||
@@ -258,7 +291,7 @@ agentGroups:
|
||||
affinity: {}
|
||||
|
||||
agent-group-gpu:
|
||||
enabled: true
|
||||
enabled: false
|
||||
name: agent-group-gpu
|
||||
replicaCount: 0
|
||||
updateStrategy: Recreate
|
||||
@@ -291,6 +324,43 @@ agentGroups:
|
||||
|
||||
affinity: {}
|
||||
|
||||
# This agent will spawn queued experiments in new pods, a good use case is to combine this with
# GPU autoscaling nodes.
# https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue
agentk8sglue:
  # -- Deploy the k8s glue agent (Deployment, pod template ConfigMap and pod-access Role/RoleBinding)
  enabled: true
  image:
    # -- Image repository of the k8s glue agent container
    repository: "allegroai/clearml-agent-k8s"
    # -- Image tag of the k8s glue agent container
    tag: "latest"
  # -- serviceAccountName written into the pod template used for spawned pods
  serviceAccountName: default
  # -- Passed to the agent as K8S_GLUE_MAX_PODS
  maxPods: 10
  # -- Default docker image that is spawned as new pod (passed to the agent as CLEARML_DOCKER_IMAGE)
  defaultDockerImage: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04
  # -- Passed to the agent as K8S_GLUE_QUEUE
  queue: default
  # -- Passed to the agent as CLEARML_WORKER_ID
  id: k8s-agent
  podTemplate:
    # -- Volumes for spawned pods; each entry mounts the PersistentVolumeClaim called `name` at `path`
    volumes: []
      # - name: "yourvolume"
      #   path: "/yourpath"
    # -- Extra environment variables appended to the pod template container
    env: []
      # # to setup access to private repo, setup secret with git credentials:
      # - name: CLEARML_AGENT_GIT_USER
      #   value: mygitusername
      # - name: CLEARML_AGENT_GIT_PASS
      #   valueFrom:
      #     secretKeyRef:
      #       name: git-password
      #       key: git-password
    # -- Resources set on the pod template container
    resources: {}
      # limits:
      #   nvidia.com/gpu: 1
    # -- Tolerations added to the pod template when non-empty
    tolerations: []
      # - key: "nvidia.com/gpu"
      #   operator: Exists
      #   effect: "NoSchedule"
    # -- nodeSelector added to the pod template when non-empty
    nodeSelector: {}
      # fleet: gpu-nodes
|
||||
|
||||
|
||||
externalServices:
|
||||
# -- Existing ElasticSearch Hostname to use if elasticsearch.enabled is false
|
||||
elasticsearchHost: ""
|
||||
@@ -337,7 +407,7 @@ mongodb: # configuration from https://github.com/bitnami/charts/blob/master/bit
|
||||
port: 27017
|
||||
portName: mongo-service
|
||||
|
||||
elasticsearch: # configuration from https://github.com/elastic/helm-charts/blob/7.10/elasticsearch/values.yaml
|
||||
elasticsearch: # configuration from https://github.com/elastic/helm-charts/blob/7.16/elasticsearch/values.yaml
|
||||
enabled: true
|
||||
httpPort: 9200
|
||||
roles:
|
||||
|
||||
Reference in New Issue
Block a user