Compare commits

..

3 Commits

Author SHA1 Message Date
Valeriano Manassero
1b164c2906 Add configurable default base serve url (#83)
* Added: configurable default base serving url

* Changed: chart version bump
2022-06-23 10:46:18 +02:00
Valeriano Manassero
43806b8e21 Add insecure cert check flag (#85)
* Added: clearmlcheckCertificate flag

* Changed: bump chart
2022-06-23 10:43:39 +02:00
Valeriano Manassero
80072c0654 Add editable config for k8s Agent (#84)
* Added: editable configuration

* Changed: bump up version
2022-06-23 09:52:19 +02:00
9 changed files with 38 additions and 12 deletions

View File

@@ -2,8 +2,8 @@ apiVersion: v2
name: clearml-agent
description: MLOps platform
type: application
version: "1.0.2"
appVersion: "1.21"
version: "1.1.1"
appVersion: "1.24"
kubeVersion: ">= 1.19.0-0 < 1.25.0-0"
home: https://clear.ml
icon: https://raw.githubusercontent.com/allegroai/clearml/master/docs/clearml-logo.svg

View File

@@ -1,6 +1,6 @@
# clearml-agent
![Version: 1.0.2](https://img.shields.io/badge/Version-1.0.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.21](https://img.shields.io/badge/AppVersion-1.21-informational?style=flat-square)
![Version: 1.1.1](https://img.shields.io/badge/Version-1.1.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square)
MLOps platform
@@ -25,8 +25,9 @@ Kubernetes: `>= 1.19.0-0 < 1.25.0-0`
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| agentk8sglue | object | `{"apiServerUrlReference":"https://api.clear.ml","defaultContainerImage":"ubuntu:18.04","fileServerUrlReference":"https://files.clear.ml","id":"k8s-agent","image":{"repository":"allegroai/clearml-agent-k8s","tag":"base-1.21"},"maxPods":10,"podTemplate":{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumes":[]},"queue":"default","replicaCount":1,"serviceAccountName":"default","webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
| agentk8sglue | object | `{"apiServerUrlReference":"https://api.clear.ml","clearmlcheckCertificate":true,"defaultContainerImage":"ubuntu:18.04","fileServerUrlReference":"https://files.clear.ml","id":"k8s-agent","image":{"repository":"allegroai/clearml-agent-k8s","tag":"base-1.21"},"maxPods":10,"podTemplate":{"env":[],"nodeSelector":{},"resources":{},"tolerations":[],"volumes":[]},"queue":"default","replicaCount":1,"serviceAccountName":"default","webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
| agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url |
| agentk8sglue.clearmlcheckCertificate | bool | `true` | Check certificates validity for evefry UrlReference below. |
| agentk8sglue.defaultContainerImage | string | `"ubuntu:18.04"` | default container image for ClearML Task pod |
| agentk8sglue.fileServerUrlReference | string | `"https://files.clear.ml"` | Reference to File server url |
| agentk8sglue.id | string | `"k8s-agent"` | ClearML worker ID (must be unique across the entire ClearMLenvironment) |
@@ -42,9 +43,10 @@ Kubernetes: `>= 1.19.0-0 < 1.25.0-0`
| agentk8sglue.replicaCount | int | `1` | Glue Agent number of pods |
| agentk8sglue.serviceAccountName | string | `"default"` | serviceAccountName for pods spawned to consume ClearML Task |
| agentk8sglue.webServerUrlReference | string | `"https://app.clear.ml"` | Reference to Web server url |
| clearml | object | `{"agentk8sglueKey":"ACCESSKEY","agentk8sglueSecret":"SECRETKEY"}` | ClearMl generic configurations |
| clearml | object | `{"agentk8sglueKey":"ACCESSKEY","agentk8sglueSecret":"SECRETKEY","clearmlConfig":"sdk {\n}"}` | ClearMl generic configurations |
| clearml.agentk8sglueKey | string | `"ACCESSKEY"` | Agent k8s Glue basic auth key |
| clearml.agentk8sglueSecret | string | `"SECRETKEY"` | Agent k8s Glue basic auth secret |
| clearml.clearmlConfig | string | `"sdk {\n}"` | ClearML configuration file |
| imageCredentials | object | `{"email":"someone@host.com","enabled":false,"existingSecret":"","password":"pwd","registry":"docker.io","username":"someone"}` | Private image registry configuration |
| imageCredentials.email | string | `"someone@host.com"` | Email |
| imageCredentials.enabled | bool | `false` | Use private authentication mode |

View File

@@ -32,26 +32,32 @@ spec:
- -c
- >
set -x;
while [ $(curl -sw '%{http_code}' "{{.Values.agentk8sglue.apiServerUrlReference}}/debug.ping" -o /dev/null) -ne 200 ] ; do
while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.apiServerUrlReference}}/debug.ping" -o /dev/null) -ne 200 ] ; do
echo "waiting for apiserver" ;
sleep 5 ;
done;
while [ $(curl -sw '%{http_code}' "{{.Values.agentk8sglue.fileServerUrlReference}}/" -o /dev/null) =~ 403|405 ]] ; do
while [[ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.fileServerUrlReference}}/" -o /dev/null) =~ 403|405 ]] ; do
echo "waiting for fileserver" ;
sleep 5 ;
done;
while [ $(curl -sw '%{http_code}' "{{.Values.agentk8sglue.webServerUrlReference}}/" -o /dev/null) -ne 200 ] ; do
while [ $(curl {{ if not .Values.agentk8sglue.clearmlcheckCertificate }}--insecure{{ end }} -sw '%{http_code}' "{{.Values.agentk8sglue.webServerUrlReference}}/" -o /dev/null) -ne 200 ] ; do
echo "waiting for webserver" ;
sleep 5 ;
done
containers:
- name: k8s-glue
image: "{{ .Values.agentk8sglue.image.repository }}:{{ .Values.agentk8sglue.image.tag }}"
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
command: ["/bin/bash", "-c", "export PATH=$PATH:$HOME/bin; source /root/.bashrc && /root/entrypoint.sh"]
volumeMounts:
- name: k8sagent-pod-template
mountPath: /root/template
{{ if .Values.clearml.clearmlConfig }}
- name: k8sagent-clearml-conf-volume
mountPath: /root/clearml.conf
subPath: clearml.conf
readOnly: true
{{- end }}
env:
- name: CLEARML_API_HOST
value: "{{.Values.agentk8sglue.apiServerUrlReference}}"
@@ -89,3 +95,11 @@ spec:
- name: k8sagent-pod-template
configMap:
name: k8sagent-pod-template
{{ if .Values.clearml.clearmlConfig }}
- name: k8sagent-clearml-conf-volume
secret:
secretName: clearml-agent-conf
items:
- key: clearml.conf
path: clearml.conf
{{ end }}

View File

@@ -5,6 +5,7 @@ metadata:
data:
agentk8sglue_key: {{ .Values.clearml.agentk8sglueKey | b64enc }}
agentk8sglue_secret: {{ .Values.clearml.agentk8sglueSecret | b64enc }}
clearml.conf: {{ .Values.clearml.clearmlConfig | b64enc }}
---
{{- if .Values.imageCredentials.enabled }}
{{- if not .Values.imageCredentials.existingSecret }}

View File

@@ -19,6 +19,10 @@ clearml:
agentk8sglueKey: "ACCESSKEY"
# -- Agent k8s Glue basic auth secret
agentk8sglueSecret: "SECRETKEY"
# -- ClearML configuration file
clearmlConfig: |-
sdk {
}
# -- This agent will spawn queued experiments in new pods, a good use case is to combine this with
# GPU autoscaling nodes.
@@ -32,6 +36,9 @@ agentk8sglue:
# -- Glue Agent number of pods
replicaCount: 1
# -- Check certificates validity for evefry UrlReference below.
clearmlcheckCertificate: true
# -- Reference to Api server url
apiServerUrlReference: "https://api.clear.ml"
# -- Reference to File server url

View File

@@ -2,7 +2,7 @@ apiVersion: v2
name: clearml-serving
description: ClearML Serving Helm Chart
type: application
version: 0.3.0
version: 0.4.0
appVersion: "0.9.0"
maintainers:
- name: valeriano-manassero

View File

@@ -1,6 +1,6 @@
# clearml-serving
![Version: 0.3.0](https://img.shields.io/badge/Version-0.3.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.9.0](https://img.shields.io/badge/AppVersion-0.9.0-informational?style=flat-square)
![Version: 0.4.0](https://img.shields.io/badge/Version-0.4.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.9.0](https://img.shields.io/badge/AppVersion-0.9.0-informational?style=flat-square)
ClearML Serving Helm Chart
@@ -23,6 +23,7 @@ ClearML Serving Helm Chart
| clearml.apiAccessKey | string | `"ClearML API Access Key"` | |
| clearml.apiHost | string | `"http://clearml-server-apiserver:8008"` | |
| clearml.apiSecretKey | string | `"ClearML API Secret Key"` | |
| clearml.defaultBaseServeUrl | string | `"http://127.0.0.1:8080/serve"` | |
| clearml.filesHost | string | `"http://clearml-server-fileserver:8081"` | |
| clearml.servingTaskId | string | `"ClearML Serving Task ID"` | |
| clearml.webHost | string | `"http://clearml-server-webserver:80"` | |

View File

@@ -35,7 +35,7 @@ spec:
- name: CLEARML_SERVING_POLL_FREQ
value: "1.0"
- name: CLEARML_DEFAULT_BASE_SERVE_URL
value: http://127.0.0.1:8080/serve
value: "{{ .Values.clearml.defaultBaseServeUrl }}"
- name: CLEARML_DEFAULT_TRITON_GRPC_ADDR
{{- if .Values.clearml_serving_triton.enabled }}
value: "clearml-serving-triton:8001"

View File

@@ -6,6 +6,7 @@ clearml:
apiHost: http://clearml-server-apiserver:8008
filesHost: http://clearml-server-fileserver:8081
webHost: http://clearml-server-webserver:80
defaultBaseServeUrl: http://127.0.0.1:8080/serve
servingTaskId: "ClearML Serving Task ID"
zookeeper: