Compare commits

...

4 Commits

Author SHA1 Message Date
Valeriano Manassero
a9d57db3a8 Force agent upgrade apps (#181)
* Fixed: force agent update

* Changed: bump up version
2023-03-09 11:28:12 +01:00
Valeriano Manassero
08b92ba622 Fix apps baseimage (#180)
* Fixed: apps base image

* Changed: bump up version
2023-03-09 08:19:30 +01:00
Valeriano Manassero
5b77cf41c2 Add external clusterrolebinding and rolebinding support (#179)
* Added: external rb and crb support

* Changed: bump up version
2023-03-07 13:09:30 +01:00
Valeriano Manassero
a6db8b4262 Fix init container waits forever pinging a mongodb in production config (#178)
* Fixed: hostname healthcheck for mongodb

* Changed: bump up version
2023-03-07 08:19:22 +01:00
9 changed files with 61 additions and 10 deletions

View File

@@ -2,7 +2,7 @@ apiVersion: v2
name: clearml-agent
description: MLOps platform Task running agent
type: application
version: "3.6.0"
version: "3.7.0"
appVersion: "1.24"
kubeVersion: ">= 1.21.0-0 < 1.27.0-0"
home: https://clear.ml
@@ -21,4 +21,4 @@ keywords:
annotations:
artifacthub.io/changes: |
- kind: added
description: support for priorityClass in task pod template
description: support for existing rolebindings and clusterrolebindings

View File

@@ -1,6 +1,6 @@
# ClearML Kubernetes Agent
![Version: 3.6.0](https://img.shields.io/badge/Version-3.6.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square)
![Version: 3.7.0](https://img.shields.io/badge/Version-3.7.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square)
MLOps platform Task running agent
@@ -30,7 +30,9 @@ Kubernetes: `>= 1.21.0-0 < 1.27.0-0`
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| agentk8sglue | object | `{"affinity":{},"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"priorityClassName":"","resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"securityContext":{},"serviceExistingAccountName":"","taskAsJob":false,"tolerations":[],"volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
| agentk8sglue | object | `{"additionalClusterRoleBindings":[],"additionalRoleBindings":[],"affinity":{},"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"affinity":{},"annotations":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"priorityClassName":"","resources":{},"schedulerName":"","securityContext":{},"tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"queue":"default","replicaCount":1,"securityContext":{},"serviceExistingAccountName":"","taskAsJob":false,"tolerations":[],"volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue |
| agentk8sglue.additionalClusterRoleBindings | list | `[]` | names of existing ClusterRoles to bind to the Agent service account (one ClusterRoleBinding is created per entry) |
| agentk8sglue.additionalRoleBindings | list | `[]` | names of existing Roles to bind to the Agent service account (one RoleBinding is created per entry) |
| agentk8sglue.affinity | object | `{}` | affinity setup for Agent pod (example in values.yaml comments) |
| agentk8sglue.annotations | object | `{}` | annotations setup for Agent pod (example in values.yaml comments) |
| agentk8sglue.apiServerUrlReference | string | `"https://api.clear.ml"` | Reference to Api server url |

View File

@@ -86,3 +86,33 @@ roleRef:
kind: Role
name: {{ include "clearmlAgent.name" . }}-kpa
{{- end }}
{{- /*
Create one ClusterRoleBinding per entry of
.Values.agentk8sglue.additionalClusterRoleBindings. Each entry is used as the
name of the ClusterRole referenced by roleRef, and the subject is the agent
ServiceAccount in the release namespace.
*/ -}}
{{- range .Values.agentk8sglue.additionalClusterRoleBindings }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  # Quoted so that entries resembling numbers/booleans still render as strings.
  name: "{{ include "clearmlAgent.name" $ }}-kpa-{{ . }}"
subjects:
  - kind: ServiceAccount
    name: {{ include "clearmlAgent.serviceAccountName" $ }}
    namespace: {{ $.Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  # Inside range "." is the current list entry; "$" is the root context.
  name: {{ . | quote }}
{{- end }}
{{- /*
Create one RoleBinding per entry of .Values.agentk8sglue.additionalRoleBindings.
Each entry is used as the name of the Role referenced by roleRef, and the
subject is the agent ServiceAccount in the release namespace.
*/ -}}
{{- range .Values.agentk8sglue.additionalRoleBindings }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  # Quoted so that entries resembling numbers/booleans still render as strings.
  name: "{{ include "clearmlAgent.name" $ }}-kpa-{{ . }}"
subjects:
  - kind: ServiceAccount
    name: {{ include "clearmlAgent.serviceAccountName" $ }}
    namespace: {{ $.Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  # Inside range "." is the current list entry; "$" is the root context.
  name: {{ . | quote }}
{{- end }}

View File

@@ -81,6 +81,12 @@ agentk8sglue:
securityContext: {}
# runAsUser: 1001
# fsGroup: 1001
# -- names of existing ClusterRoles to bind to the Agent service account (one ClusterRoleBinding is created per entry)
additionalClusterRoleBindings: []
# - privileged
# -- names of existing Roles to bind to the Agent service account (one RoleBinding is created per entry)
additionalRoleBindings: []
# - privileged
# -- nodeSelector setup for Agent pod (example in values.yaml comments)
nodeSelector: {}
# fleet: agent-nodes

View File

@@ -2,7 +2,7 @@ apiVersion: v2
name: clearml
description: MLOps platform
type: application
version: "5.8.0"
version: "5.8.3"
appVersion: "1.9.2"
kubeVersion: ">= 1.21.0-0 < 1.27.0-0"
home: https://clear.ml
@@ -32,5 +32,5 @@ dependencies:
condition: elasticsearch.enabled
annotations:
artifacthub.io/changes: |
- kind: added
description: filemount support for apps agent
- kind: fixed
description: force app agent update

View File

@@ -1,6 +1,6 @@
# ClearML Ecosystem for Kubernetes
![Version: 5.8.0](https://img.shields.io/badge/Version-5.8.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.9.2](https://img.shields.io/badge/AppVersion-1.9.2-informational?style=flat-square)
![Version: 5.8.3](https://img.shields.io/badge/Version-5.8.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.9.2](https://img.shields.io/badge/AppVersion-1.9.2-informational?style=flat-square)
MLOps platform

View File

@@ -177,6 +177,17 @@ MongoDB Comnnection string
{{- end }}
{{- end }}
{{/*
MongoDB hostname
*/}}
{{- define "mongodb.hostname" -}}
{{- /*
Renders "mongodb" when .Values.mongodb.architecture is "standalone" and
"mongodb-headless" for any other architecture value. All surrounding
whitespace is trimmed so the result can be embedded inline in a host name.
*/ -}}
{{- if ne .Values.mongodb.architecture "standalone" -}}
mongodb-headless
{{- else -}}
mongodb
{{- end -}}
{{- end }}
{{/*
Redis Service name
*/}}

View File

@@ -61,7 +61,7 @@ spec:
done ;
{{- end }}
{{- if .Values.mongodb.enabled }}
while [ $(curl --telnet-option BOGUS --connect-timeout 2 -s "telnet://{{ .Release.Name }}-mongodb:27017" -o /dev/null; echo $?) -ne 49 ] ; do
while [ $(curl --telnet-option BOGUS --connect-timeout 2 -s "telnet://{{ .Release.Name }}-{{ include "mongodb.hostname" . }}:27017" -o /dev/null; echo $?) -ne 49 ] ; do
echo "waiting for mongodb" ;
sleep 5 ;
done ;

View File

@@ -70,7 +70,7 @@ spec:
value: "http://{{ include "fileserver.referenceName" . }}:{{ .Values.fileserver.service.port }}"
- name: CLEARML_WEB_HOST
value: "http://{{ include "webserver.referenceName" . }}:{{ .Values.webserver.service.port }}"
- name: CLEARML_AGENT_DEFAULT_BASE_DOCKER
- name: CLEARML_DOCKER_IMAGE
value: "{{ .Values.enterpriseFeatures.clearmlApplications.basePodImage.repository }}:{{ .Values.enterpriseFeatures.clearmlApplications.basePodImage.tag }}"
- name: CLEARML_WORKER_ID
value: "apps-agent-1"
@@ -83,6 +83,8 @@ spec:
value: "apps_queue"
- name: CLEARML_AGENT_DISABLE_SSH_MOUNT
value: "1"
- name: K8S_GLUE_POD_AGENT_INSTALL_ARGS
value: " -U"
- name: CLEARML_API_ACCESS_KEY
valueFrom:
secretKeyRef: