From 5b31ea8599ecd6235f2ee28b298bb180bb70adba Mon Sep 17 00:00:00 2001 From: Valeriano Manassero <14011549+valeriano-manassero@users.noreply.github.com> Date: Mon, 8 May 2023 17:25:01 +0200 Subject: [PATCH] Remove unsupported dynamic svc (#206) * Removed: unsupported values * Changed: version bump * Changed: removed not needed value * Changed: helm-docs * Removed: unsupported values --- charts/clearml-agent/Chart.yaml | 2 +- charts/clearml-agent/README.md | 12 ++----- .../templates/agentk8sglue-deployment.yaml | 35 ------------------- .../templates/service-sessions.yaml | 2 -- charts/clearml-agent/values.yaml | 14 -------- 5 files changed, 4 insertions(+), 61 deletions(-) diff --git a/charts/clearml-agent/Chart.yaml b/charts/clearml-agent/Chart.yaml index 9fa34e9..d7647ec 100644 --- a/charts/clearml-agent/Chart.yaml +++ b/charts/clearml-agent/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: clearml-agent description: MLOps platform Task running agent type: application -version: "5.0.0" +version: "5.0.1" appVersion: "1.24" kubeVersion: ">= 1.21.0-0 < 1.28.0-0" home: https://clear.ml diff --git a/charts/clearml-agent/README.md b/charts/clearml-agent/README.md index ff9d0d3..086c78a 100644 --- a/charts/clearml-agent/README.md +++ b/charts/clearml-agent/README.md @@ -1,6 +1,6 @@ # ClearML Kubernetes Agent -![Version: 5.0.0](https://img.shields.io/badge/Version-5.0.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square) +![Version: 5.0.1](https://img.shields.io/badge/Version-5.0.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.24](https://img.shields.io/badge/AppVersion-1.24-informational?style=flat-square) MLOps platform Task running agent @@ -53,7 +53,7 @@ Kubernetes: `>= 1.21.0-0 < 1.28.0-0` | Key | Type | Default | Description | |-----|------|---------|-------------| -| agentk8sglue | object | `{"additionalClusterRoleBindings":[],"additionalRoleBindings":[],"affinity":{},"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"affinity":{},"annotations":{},"containerSecurityContext":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"podSecurityContext":{},"priorityClassName":"","resources":{},"schedulerName":"","tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerCustomBashScript":"","containerSecurityContext":{},"customBashScript":"","debugMode":false,"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"registry":"","repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"podSecurityContext":{},"queue":"default","replicaCount":1,"serviceExistingAccountName":"","taskAsJob":false,"tolerations":[],"volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue | +| agentk8sglue | object | `{"additionalClusterRoleBindings":[],"additionalRoleBindings":[],"affinity":{},"annotations":{},"apiServerUrlReference":"https://api.clear.ml","basePodTemplate":{"affinity":{},"annotations":{},"containerSecurityContext":{},"env":[],"fileMounts":[],"hostAliases":[],"initContainers":[],"labels":{},"nodeSelector":{},"podSecurityContext":{},"priorityClassName":"","resources":{},"schedulerName":"","tolerations":[],"volumeMounts":[],"volumes":[]},"clearmlcheckCertificate":true,"containerSecurityContext":{},"defaultContainerImage":"ubuntu:18.04","extraEnvs":[],"fileMounts":[],"fileServerUrlReference":"https://files.clear.ml","image":{"registry":"","repository":"allegroai/clearml-agent-k8s-base","tag":"1.24-21"},"labels":{},"nodeSelector":{},"podSecurityContext":{},"queue":"default","replicaCount":1,"serviceExistingAccountName":"","tolerations":[],"volumeMounts":[],"volumes":[],"webServerUrlReference":"https://app.clear.ml"}` | This agent will spawn queued experiments in new pods, a good use case is to combine this with GPU autoscaling nodes. https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue | | agentk8sglue.additionalClusterRoleBindings | list | `[]` | additional existing ClusterRoleBindings | | agentk8sglue.additionalRoleBindings | list | `[]` | additional existing RoleBindings | | agentk8sglue.affinity | object | `{}` | affinity setup for Agent pod (example in values.yaml comments) | @@ -77,10 +77,7 @@ Kubernetes: `>= 1.21.0-0 < 1.28.0-0` | agentk8sglue.basePodTemplate.volumeMounts | list | `[]` | volume mounts definition for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.basePodTemplate.volumes | list | `[]` | volumes definition for pods spawned to consume ClearML Task (example in values.yaml comments) | | agentk8sglue.clearmlcheckCertificate | bool | `true` | Check certificates validity for evefry UrlReference below. | -| agentk8sglue.containerCustomBashScript | string | `""` | Custom Bash script for the Task Pods ran by Glue Agent | | agentk8sglue.containerSecurityContext | object | `{}` | container securityContext setup for Agent pod (example in values.yaml comments) | -| agentk8sglue.customBashScript | string | `""` | Custom Bash script for the Agent pod ran by Glue Agent | -| agentk8sglue.debugMode | bool | `false` | Enable Debugging logs for Agent pod | | agentk8sglue.defaultContainerImage | string | `"ubuntu:18.04"` | default container image for ClearML Task pod | | agentk8sglue.extraEnvs | list | `[]` | Extra Environment variables for Glue Agent | | agentk8sglue.fileMounts | list | `[]` | file definition for Glue Agent (example in values.yaml comments) | @@ -92,7 +89,6 @@ Kubernetes: `>= 1.21.0-0 < 1.28.0-0` | agentk8sglue.queue | string | `"default"` | ClearML queue this agent will consume | | agentk8sglue.replicaCount | int | `1` | Glue Agent number of pods | | agentk8sglue.serviceExistingAccountName | string | `""` | if set, don't create a serviceAccountName but use defined existing one | -| agentk8sglue.taskAsJob | bool | `false` | ClearML spawn tasks as jobs instead of pods | | agentk8sglue.tolerations | list | `[]` | tolerations setup for Agent pod (example in values.yaml comments) | | agentk8sglue.volumeMounts | list | `[]` | volume mounts definition for Glue Agent (example in values.yaml comments) | | agentk8sglue.volumes | list | `[]` | volumes definition for Glue Agent (example in values.yaml comments) | @@ -112,12 +108,10 @@ Kubernetes: `>= 1.21.0-0 < 1.28.0-0` | imageCredentials.password | string | `"pwd"` | Registry password | | imageCredentials.registry | string | `"docker.io"` | Registry name | | imageCredentials.username | string | `"someone"` | Registry username | -| sessions | object | `{"dynamicSvcs":false,"externalIP":"0.0.0.0","maxServices":20,"portModeEnabled":false,"setInteractiveQueuesTag":true,"startingPort":30000,"svcAnnotations":{},"svcType":"NodePort"}` | Sessions internal service configuration | -| sessions.dynamicSvcs | bool | `false` | Enable/Disable dynamic svc for sessions pods | +| sessions | object | `{"externalIP":"0.0.0.0","maxServices":20,"portModeEnabled":false,"startingPort":30000,"svcAnnotations":{},"svcType":"NodePort"}` | Sessions internal service configuration | | sessions.externalIP | string | `"0.0.0.0"` | External IP sessions clients can connect to | | sessions.maxServices | int | `20` | maximum number of NodePorts exposed | | sessions.portModeEnabled | bool | `false` | Enable/Disable sessions portmode WARNING: only one Agent deployment can have this set to true | -| sessions.setInteractiveQueuesTag | bool | `true` | set interactive queue tags | | sessions.startingPort | int | `30000` | starting range of exposed NodePorts | | sessions.svcAnnotations | object | `{}` | specific annotations for session services | | sessions.svcType | string | `"NodePort"` | service type ("NodePort" or "ClusterIP" or "LoadBalancer") | diff --git a/charts/clearml-agent/templates/agentk8sglue-deployment.yaml b/charts/clearml-agent/templates/agentk8sglue-deployment.yaml index d239437..c53a978 100644 --- a/charts/clearml-agent/templates/agentk8sglue-deployment.yaml +++ b/charts/clearml-agent/templates/agentk8sglue-deployment.yaml @@ -97,12 +97,6 @@ spec: --ports-mode --num-of-services {{ .Values.sessions.maxServices }} \ --base-port {{ .Values.sessions.startingPort }} \ --gateway-address {{ .Values.sessions.externalIP }}" - {{- if .Values.sessions.dynamicSvcs }} - - name: CLEARML_K8S_GLUE_POD_POST_APPLY_CMD - value: "kubectl -n {namespace} apply -f ~/template/services-{pod_number}.yaml ; kubectl -n {namespace} label svc clearml-session-{pod_number} service-for={pod_name}" - - name: CLEARML_K8S_GLUE_POD_POST_DELETE_CMD - value: "kubectl -n {namespace} delete svc -l service-for={pod_name}" - {{- end }} {{- else}} - name: K8S_GLUE_EXTRA_ARGS value: "--namespace {{ .Release.Namespace }} --template-yaml /root/template/template.yaml" @@ -111,10 +105,6 @@ spec: - name: CLEARML_CONFIG_FILE value: /root/clearml.conf {{- end }} - - name: CLEARML_K8S_GLUE_LIMIT_POD_LABEL - value: "ai.allegro.agent.serial=pod-{pod_number}" - - name: CLEARML_K8S_SECRETS_LIST_FILE - value: /root/template/secrets.yaml - name: K8S_DEFAULT_NAMESPACE value: "{{ .Release.Namespace }}" - name: CLEARML_API_ACCESS_KEY @@ -135,31 +125,6 @@ spec: value: "" - name: CLEARML_DOCKER_IMAGE value: "{{.Values.agentk8sglue.defaultContainerImage}}" - {{- if .Values.agentk8sglue.customBashScript }} - - name: CLEARML_K8S_GLUE_EXTRA_BASH_SCRIPT - value: "{{.Values.agentk8sglue.customBashScript}}" - {{- end }} - {{- if .Values.agentk8sglue.containerCustomBashScript }} - - name: CLEARML_K8S_GLUE_POD_BASH_SCRIPT - value: "{{.Values.agentk8sglue.containerCustomBashScript}}" - {{- end }} - {{- if .Values.agentk8sglue.debugMode }} - - name: "CLEARML_K8S_GLUE_DEBUG" - value: "1" - {{- end }} - {{- if .Values.sessions.portModeEnabled }} - {{- if .Values.sessions.setInteractiveQueuesTag }} - - name: "CLEARML_K8S_GLUE_SET_QUEUE_SYSTEM_TAGS" - value: "interactive" - {{- end }} - {{- end }} - {{- if .Values.agentk8sglue.taskAsJob }} - - name: "CLEARML_K8S_GLUE_KIND" - value: "job" - {{- else }} - - name: "CLEARML_K8S_GLUE_KIND" - value: "pod" - {{- end }} - name: K8S_GLUE_QUEUE value: {{ .Values.agentk8sglue.queue }} {{- if .Values.agentk8sglue.extraEnvs }} diff --git a/charts/clearml-agent/templates/service-sessions.yaml b/charts/clearml-agent/templates/service-sessions.yaml index 87c1b29..7150518 100644 --- a/charts/clearml-agent/templates/service-sessions.yaml +++ b/charts/clearml-agent/templates/service-sessions.yaml @@ -1,5 +1,4 @@ {{- if .Values.sessions.portModeEnabled }} -{{- if not .Values.sessions.dynamicSvcs }} {{- range untilStep 1 ( ( add .Values.sessions.maxServices 1 ) | int ) 1 }} --- apiVersion: v1 @@ -29,4 +28,3 @@ spec: ai.allegro.agent.serial: pod-{{ . }} {{- end }} {{- end }} -{{- end }} diff --git a/charts/clearml-agent/values.yaml b/charts/clearml-agent/values.yaml index cad1756..9a8bd4a 100644 --- a/charts/clearml-agent/values.yaml +++ b/charts/clearml-agent/values.yaml @@ -53,9 +53,6 @@ agentk8sglue: # -- Check certificates validity for evefry UrlReference below. clearmlcheckCertificate: true - # -- Enable Debugging logs for Agent pod - debugMode: false - # -- Reference to Api server url apiServerUrlReference: "https://api.clear.ml" # -- Reference to File server url @@ -67,19 +64,12 @@ agentk8sglue: defaultContainerImage: ubuntu:18.04 # -- ClearML queue this agent will consume queue: default - # -- ClearML spawn tasks as jobs instead of pods - taskAsJob: false - # -- Custom Bash script for the Glue Agent # -- labels setup for Agent pod (example in values.yaml comments) labels: {} # schedulerName: scheduler # -- annotations setup for Agent pod (example in values.yaml comments) annotations: {} # key1: value1 - # -- Custom Bash script for the Agent pod ran by Glue Agent - customBashScript: "" - # -- Custom Bash script for the Task Pods ran by Glue Agent - containerCustomBashScript: "" # -- Extra Environment variables for Glue Agent extraEnvs: [] # - name: PYTHONPATH @@ -216,8 +206,6 @@ agentk8sglue: sessions: # -- Enable/Disable sessions portmode WARNING: only one Agent deployment can have this set to true portModeEnabled: false - # -- Enable/Disable dynamic svc for sessions pods - dynamicSvcs: false # -- specific annotations for session services svcAnnotations: {} # -- service type ("NodePort" or "ClusterIP" or "LoadBalancer") @@ -228,5 +216,3 @@ sessions: startingPort: 30000 # -- maximum number of NodePorts exposed maxServices: 20 - # -- set interactive queue tags - setInteractiveQueuesTag: true