# clearml-helm-charts/charts/clearml/values.yaml

# global:
#   imagePullSecrets:
#   - docker-cfg
clearml:
  # Identifier of the default company. Presumably consumed by the server
  # templates - confirm before changing.
  defaultCompany: 'd1bd92a3b039400cbafc60a7a5b1e52b'
# Ingress settings. Each endpoint (app, api, files) carries the same set of
# keys and is disabled by default.
ingress:
  name: 'clearml-server-ingress'
  # Annotations at the ingress level; each endpoint also has its own
  # `annotations` map below.
  annotations: {}

  app:
    enabled: false
    hostName: 'app.clearml.127-0-0-1.nip.io'
    path: '/'
    # Empty by default. NOTE(review): presumably no TLS is configured when
    # this is empty - confirm against the ingress template.
    tlsSecretName: ''
    annotations: {}

  api:
    enabled: false
    hostName: 'api.clearml.127-0-0-1.nip.io'
    path: '/'
    tlsSecretName: ''
    annotations: {}

  files:
    enabled: false
    hostName: 'files.clearml.127-0-0-1.nip.io'
    path: '/'
    tlsSecretName: ''
    annotations: {}

# Session/auth secrets and API credential pairs.
# NOTE(review): these are literal default values stored in this file, so every
# installation that does not override them shares the same secrets. Override
# them for any deployment that is not a throwaway test environment.
secret:
  # -- Set for http_session field
  httpSession: "9Tw20RbhJ1bLBiHEOWXvhplKGUbTgLzAtwFN2oLQvWwS0uRpD5"
  # -- Set for auth_token field
  authToken: "1SCf0ov3Nm544Td2oZ0gXSrsNx5XhMWdVlKz1tOgcx158bD5RV"
  # Access/secret key pairs, one pair for `apiserver` and one for `tests`.
  credentials:
    apiserver:
      # -- Set for apiserver_key field
      accessKey: "5442F3443MJMORWZA3ZH"
      # -- Set for apiserver_secret field
      secretKey: "BxapIRo9ZINi8x25CRxz8Wdmr2pQjzuWVB4PNASZqCtTyWgWVQ"
    tests:
      # -- Set for tests_user_key field
      accessKey: "ENP39EQM4SLACGD5FXB7"
      # -- Set for tests_user_secret field
      secretKey: "lPcm0imbcBZ8mwgO7tpadutiS3gnJD05x9j7afwXPS35IKbpiQ"

# API server settings.
apiserver:
  # Pre-population settings. Note that the flag is a quoted string, not a
  # YAML boolean.
  prepopulateEnabled: 'true'
  prepopulateZipFiles: '/opt/clearml/db-pre-populate'
  prepopulateArtifactsPath: '/mnt/fileserver'

  configDir: '/opt/clearml/config'

  # -- Lifetime, in seconds, of the authorization cookie in the user's browser
  authCookiesMaxAge: 864000

  service:
    # -- Value for the service's spec.type field
    type: 'NodePort'
    port: 8008
    # -- Value for the service's nodePort field when service.type is NodePort;
    # ignored for any other service type
    nodePort: 30008

  # NOTE(review): presumably the initial delay, in seconds, of the readiness
  # and liveness probes - confirm against the deployment template.
  readinessDelay: 60
  livenessDelay: 60

  replicaCount: 1

  image:
    repository: 'allegroai/clearml'
    tag: '1.2.0'
    pullPolicy: 'IfNotPresent'

  extraEnvs: []

  podAnnotations: {}

  # No default resources are specified; this is left as a conscious choice for
  # the user and improves the chances that the chart runs on environments with
  # little resources, such as Minikube. To specify resources, replace the
  # empty map below with, for example:
  #   resources:
  #     limits:
  #       cpu: 100m
  #       memory: 128Mi
  #     requests:
  #       cpu: 100m
  #       memory: 128Mi
  resources: {}

  nodeSelector: {}

  tolerations: []

  affinity: {}

  # Additional configuration files, keyed by file name. Empty by default.
  # Example (replace the empty map below):
  #   additionalConfigs:
  #     services.conf: |
  #       tasks {
  #         non_responsive_tasks_watchdog {
  #           # In-progress tasks that haven't been updated for at least 'value' seconds will be stopped by the watchdog
  #           threshold_sec: 21000
  #           # Watchdog will sleep for this number of seconds after each cycle
  #           watch_interval_sec: 900
  #         }
  #       }
  #     apiserver.conf: |
  #       auth {
  #         fixed_users {
  #           enabled: true
  #           pass_hashed: false
  #           users: [
  #             {
  #               username: "jane"
  #               password: "12345678"
  #               name: "Jane Doe"
  #             },
  #             {
  #               username: "john"
  #               password: "12345678"
  #               name: "John Doe"
  #             },
  #           ]
  #         }
  #       }
  additionalConfigs: {}

# File server settings.
fileserver:
  replicaCount: 1

  image:
    repository: 'allegroai/clearml'
    tag: '1.2.0'
    pullPolicy: 'IfNotPresent'

  service:
    # -- Value for the service's spec.type field
    type: 'NodePort'
    port: 8081
    # -- Value for the service's nodePort field when service.type is NodePort;
    # ignored for any other service type
    nodePort: 30081

  extraEnvs: []

  podAnnotations: {}

  # No default resources are specified; this is left as a conscious choice for
  # the user and improves the chances that the chart runs on environments with
  # little resources, such as Minikube. To specify resources, replace the
  # empty map below with, for example:
  #   resources:
  #     limits:
  #       cpu: 100m
  #       memory: 128Mi
  #     requests:
  #       cpu: 100m
  #       memory: 128Mi
  resources: {}

  nodeSelector: {}

  tolerations: []

  affinity: {}

  # Storage class and size for the file server data.
  storage:
    data:
      class: 'standard'
      size: '50Gi'

# Web server settings.
webserver:
  replicaCount: 1

  image:
    repository: 'allegroai/clearml'
    tag: '1.2.0'
    pullPolicy: 'IfNotPresent'

  service:
    # -- Value for the service's spec.type field
    type: 'NodePort'
    port: 80
    # -- Value for the service's nodePort field when service.type is NodePort;
    # ignored for any other service type
    nodePort: 30080

  extraEnvs: []

  podAnnotations: {}

  # No default resources are specified; this is left as a conscious choice for
  # the user and improves the chances that the chart runs on environments with
  # little resources, such as Minikube. To specify resources, replace the
  # empty map below with, for example:
  #   resources:
  #     limits:
  #       cpu: 100m
  #       memory: 128Mi
  #     requests:
  #       cpu: 100m
  #       memory: 128Mi
  resources: {}

  nodeSelector: {}

  tolerations: []

  affinity: {}

  # Empty by default.
  additionalConfigs: {}

# Agent services settings. Disabled by default; every host and credential
# value below defaults to null.
agentservices:
  enabled: false

  clearmlWorkerId: 'clearml-services'
  agentVersion: ''

  # ClearML endpoints.
  clearmlHostIp: null
  clearmlWebHost: null
  clearmlFilesHost: null

  # Git credentials.
  clearmlGitUser: null
  clearmlGitPassword: null

  # AWS credentials and region.
  awsAccessKeyId: null
  awsSecretAccessKey: null
  awsDefaultRegion: null

  # Azure storage credentials.
  azureStorageAccount: null
  azureStorageKey: null

  # Google credentials.
  googleCredentials: null

  replicaCount: 1

  # NOTE(review): the tag is the floating `latest`, not a fixed version.
  image:
    repository: 'allegroai/clearml-agent-services'
    tag: 'latest'
    pullPolicy: 'IfNotPresent'

  extraEnvs: []

  podAnnotations: {}

  # No default resources are specified; this is left as a conscious choice for
  # the user and improves the chances that the chart runs on environments with
  # little resources, such as Minikube. To specify resources, replace the
  # empty map below with, for example:
  #   resources:
  #     limits:
  #       cpu: 100m
  #       memory: 128Mi
  #     requests:
  #       cpu: 100m
  #       memory: 128Mi
  resources: {}

  nodeSelector: {}

  tolerations: []

  affinity: {}

  # Storage class and size for the agent services data.
  storage:
    data:
      class: 'standard'
      size: '50Gi'

# Agent groups, keyed by group name. Two groups are defined: a CPU group
# (1 replica, 0 GPUs per agent) and a GPU group (0 replicas, 1 GPU per agent).
# Both listen on the "default" queue and leave all credentials as null.
agentGroups:
  # CPU-only group, based on the ubuntu:18.04 image.
  agent-group-cpu:
    enabled: true
    name: agent-group-cpu
    replicaCount: 1
    updateStrategy: Recreate
    nvidiaGpusPerAgent: 0
    agentVersion: ""  # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
    queues: "default"  # multiple queues can be specified separated by a space (e.g. "important_jobs default")
    # Git, ClearML, AWS and Azure credentials; all unset (null) by default.
    clearmlGitUser: null
    clearmlGitPassword: null
    clearmlAccessKey: null
    clearmlSecretKey: null
    awsAccessKeyId: null
    awsSecretAccessKey: null
    awsDefaultRegion: null
    azureStorageAccount: null
    azureStorageKey: null
    # Literal block scalar; `|-` strips the trailing newline. The default
    # content is an empty `sdk { }` section.
    clearmlConfig: |-
      sdk {
      }

    image:
      repository: "ubuntu"
      pullPolicy: IfNotPresent
      tag: "18.04"

    podAnnotations: {}

    nodeSelector: {}

    tolerations: []

    affinity: {}

  # GPU group, based on the nvidia/cuda image. It is enabled, but with
  # replicaCount 0.
  agent-group-gpu:
    enabled: true
    name: agent-group-gpu
    replicaCount: 0
    updateStrategy: Recreate
    nvidiaGpusPerAgent: 1
    agentVersion: ""  # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
    queues: "default"  # multiple queues can be specified separated by a space (e.g. "important_jobs default")
    # Git, ClearML, AWS and Azure credentials; all unset (null) by default.
    clearmlGitUser: null
    clearmlGitPassword: null
    clearmlAccessKey: null
    clearmlSecretKey: null
    awsAccessKeyId: null
    awsSecretAccessKey: null
    awsDefaultRegion: null
    azureStorageAccount: null
    azureStorageKey: null
    # Literal block scalar; `|-` strips the trailing newline. The default
    # content is an empty `sdk { }` section.
    clearmlConfig: |-
      sdk {
      }

    image:
      repository: "nvidia/cuda"
      pullPolicy: IfNotPresent
      tag: "11.0-base-ubuntu18.04"

    podAnnotations: {}

    nodeSelector: {}

    tolerations: []

    affinity: {}

# Agent that spawns queued experiments in new pods. A good use case is to
# combine it with GPU autoscaling nodes.
# https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue
agentk8sglue:
  enabled: false
  id: 'k8s-agent'
  serviceAccountName: 'default'
  maxPods: 10
  # Create this queue manually in the UI first for it to work.
  queue: 'aws-instances'
  # Default docker image that is spawned as a new pod.
  defaultDockerImage: 'nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04'
  image:
    repository: 'allegroai/clearml-agent-k8s'
    tag: 'aws-latest-1.21'
  # All pod template fields are empty by default; each example shows what to
  # put in place of the empty value below it.
  podTemplate:
    # Example:
    #   volumes:
    #     - name: "yourvolume"
    #       path: "/yourpath"
    volumes: []
    # Example - to set up access to a private repo, set up a secret with git
    # credentials:
    #   env:
    #     - name: CLEARML_AGENT_GIT_USER
    #       value: mygitusername
    #     - name: CLEARML_AGENT_GIT_PASS
    #       valueFrom:
    #         secretKeyRef:
    #           name: git-password
    #           key: git-password
    env: []
    # Example:
    #   resources:
    #     limits:
    #       nvidia.com/gpu: 1
    resources: {}
    # Example:
    #   tolerations:
    #     - key: "nvidia.com/gpu"
    #       operator: Exists
    #       effect: "NoSchedule"
    tolerations: []
    # Example:
    #   nodeSelector:
    #     fleet: gpu-nodes
    nodeSelector: {}


# Connection details for pre-existing services, used in place of the bundled
# sub-charts. Each host/port pair pairs with the `enabled` flag of the
# corresponding section (elasticsearch, mongodb, redis) in this file.
externalServices:
  # -- Existing ElasticSearch Hostname to use if elasticsearch.enabled is false
  elasticsearchHost: ""
  # -- Existing ElasticSearch Port to use if elasticsearch.enabled is false
  elasticsearchPort: 9200
  # -- Existing MongoDB Hostname to use if mongodb.enabled is false
  mongodbHost: ""
  # -- Existing MongoDB Port to use if mongodb.enabled is false
  mongodbPort: 27017
  # -- Existing Redis Hostname to use if redis.enabled is false
  redisHost: ""
  # -- Existing Redis Port to use if redis.enabled is false
  redisPort: 6379

# Configuration from https://github.com/bitnami/charts/blob/master/bitnami/redis/values.yaml
redis:
  enabled: true
  databaseNumber: 0
  usePassword: false
  cluster:
    enabled: false
  master:
    # Templated name built from the Helm release name.
    name: '{{ .Release.Name }}-redis-master'
    port: 6379
    persistence:
      enabled: true
      size: '5Gi'
      accessModes: [ReadWriteOnce]

# Configuration from https://github.com/bitnami/charts/blob/master/bitnami/mongodb/values.yaml
mongodb:
  enabled: true
  architecture: 'standalone'
  replicaCount: 1
  # Authentication is turned off.
  auth:
    enabled: false
  service:
    # Templated name built from the Helm release name.
    name: '{{ .Release.Name }}-mongodb'
    type: 'ClusterIP'
    port: 27017
    portName: 'mongo-service'
  persistence:
    enabled: true
    size: '50Gi'
    accessModes: [ReadWriteOnce]

# Configuration from https://github.com/elastic/helm-charts/blob/7.16/elasticsearch/values.yaml
elasticsearch:
  enabled: true
  clusterName: 'clearml-elastic'
  replicas: 1
  minimumMasterNodes: 1
  httpPort: 9200

  # Role flags are quoted strings, not YAML booleans.
  roles:
    master: 'true'
    ingest: 'true'
    data: 'true'
    remote_cluster_client: 'true'

  # Readiness probe workaround for a single-node cluster, whose status will
  # never be green. Should be removed if using replicas > 1.
  clusterHealthCheckParams: 'wait_for_status=yellow&timeout=1s'

  esJavaOpts: '-Xmx2g -Xms2g'

  # Environment variables; every value is a string.
  extraEnvs:
    - name: 'bootstrap.memory_lock'
      value: 'false'
    - name: 'cluster.routing.allocation.node_initial_primaries_recoveries'
      value: '500'
    - name: 'cluster.routing.allocation.disk.watermark.low'
      value: '500mb'
    - name: 'cluster.routing.allocation.disk.watermark.high'
      value: '500mb'
    - name: 'cluster.routing.allocation.disk.watermark.flood_stage'
      value: '500mb'
    - name: 'http.compression_level'
      value: '7'
    - name: 'reindex.remote.whitelist'
      value: '*.*'
    - name: 'xpack.monitoring.enabled'
      value: 'false'
    - name: 'xpack.security.enabled'
      value: 'false'

  # Memory request and limit are set to the same value.
  resources:
    limits:
      memory: '4Gi'
    requests:
      memory: '4Gi'

  persistence:
    enabled: true
  volumeClaimTemplate:
    accessModes:
      - 'ReadWriteOnce'
    resources:
      requests:
        storage: '50Gi'

  # Content for elasticsearch.yml, given as a literal block scalar.
  esConfig:
    elasticsearch.yml: |
      xpack.security.enabled: false