mirror of
https://github.com/clearml/clearml-helm-charts
synced 2025-01-31 09:06:48 +00:00
d7bef0ff9d
* Added: auth enabled example in additionalConfigs * Changed: bump up version * Fixed: remove trailing spaces
458 lines
12 KiB
YAML
Executable File
458 lines
12 KiB
YAML
Executable File
# global:
|
|
# imagePullSecrets:
|
|
# - docker-cfg
|
|
clearml:
|
|
defaultCompany: "d1bd92a3b039400cbafc60a7a5b1e52b"
|
|
ingress:
|
|
name: clearml-server-ingress
|
|
annotations: {}
|
|
app:
|
|
enabled: false
|
|
hostName: "app.clearml.127-0-0-1.nip.io"
|
|
tlsSecretName: ""
|
|
annotations: {}
|
|
path: "/"
|
|
api:
|
|
enabled: false
|
|
hostName: "api.clearml.127-0-0-1.nip.io"
|
|
tlsSecretName: ""
|
|
annotations: {}
|
|
path: "/"
|
|
files:
|
|
enabled: false
|
|
hostName: "files.clearml.127-0-0-1.nip.io"
|
|
tlsSecretName: ""
|
|
annotations: {}
|
|
path: "/"
|
|
|
|
secret:
|
|
# -- Set for http_session field
|
|
httpSession: "9Tw20RbhJ1bLBiHEOWXvhplKGUbTgLzAtwFN2oLQvWwS0uRpD5"
|
|
# -- Set for auth_token field
|
|
authToken: "1SCf0ov3Nm544Td2oZ0gXSrsNx5XhMWdVlKz1tOgcx158bD5RV"
|
|
credentials:
|
|
apiserver:
|
|
# -- Set for apiserver_key field
|
|
accessKey: "5442F3443MJMORWZA3ZH"
|
|
# -- Set for apiserver_secret field
|
|
secretKey: "BxapIRo9ZINi8x25CRxz8Wdmr2pQjzuWVB4PNASZqCtTyWgWVQ"
|
|
tests:
|
|
# -- Set for tests_user_key field
|
|
accessKey: "ENP39EQM4SLACGD5FXB7"
|
|
# -- Set for tests_user_secret field
|
|
secretKey: "lPcm0imbcBZ8mwgO7tpadutiS3gnJD05x9j7afwXPS35IKbpiQ"
|
|
|
|
apiserver:
|
|
prepopulateEnabled: "true"
|
|
prepopulateZipFiles: "/opt/clearml/db-pre-populate"
|
|
prepopulateArtifactsPath: "/mnt/fileserver"
|
|
configDir: /opt/clearml/config
|
|
|
|
# -- Number of seconds the authorization cookie will last in the user's browser
|
|
authCookiesMaxAge: 864000
|
|
|
|
service:
|
|
# -- This will be set to the service's spec.type field
|
|
type: NodePort
|
|
port: 8008
|
|
# -- If service.type is set to NodePort, this will be set to the service's nodePort field.
|
|
# If service.type is set to any other value, this field will be ignored
|
|
nodePort: 30008
|
|
|
|
livenessDelay: 60
|
|
readinessDelay: 60
|
|
|
|
replicaCount: 1
|
|
|
|
image:
|
|
repository: "allegroai/clearml"
|
|
pullPolicy: IfNotPresent
|
|
tag: "1.2.0"
|
|
|
|
extraEnvs: []
|
|
|
|
podAnnotations: {}
|
|
|
|
resources: {}
|
|
# We usually recommend not to specify default resources and to leave this as a conscious
|
|
# choice for the user. This also increases chances charts run on environments with little
|
|
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
|
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
|
# limits:
|
|
# cpu: 100m
|
|
# memory: 128Mi
|
|
# requests:
|
|
# cpu: 100m
|
|
# memory: 128Mi
|
|
|
|
nodeSelector: {}
|
|
|
|
tolerations: []
|
|
|
|
affinity: {}
|
|
|
|
additionalConfigs: {}
|
|
# services.conf: |
|
|
# tasks {
|
|
# non_responsive_tasks_watchdog {
|
|
# # In-progress tasks that haven't been updated for at least 'value' seconds will be stopped by the watchdog
|
|
# threshold_sec: 21000
|
|
# # Watchdog will sleep for this number of seconds after each cycle
|
|
# watch_interval_sec: 900
|
|
# }
|
|
# }
|
|
# apiserver.conf: |
|
|
# auth {
|
|
# fixed_users {
|
|
# enabled: true
|
|
# pass_hashed: false
|
|
# users: [
|
|
# {
|
|
# username: "jane"
|
|
# password: "12345678"
|
|
# name: "Jane Doe"
|
|
# },
|
|
# {
|
|
# username: "john"
|
|
# password: "12345678"
|
|
# name: "John Doe"
|
|
# },
|
|
# ]
|
|
# }
|
|
# }
|
|
|
|
fileserver:
|
|
service:
|
|
# -- This will be set to the service's spec.type field
|
|
type: NodePort
|
|
port: 8081
|
|
# -- If service.type is set to NodePort, this will be set to the service's nodePort field.
|
|
# If service.type is set to any other value, this field will be ignored
|
|
nodePort: 30081
|
|
|
|
replicaCount: 1
|
|
|
|
image:
|
|
repository: "allegroai/clearml"
|
|
pullPolicy: IfNotPresent
|
|
tag: "1.2.0"
|
|
|
|
extraEnvs: []
|
|
|
|
podAnnotations: {}
|
|
|
|
resources: {}
|
|
# We usually recommend not to specify default resources and to leave this as a conscious
|
|
# choice for the user. This also increases chances charts run on environments with little
|
|
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
|
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
|
# limits:
|
|
# cpu: 100m
|
|
# memory: 128Mi
|
|
# requests:
|
|
# cpu: 100m
|
|
# memory: 128Mi
|
|
|
|
nodeSelector: {}
|
|
|
|
tolerations: []
|
|
|
|
affinity: {}
|
|
|
|
storage:
|
|
data:
|
|
class: "standard"
|
|
size: 50Gi
|
|
|
|
webserver:
|
|
extraEnvs: []
|
|
|
|
service:
|
|
# -- This will be set to the service's spec.type field
|
|
type: NodePort
|
|
port: 80
|
|
# -- If service.type is set to NodePort, this will be set to the service's nodePort field.
|
|
# If service.type is set to any other value, this field will be ignored
|
|
nodePort: 30080
|
|
|
|
replicaCount: 1
|
|
|
|
image:
|
|
repository: "allegroai/clearml"
|
|
pullPolicy: IfNotPresent
|
|
tag: "1.2.0"
|
|
|
|
podAnnotations: {}
|
|
|
|
resources: {}
|
|
# We usually recommend not to specify default resources and to leave this as a conscious
|
|
# choice for the user. This also increases chances charts run on environments with little
|
|
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
|
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
|
# limits:
|
|
# cpu: 100m
|
|
# memory: 128Mi
|
|
# requests:
|
|
# cpu: 100m
|
|
# memory: 128Mi
|
|
|
|
nodeSelector: {}
|
|
|
|
tolerations: []
|
|
|
|
affinity: {}
|
|
|
|
additionalConfigs: {}
|
|
|
|
agentservices:
|
|
enabled: false
|
|
clearmlHostIp: null
|
|
agentVersion: ""
|
|
clearmlWebHost: null
|
|
clearmlFilesHost: null
|
|
clearmlGitUser: null
|
|
clearmlGitPassword: null
|
|
awsAccessKeyId: null
|
|
awsSecretAccessKey: null
|
|
awsDefaultRegion: null
|
|
azureStorageAccount: null
|
|
azureStorageKey: null
|
|
googleCredentials: null
|
|
clearmlWorkerId: "clearml-services"
|
|
|
|
replicaCount: 1
|
|
|
|
image:
|
|
repository: "allegroai/clearml-agent-services"
|
|
pullPolicy: IfNotPresent
|
|
tag: "latest"
|
|
|
|
extraEnvs: []
|
|
|
|
podAnnotations: {}
|
|
|
|
resources: {}
|
|
# We usually recommend not to specify default resources and to leave this as a conscious
|
|
# choice for the user. This also increases chances charts run on environments with little
|
|
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
|
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
|
# limits:
|
|
# cpu: 100m
|
|
# memory: 128Mi
|
|
# requests:
|
|
# cpu: 100m
|
|
# memory: 128Mi
|
|
|
|
nodeSelector: {}
|
|
|
|
tolerations: []
|
|
|
|
affinity: {}
|
|
|
|
storage:
|
|
data:
|
|
class: "standard"
|
|
size: 50Gi
|
|
|
|
agentGroups:
|
|
agent-group-cpu:
|
|
enabled: true
|
|
name: agent-group-cpu
|
|
replicaCount: 1
|
|
updateStrategy: Recreate
|
|
nvidiaGpusPerAgent: 0
|
|
agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
|
|
queues: "default" # multiple queues can be specified separated by a space (e.g. "important_jobs default")
|
|
clearmlGitUser: null
|
|
clearmlGitPassword: null
|
|
clearmlAccessKey: null
|
|
clearmlSecretKey: null
|
|
awsAccessKeyId: null
|
|
awsSecretAccessKey: null
|
|
awsDefaultRegion: null
|
|
azureStorageAccount: null
|
|
azureStorageKey: null
|
|
clearmlConfig: |-
|
|
sdk {
|
|
}
|
|
|
|
image:
|
|
repository: "ubuntu"
|
|
pullPolicy: IfNotPresent
|
|
tag: "18.04"
|
|
|
|
podAnnotations: {}
|
|
|
|
nodeSelector: {}
|
|
|
|
tolerations: []
|
|
|
|
affinity: {}
|
|
|
|
agent-group-gpu:
|
|
enabled: true
|
|
name: agent-group-gpu
|
|
replicaCount: 0
|
|
updateStrategy: Recreate
|
|
nvidiaGpusPerAgent: 1
|
|
agentVersion: "" # if set, it *MUST* include comparison operator (e.g. ">=0.16.1")
|
|
queues: "default" # multiple queues can be specified separated by a space (e.g. "important_jobs default")
|
|
clearmlGitUser: null
|
|
clearmlGitPassword: null
|
|
clearmlAccessKey: null
|
|
clearmlSecretKey: null
|
|
awsAccessKeyId: null
|
|
awsSecretAccessKey: null
|
|
awsDefaultRegion: null
|
|
azureStorageAccount: null
|
|
azureStorageKey: null
|
|
clearmlConfig: |-
|
|
sdk {
|
|
}
|
|
|
|
image:
|
|
repository: "nvidia/cuda"
|
|
pullPolicy: IfNotPresent
|
|
tag: "11.0-base-ubuntu18.04"
|
|
|
|
podAnnotations: {}
|
|
|
|
nodeSelector: {}
|
|
|
|
tolerations: []
|
|
|
|
affinity: {}
|
|
|
|
# This agent will spawn queued experiments in new pods, a good use case is to combine this with
|
|
# GPU autoscaling nodes.
|
|
# https://github.com/allegroai/clearml-agent/tree/master/docker/k8s-glue
|
|
agentk8sglue:
|
|
enabled: false
|
|
image:
|
|
repository: "allegroai/clearml-agent-k8s"
|
|
tag: "aws-latest-1.21"
|
|
serviceAccountName: default
|
|
maxPods: 10
|
|
defaultDockerImage: nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04 # default docker image that is spawned as new pod
|
|
queue: aws-instances # create this queue manually in the UI first for it to work
|
|
id: k8s-agent
|
|
podTemplate:
|
|
volumes: []
|
|
# - name: "yourvolume"
|
|
# path: "/yourpath"
|
|
env: []
|
|
# # to setup access to private repo, setup secret with git credentials:
|
|
# - name: CLEARML_AGENT_GIT_USER
|
|
# value: mygitusername
|
|
# - name: CLEARML_AGENT_GIT_PASS
|
|
# valueFrom:
|
|
# secretKeyRef:
|
|
# name: git-password
|
|
# key: git-password
|
|
resources: {}
|
|
# limits:
|
|
# nvidia.com/gpu: 1
|
|
tolerations: []
|
|
# - key: "nvidia.com/gpu"
|
|
# operator: Exists
|
|
# effect: "NoSchedule"
|
|
nodeSelector: {}
|
|
# fleet: gpu-nodes
|
|
|
|
|
|
externalServices:
|
|
# -- Existing ElasticSearch Hostname to use if elasticsearch.enabled is false
|
|
elasticsearchHost: ""
|
|
# -- Existing ElasticSearch Port to use if elasticsearch.enabled is false
|
|
elasticsearchPort: 9200
|
|
# -- Existing MongoDB Hostname to use if mongodb.enabled is false
|
|
mongodbHost: ""
|
|
# -- Existing MongoDB Port to use if mongodb.enabled is false
|
|
mongodbPort: 27017
|
|
# -- Existing Redis Hostname to use if redis.enabled is false
|
|
redisHost: ""
|
|
# -- Existing Redis Port to use if redis.enabled is false
|
|
redisPort: 6379
|
|
|
|
redis: # configuration from https://github.com/bitnami/charts/blob/master/bitnami/redis/values.yaml
|
|
enabled: true
|
|
usePassword: false
|
|
databaseNumber: 0
|
|
master:
|
|
name: "{{ .Release.Name }}-redis-master"
|
|
port: 6379
|
|
persistence:
|
|
enabled: true
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
size: 5Gi
|
|
cluster:
|
|
enabled: false
|
|
|
|
mongodb: # configuration from https://github.com/bitnami/charts/blob/master/bitnami/mongodb/values.yaml
|
|
enabled: true
|
|
architecture: standalone
|
|
auth:
|
|
enabled: false
|
|
replicaCount: 1
|
|
persistence:
|
|
enabled: true
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
size: 50Gi
|
|
service:
|
|
name: "{{ .Release.Name }}-mongodb"
|
|
type: ClusterIP
|
|
port: 27017
|
|
portName: mongo-service
|
|
|
|
elasticsearch: # configuration from https://github.com/elastic/helm-charts/blob/7.16/elasticsearch/values.yaml
|
|
enabled: true
|
|
httpPort: 9200
|
|
roles:
|
|
master: "true"
|
|
ingest: "true"
|
|
data: "true"
|
|
remote_cluster_client: "true"
|
|
replicas: 1
|
|
# Readiness probe hack for a single-node cluster (where status will never be green). Should be removed if using replicas > 1
|
|
clusterHealthCheckParams: "wait_for_status=yellow&timeout=1s"
|
|
|
|
minimumMasterNodes: 1
|
|
clusterName: clearml-elastic
|
|
esJavaOpts: "-Xmx2g -Xms2g"
|
|
extraEnvs:
|
|
- name: bootstrap.memory_lock
|
|
value: "false"
|
|
- name: cluster.routing.allocation.node_initial_primaries_recoveries
|
|
value: "500"
|
|
- name: cluster.routing.allocation.disk.watermark.low
|
|
value: 500mb
|
|
- name: cluster.routing.allocation.disk.watermark.high
|
|
value: 500mb
|
|
- name: cluster.routing.allocation.disk.watermark.flood_stage
|
|
value: 500mb
|
|
- name: http.compression_level
|
|
value: "7"
|
|
- name: reindex.remote.whitelist
|
|
value: '*.*'
|
|
- name: xpack.monitoring.enabled
|
|
value: "false"
|
|
- name: xpack.security.enabled
|
|
value: "false"
|
|
resources:
|
|
requests:
|
|
memory: "4Gi"
|
|
limits:
|
|
memory: "4Gi"
|
|
persistence:
|
|
enabled: true
|
|
volumeClaimTemplate:
|
|
accessModes: ["ReadWriteOnce"]
|
|
resources:
|
|
requests:
|
|
storage: 50Gi
|
|
esConfig:
|
|
elasticsearch.yml: |
|
|
xpack.security.enabled: false
|