mirror of
https://github.com/clearml/clearml-agent
synced 2025-06-26 18:16:15 +00:00
removed helm chart
This commit is contained in:
parent
5fd100e7f7
commit
f2a1fae836
@ -1,23 +0,0 @@
|
|||||||
# Patterns to ignore when building packages.
|
|
||||||
# This supports shell glob matching, relative path matching, and
|
|
||||||
# negation (prefixed with !). Only one pattern per line.
|
|
||||||
.DS_Store
|
|
||||||
# Common VCS dirs
|
|
||||||
.git/
|
|
||||||
.gitignore
|
|
||||||
.bzr/
|
|
||||||
.bzrignore
|
|
||||||
.hg/
|
|
||||||
.hgignore
|
|
||||||
.svn/
|
|
||||||
# Common backup files
|
|
||||||
*.swp
|
|
||||||
*.bak
|
|
||||||
*.tmp
|
|
||||||
*.orig
|
|
||||||
*~
|
|
||||||
# Various IDEs
|
|
||||||
.project
|
|
||||||
.idea/
|
|
||||||
*.tmproj
|
|
||||||
.vscode/
|
|
@ -1,27 +0,0 @@
|
|||||||
apiVersion: v2
|
|
||||||
name: clearml-agent
|
|
||||||
description: A Helm chart for Kubernetes
|
|
||||||
|
|
||||||
# A chart can be either an 'application' or a 'library' chart.
|
|
||||||
#
|
|
||||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
|
||||||
# to be deployed.
|
|
||||||
#
|
|
||||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
|
||||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
|
||||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
|
||||||
type: application
|
|
||||||
|
|
||||||
# This is the chart version. This version number should be incremented each time you make changes
|
|
||||||
# to the chart and its templates, including the app version.
|
|
||||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
|
||||||
version: 0.1.0
|
|
||||||
|
|
||||||
# This is the version number of the application being deployed. This version number should be
|
|
||||||
# incremented each time you make changes to the application. Versions are not expected to
|
|
||||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
|
||||||
# It is recommended to use it with quotes.
|
|
||||||
appVersion: "1.16.0"
|
|
||||||
|
|
||||||
sources:
|
|
||||||
- https://github.com/allegroai/clearml-agent
|
|
@ -1,13 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
metadata:
|
|
||||||
namespace: clearml-agent
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- resources:
|
|
||||||
limits:
|
|
||||||
cpu: 1
|
|
||||||
memory: 4Gi
|
|
||||||
requests:
|
|
||||||
cpu: 1
|
|
||||||
memory: 4Gi
|
|
||||||
restartPolicy: Never
|
|
@ -1,62 +0,0 @@
|
|||||||
{{/*
|
|
||||||
Expand the name of the chart.
|
|
||||||
*/}}
|
|
||||||
{{- define "clearml-agent.name" -}}
|
|
||||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Create a default fully qualified app name.
|
|
||||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
|
||||||
If release name contains chart name it will be used as a full name.
|
|
||||||
*/}}
|
|
||||||
{{- define "clearml-agent.fullname" -}}
|
|
||||||
{{- if .Values.fullnameOverride }}
|
|
||||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- else }}
|
|
||||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
|
||||||
{{- if contains $name .Release.Name }}
|
|
||||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- else }}
|
|
||||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Create chart name and version as used by the chart label.
|
|
||||||
*/}}
|
|
||||||
{{- define "clearml-agent.chart" -}}
|
|
||||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Common labels
|
|
||||||
*/}}
|
|
||||||
{{- define "clearml-agent.labels" -}}
|
|
||||||
helm.sh/chart: {{ include "clearml-agent.chart" . }}
|
|
||||||
{{ include "clearml-agent.selectorLabels" . }}
|
|
||||||
{{- if .Chart.AppVersion }}
|
|
||||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
|
||||||
{{- end }}
|
|
||||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Selector labels
|
|
||||||
*/}}
|
|
||||||
{{- define "clearml-agent.selectorLabels" -}}
|
|
||||||
app.kubernetes.io/name: {{ include "clearml-agent.name" . }}
|
|
||||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
|
||||||
Create the name of the service account to use
|
|
||||||
*/}}
|
|
||||||
{{- define "clearml-agent.serviceAccountName" -}}
|
|
||||||
{{- if .Values.serviceAccount.create }}
|
|
||||||
{{- default (include "clearml-agent.fullname" .) .Values.serviceAccount.name }}
|
|
||||||
{{- else }}
|
|
||||||
{{- default "default" .Values.serviceAccount.name }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
@ -1,410 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: {{ include "clearml-agent.fullname" . }}-clearml-conf
|
|
||||||
labels:
|
|
||||||
{{- include "clearml-agent.labels" . | nindent 4 }}
|
|
||||||
data:
|
|
||||||
clearml.conf: |-
|
|
||||||
# CLEARML-AGENT configuration file
|
|
||||||
api {
|
|
||||||
# Notice: 'api_server' is the API server (default port 8008), not the web server.
|
|
||||||
api_server: "{{ .Values.env.CLEARML_API_HOST }}"
|
|
||||||
web_server: "{{ .Values.env.CLEARML_WEB_HOST }}"
|
|
||||||
files_server: "{{ .Values.env.CLEARML_FILES_HOST }}"
|
|
||||||
# Override with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY
|
|
||||||
{{- /* NOTE(review): these keys are rendered in plain text into this ConfigMap, while the chart also stores them in a Secret that is exposed to the agent container as CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY - consider relying on that environment override instead. */}}
credentials {"access_key": "{{ .Values.apiAccessKey }}", "secret_key": "{{ .Values.apiSecretKey }}"}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Set GIT user/pass credentials
|
|
||||||
# leave blank for GIT SSH credentials
|
|
||||||
agent.git_user=""
|
|
||||||
agent.git_pass=""
|
|
||||||
|
|
||||||
# extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"]
|
|
||||||
agent.package_manager.extra_index_url= [
|
|
||||||
|
|
||||||
]
|
|
||||||
|
|
||||||
agent {
|
|
||||||
# unique name of this worker, if None, created based on hostname:process_id
|
|
||||||
# Override with os environment: CLEARML_WORKER_ID
|
|
||||||
# worker_id: "clearml-agent-machine1:gpu0"
|
|
||||||
worker_id: ""
|
|
||||||
|
|
||||||
# worker name, replaces the hostname when creating a unique name for this worker
|
|
||||||
# Override with os environment: CLEARML_WORKER_NAME
|
|
||||||
# worker_name: "clearml-agent-machine1"
|
|
||||||
worker_name: ""
|
|
||||||
|
|
||||||
# Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)
|
|
||||||
# leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)
|
|
||||||
# git_user: ""
|
|
||||||
# git_pass: ""
|
|
||||||
# git_host: ""
|
|
||||||
|
|
||||||
# Force GIT protocol to use SSH regardless of the git url (Assumes GIT user/pass are blank)
|
|
||||||
force_git_ssh_protocol: false
|
|
||||||
# Force a specific SSH port when converting http to ssh links (the domain is kept the same)
|
|
||||||
# force_git_ssh_port: 0
|
|
||||||
# Force a specific SSH username when converting http to ssh links (the default username is 'git')
|
|
||||||
# force_git_ssh_user: git
|
|
||||||
|
|
||||||
# Set the python version to use when creating the virtual environment and launching the experiment
|
|
||||||
# Example values: "/usr/bin/python3" or "/usr/local/bin/python3.6"
|
|
||||||
# The default is the python executing the clearml_agent
|
|
||||||
python_binary: ""
|
|
||||||
# ignore any requested python version (Default: False, if a Task was using a
|
|
||||||
# specific python version and the system supports multiple python the agent will use the requested python version)
|
|
||||||
# ignore_requested_python_version: true
|
|
||||||
|
|
||||||
# select python package manager:
|
|
||||||
# currently supported pip and conda
|
|
||||||
# poetry is used if pip is selected and the repository contains a poetry.lock file
|
|
||||||
package_manager: {
|
|
||||||
# supported options: pip, conda, poetry
|
|
||||||
type: pip,
|
|
||||||
|
|
||||||
# specify pip version to use (examples "<20", "==19.3.1", "", empty string will install the latest version)
|
|
||||||
pip_version: "<20.2",
|
|
||||||
|
|
||||||
# virtual environment inherits packages from system
|
|
||||||
system_site_packages: false,
|
|
||||||
|
|
||||||
# install with --upgrade
|
|
||||||
force_upgrade: false,
|
|
||||||
|
|
||||||
# additional artifact repositories to use when installing python packages
|
|
||||||
# extra_index_url: ["https://allegroai.jfrog.io/clearmlai/api/pypi/public/simple"]
|
|
||||||
|
|
||||||
# additional conda channels to use when installing with conda package manager
|
|
||||||
conda_channels: ["pytorch", "conda-forge", "defaults", ]
|
|
||||||
|
|
||||||
# If set to true, Task's "installed packages" are ignored,
|
|
||||||
# and the repository's "requirements.txt" is used instead
|
|
||||||
# force_repo_requirements_txt: false
|
|
||||||
|
|
||||||
# set the priority packages to be installed before the rest of the required packages
|
|
||||||
# priority_packages: ["cython", "numpy", "setuptools", ]
|
|
||||||
|
|
||||||
# set the optional priority packages to be installed before the rest of the required packages,
|
|
||||||
# In case a package installation fails, the package will be ignored,
|
|
||||||
# and the virtual environment process will continue
|
|
||||||
# priority_optional_packages: ["pygobject", ]
|
|
||||||
|
|
||||||
# set the post packages to be installed after all the rest of the required packages
|
|
||||||
# post_packages: ["horovod", ]
|
|
||||||
|
|
||||||
# set the optional post packages to be installed after all the rest of the required packages,
|
|
||||||
# In case a package installation fails, the package will be ignored,
|
|
||||||
# and the virtual environment process will continue
|
|
||||||
# post_optional_packages: []
|
|
||||||
|
|
||||||
# set to True to support torch nightly build installation,
|
|
||||||
# notice: torch nightly builds are ephemeral and are deleted from time to time
|
|
||||||
torch_nightly: false,
|
|
||||||
},
|
|
||||||
|
|
||||||
# target folder for virtual environments builds, created when executing experiment
|
|
||||||
venvs_dir = ~/.clearml/venvs-builds
|
|
||||||
|
|
||||||
# cached virtual environment folder
|
|
||||||
venvs_cache: {
|
|
||||||
# maximum number of cached venvs
|
|
||||||
max_entries: 10
|
|
||||||
# minimum required free space to allow for cache entry, disable by passing 0 or negative value
|
|
||||||
free_space_threshold_gb: 2.0
|
|
||||||
# unmark to enable virtual environment caching
|
|
||||||
# path: ~/.clearml/venvs-cache
|
|
||||||
},
|
|
||||||
|
|
||||||
# cached git clone folder
|
|
||||||
vcs_cache: {
|
|
||||||
enabled: true,
|
|
||||||
path: ~/.clearml/vcs-cache
|
|
||||||
},
|
|
||||||
|
|
||||||
# use venv-update in order to accelerate python virtual environment building
|
|
||||||
# Still in beta, turned off by default
|
|
||||||
venv_update: {
|
|
||||||
enabled: false,
|
|
||||||
},
|
|
||||||
|
|
||||||
# cached folder for specific python package download (used for pytorch package caching)
|
|
||||||
pip_download_cache {
|
|
||||||
enabled: true,
|
|
||||||
path: ~/.clearml/pip-download-cache
|
|
||||||
},
|
|
||||||
|
|
||||||
translate_ssh: true,
|
|
||||||
# reload configuration file every daemon execution
|
|
||||||
reload_config: false,
|
|
||||||
|
|
||||||
# pip cache folder mapped into docker, used for python package caching
|
|
||||||
docker_pip_cache = ~/.clearml/pip-cache
|
|
||||||
# apt cache folder mapped into docker, used for ubuntu package caching
|
|
||||||
docker_apt_cache = ~/.clearml/apt-cache
|
|
||||||
|
|
||||||
# optional arguments to pass to docker image
|
|
||||||
# these are local for this agent and will not be updated in the experiment's docker_cmd section
|
|
||||||
# extra_docker_arguments: ["--ipc=host", ]
|
|
||||||
|
|
||||||
# optional shell script to run in docker when started before the experiment is started
|
|
||||||
# extra_docker_shell_script: ["apt-get install -y bindfs", ]
|
|
||||||
|
|
||||||
# Install the required packages for opencv libraries (libsm6 libxext6 libxrender-dev libglib2.0-0),
|
|
||||||
# for backwards compatibility reasons, true as default,
|
|
||||||
# change to false to skip installation and decrease docker spin up time
|
|
||||||
# docker_install_opencv_libs: true
|
|
||||||
|
|
||||||
# optional uptime configuration, make sure to use only one of 'uptime/downtime' and not both.
|
|
||||||
# If uptime is specified, agent will actively poll (and execute) tasks in the time-spans defined here.
|
|
||||||
# Outside of the specified time-spans, the agent will be idle.
|
|
||||||
# Defined using a list of items of the format: "<hours> <days>".
|
|
||||||
# hours - use values 0-23, single values would count as start hour and end at midnight.
|
|
||||||
# days - use days in abbreviated format (SUN-SAT)
|
|
||||||
# use '-' for ranges and ',' to separate singular values.
|
|
||||||
# for example, to enable the workers every Sunday and Tuesday between 17:00-20:00 set uptime to:
|
|
||||||
# uptime: ["17-20 SUN,TUE"]
|
|
||||||
|
|
||||||
# optional downtime configuration, can be used only when uptime is not used.
|
|
||||||
# If downtime is specified, agent will be idle in the time-spans defined here.
|
|
||||||
# Outside of the specified time-spans, the agent will actively poll (and execute) tasks.
|
|
||||||
# Use the same format as described above for uptime
|
|
||||||
# downtime: []
|
|
||||||
|
|
||||||
# set to true in order to force "docker pull" before running an experiment using a docker image.
|
|
||||||
# This makes sure the docker image is updated.
|
|
||||||
docker_force_pull: false
|
|
||||||
|
|
||||||
default_docker: {
|
|
||||||
# default docker image to use when running in docker mode
|
|
||||||
image: "nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04"
|
|
||||||
|
|
||||||
# optional arguments to pass to docker image
|
|
||||||
# arguments: ["--ipc=host", ]
|
|
||||||
}
|
|
||||||
|
|
||||||
# set the OS environments based on the Task's Environment section before launching the Task process.
|
|
||||||
enable_task_env: false
|
|
||||||
|
|
||||||
# set the initial bash script to execute at the startup of any docker.
|
|
||||||
# all lines will be executed regardless of their exit code.
|
|
||||||
# {python_single_digit} is translated to 'python3' or 'python2' according to requested python version
|
|
||||||
# docker_init_bash_script = [
|
|
||||||
# "echo 'Binary::apt::APT::Keep-Downloaded-Packages \"true\";' > /etc/apt/apt.conf.d/docker-clean",
|
|
||||||
# "chown -R root /root/.cache/pip",
|
|
||||||
# "apt-get update",
|
|
||||||
# "apt-get install -y git libsm6 libxext6 libxrender-dev libglib2.0-0",
|
|
||||||
# "(which {python_single_digit} && {python_single_digit} -m pip --version) || apt-get install -y {python_single_digit}-pip",
|
|
||||||
# ]
|
|
||||||
|
|
||||||
# set the preprocessing bash script to execute at the startup of any docker.
|
|
||||||
# all lines will be executed regardless of their exit code.
|
|
||||||
# docker_preprocess_bash_script = [
|
|
||||||
# "echo \"starting docker\"",
|
|
||||||
#]
|
|
||||||
|
|
||||||
# If False replace \r with \n and display full console output
|
|
||||||
# default is True, report a single \r line in a sequence of consecutive lines, per 5 seconds.
|
|
||||||
# suppress_carriage_return: true
|
|
||||||
|
|
||||||
# cuda versions used for solving pytorch wheel packages
|
|
||||||
# should be detected automatically. Override with os environment CUDA_VERSION / CUDNN_VERSION
|
|
||||||
# cuda_version: 10.1
|
|
||||||
# cudnn_version: 7.6
|
|
||||||
|
|
||||||
# Hide docker environment variables containing secrets when printing out the docker command by replacing their
|
|
||||||
# values with "********". Turning this feature on will hide the following environment variables values:
|
|
||||||
# CLEARML_API_SECRET_KEY, CLEARML_AGENT_GIT_PASS, AWS_SECRET_ACCESS_KEY, AZURE_STORAGE_KEY
|
|
||||||
# To include more environment variables, add their keys to the "extra_keys" list. E.g. to make sure the value of
|
|
||||||
# your custom environment variable named MY_SPECIAL_PASSWORD will not show in the logs when included in the
|
|
||||||
# docker command, set:
|
|
||||||
# extra_keys: ["MY_SPECIAL_PASSWORD"]
|
|
||||||
hide_docker_command_env_vars {
|
|
||||||
enabled: true
|
|
||||||
extra_keys: []
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sdk {
|
|
||||||
# ClearML - default SDK configuration
|
|
||||||
|
|
||||||
storage {
|
|
||||||
cache {
|
|
||||||
# Defaults to system temp folder / cache
|
|
||||||
default_base_dir: "~/.clearml/cache"
|
|
||||||
size {
|
|
||||||
# max_used_bytes = -1
|
|
||||||
min_free_bytes = 10GB
|
|
||||||
# cleanup_margin_percent = 5%
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
direct_access: [
|
|
||||||
# Objects matching are considered to be available for direct access, i.e. they will not be downloaded
|
|
||||||
# or cached, and any download request will return a direct reference.
|
|
||||||
# Objects are specified in glob format, available for url and content_type.
|
|
||||||
{ url: "file://*" } # file-urls are always directly referenced
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
metrics {
|
|
||||||
# History size for debug files per metric/variant. For each metric/variant combination with an attached file
|
|
||||||
# (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than
|
|
||||||
# X files are stored in the upload destination for each metric/variant combination.
|
|
||||||
file_history_size: 100
|
|
||||||
|
|
||||||
# Max history size for matplotlib imshow files per plot title.
|
|
||||||
# File names for the uploaded images will be recycled in such a way that no more than
|
|
||||||
# X images are stored in the upload destination for each matplotlib plot title.
|
|
||||||
matplotlib_untitled_history_size: 100
|
|
||||||
|
|
||||||
# Limit the number of digits after the dot in plot reporting (reducing plot report size)
|
|
||||||
# plot_max_num_digits: 5
|
|
||||||
|
|
||||||
# Settings for generated debug images
|
|
||||||
images {
|
|
||||||
format: JPEG
|
|
||||||
quality: 87
|
|
||||||
subsampling: 0
|
|
||||||
}
|
|
||||||
|
|
||||||
# Support plot-per-graph fully matching Tensorboard behavior (i.e. if this is set to true, each series should have its own graph)
|
|
||||||
tensorboard_single_series_per_graph: false
|
|
||||||
}
|
|
||||||
|
|
||||||
network {
|
|
||||||
metrics {
|
|
||||||
# Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
|
|
||||||
# a specific iteration
|
|
||||||
file_upload_threads: 4
|
|
||||||
|
|
||||||
# Warn about upload starvation if no uploads were made in specified period while file-bearing events keep
|
|
||||||
# being sent for upload
|
|
||||||
file_upload_starvation_warning_sec: 120
|
|
||||||
}
|
|
||||||
|
|
||||||
iteration {
|
|
||||||
# Max number of retries when getting frames if the server returned an error (http code 500)
|
|
||||||
max_retries_on_server_error: 5
|
|
||||||
# Backoff factor for consecutive retry attempts.
|
|
||||||
# SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.
|
|
||||||
retry_backoff_factor_sec: 10
|
|
||||||
}
|
|
||||||
}
|
|
||||||
aws {
|
|
||||||
s3 {
|
|
||||||
# S3 credentials, used for read/write access by various SDK elements
|
|
||||||
|
|
||||||
# default, used for any bucket not specified below
|
|
||||||
key: ""
|
|
||||||
secret: ""
|
|
||||||
region: ""
|
|
||||||
|
|
||||||
credentials: [
|
|
||||||
# specifies key/secret credentials to use when handling s3 urls (read or write)
|
|
||||||
# {
|
|
||||||
# bucket: "my-bucket-name"
|
|
||||||
# key: "my-access-key"
|
|
||||||
# secret: "my-secret-key"
|
|
||||||
# },
|
|
||||||
# {
|
|
||||||
# # This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)
|
|
||||||
# host: "my-minio-host:9000"
|
|
||||||
# key: "12345678"
|
|
||||||
# secret: "12345678"
|
|
||||||
# multipart: false
|
|
||||||
# secure: false
|
|
||||||
# }
|
|
||||||
]
|
|
||||||
}
|
|
||||||
boto3 {
|
|
||||||
pool_connections: 512
|
|
||||||
max_multipart_concurrency: 16
|
|
||||||
}
|
|
||||||
}
|
|
||||||
google.storage {
|
|
||||||
# # Default project and credentials file
|
|
||||||
# # Will be used when no bucket configuration is found
|
|
||||||
# project: "clearml"
|
|
||||||
# credentials_json: "/path/to/credentials.json"
|
|
||||||
|
|
||||||
# # Specific credentials per bucket and sub directory
|
|
||||||
# credentials = [
|
|
||||||
# {
|
|
||||||
# bucket: "my-bucket"
|
|
||||||
# subdir: "path/in/bucket" # Not required
|
|
||||||
# project: "clearml"
|
|
||||||
# credentials_json: "/path/to/credentials.json"
|
|
||||||
# },
|
|
||||||
# ]
|
|
||||||
}
|
|
||||||
azure.storage {
|
|
||||||
# containers: [
|
|
||||||
# {
|
|
||||||
# account_name: "clearml"
|
|
||||||
# account_key: "secret"
|
|
||||||
# # container_name:
|
|
||||||
# }
|
|
||||||
# ]
|
|
||||||
}
|
|
||||||
|
|
||||||
log {
|
|
||||||
# debugging feature: set this to true to make null log propagate messages to root logger (so they appear in stdout)
|
|
||||||
null_log_propagate: false
|
|
||||||
task_log_buffer_capacity: 66
|
|
||||||
|
|
||||||
# disable urllib3 info and lower levels
|
|
||||||
disable_urllib3_info: true
|
|
||||||
}
|
|
||||||
|
|
||||||
development {
|
|
||||||
# Development-mode options
|
|
||||||
|
|
||||||
# dev task reuse window
|
|
||||||
task_reuse_time_window_in_hours: 72.0
|
|
||||||
|
|
||||||
# Run VCS repository detection asynchronously
|
|
||||||
vcs_repo_detect_async: true
|
|
||||||
|
|
||||||
# Store uncommitted git/hg source code diff in experiment manifest when training in development mode
|
|
||||||
# This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
|
|
||||||
store_uncommitted_code_diff: true
|
|
||||||
|
|
||||||
# Support stopping an experiment in case it was externally stopped, status was changed or task was reset
|
|
||||||
support_stopping: true
|
|
||||||
|
|
||||||
# Default Task output_uri. if output_uri is not provided to Task.init, default_output_uri will be used instead.
|
|
||||||
default_output_uri: ""
|
|
||||||
|
|
||||||
# Default auto generated requirements optimize for smaller requirements
|
|
||||||
# If True, analyze the entire repository regardless of the entry point.
|
|
||||||
# If False, first analyze the entry point script; if it does not reference other local files,
|
|
||||||
# do not analyze the entire repository.
|
|
||||||
force_analyze_entire_repo: false
|
|
||||||
|
|
||||||
# If set to true, *clearml* update message will not be printed to the console
|
|
||||||
# this value can be overwritten with os environment variable CLEARML_SUPPRESS_UPDATE_MESSAGE=1
|
|
||||||
suppress_update_message: false
|
|
||||||
|
|
||||||
# If this flag is true (default is false), instead of analyzing the code with Pigar, analyze with `pip freeze`
|
|
||||||
detect_with_pip_freeze: false
|
|
||||||
|
|
||||||
# Development mode worker
|
|
||||||
worker {
|
|
||||||
# Status report period in seconds
|
|
||||||
report_period_sec: 2
|
|
||||||
|
|
||||||
# ping to the server - check connectivity
|
|
||||||
ping_period_sec: 30
|
|
||||||
|
|
||||||
# Log all stdout & stderr
|
|
||||||
log_stdout: true
|
|
||||||
|
|
||||||
# compatibility feature, report memory usage for the entire machine
|
|
||||||
# default (false), report only on the running process and its sub-processes
|
|
||||||
report_global_mem_used: false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,8 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: {{ include "clearml-agent.fullname" . }}
|
|
||||||
labels:
|
|
||||||
{{- include "clearml-agent.labels" . | nindent 4 }}
|
|
||||||
data:
|
|
||||||
{{ (.Files.Glob "files/*").AsConfig | indent 2 }}
|
|
@ -1,147 +0,0 @@
|
|||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: {{ include "clearml-agent.fullname" . }}
|
|
||||||
labels:
|
|
||||||
{{- include "clearml-agent.labels" . | nindent 4 }}
|
|
||||||
spec:
|
|
||||||
replicas: {{ .Values.replicaCount }}
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
{{- include "clearml-agent.selectorLabels" . | nindent 6 }}
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
annotations:
|
|
||||||
checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
|
|
||||||
checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
|
|
||||||
{{- with .Values.podAnnotations }}
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
labels:
|
|
||||||
{{- include "clearml-agent.selectorLabels" . | nindent 8 }}
|
|
||||||
spec:
|
|
||||||
{{- with .Values.imagePullSecrets }}
|
|
||||||
imagePullSecrets:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
serviceAccountName: {{ include "clearml-agent.serviceAccountName" . }}
|
|
||||||
securityContext:
|
|
||||||
{{- toYaml .Values.podSecurityContext | nindent 8 }}
|
|
||||||
initContainers:
|
|
||||||
- name: copy-clearml-conf
|
|
||||||
securityContext:
|
|
||||||
{{- toYaml .Values.securityContext | nindent 12 }}
|
|
||||||
image: "alpine:3.15"
|
|
||||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
|
||||||
command:
|
|
||||||
- cp
|
|
||||||
args:
|
|
||||||
- /clearml/clearml.conf
|
|
||||||
- $(HOME_DIR)
|
|
||||||
env:
|
|
||||||
{{- if .Values.extraEnv }}
|
|
||||||
{{- toYaml .Values.extraEnv | nindent 12 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Values.env }}
|
|
||||||
{{- range $key, $val := .Values.env }}
|
|
||||||
- name: {{ $key }}
|
|
||||||
value: {{ $val | quote }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Values.envFromSecrets }}
|
|
||||||
{{- range $key, $val := .Values.envFromSecrets }}
|
|
||||||
- name: {{ $key }}
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: {{ .secretName }}
|
|
||||||
key: {{ .key }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
resources:
|
|
||||||
{{- toYaml .Values.resources | nindent 12 }}
|
|
||||||
volumeMounts:
|
|
||||||
- name: home-dir
|
|
||||||
{{- /* Abort rendering with a readable message when env.HOME_DIR is not set. */}}
mountPath: "{{ required "A .Values.env.HOME_DIR entry required!" .Values.env.HOME_DIR }}"
|
|
||||||
- name: clearml-conf
|
|
||||||
mountPath: /clearml
|
|
||||||
containers:
|
|
||||||
- name: {{ .Chart.Name }}
|
|
||||||
securityContext:
|
|
||||||
{{- toYaml .Values.securityContext | nindent 12 }}
|
|
||||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
|
||||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
|
||||||
{{- if .Values.workingDir }}
|
|
||||||
workingDir: {{ .Values.workingDir }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Values.command }}
|
|
||||||
command:
|
|
||||||
{{- range .Values.command }}
|
|
||||||
{{- /* Quote each entry so numeric- or boolean-looking values stay strings in the rendered command list. */}}
- {{ . | quote }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Values.args }}
|
|
||||||
args:
|
|
||||||
{{- range .Values.args }}
|
|
||||||
{{- /* Quote each entry so numeric- or boolean-looking values stay strings in the rendered args list. */}}
- {{ . | quote }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
env:
|
|
||||||
- name: CLEARML_API_ACCESS_KEY
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: {{ include "clearml-agent.fullname" . }}
|
|
||||||
key: api-access-key
|
|
||||||
- name: CLEARML_API_SECRET_KEY
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: {{ include "clearml-agent.fullname" . }}
|
|
||||||
key: api-secret-key
|
|
||||||
{{- if .Values.extraEnv }}
|
|
||||||
{{- toYaml .Values.extraEnv | nindent 12 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Values.env }}
|
|
||||||
{{- range $key, $val := .Values.env }}
|
|
||||||
- name: {{ $key }}
|
|
||||||
value: {{ $val | quote }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Values.envFromSecrets }}
|
|
||||||
{{- range $key, $val := .Values.envFromSecrets }}
|
|
||||||
- name: {{ $key }}
|
|
||||||
valueFrom:
|
|
||||||
secretKeyRef:
|
|
||||||
name: {{ .secretName }}
|
|
||||||
key: {{ .key }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
resources:
|
|
||||||
{{- toYaml .Values.resources | nindent 12 }}
|
|
||||||
volumeMounts:
|
|
||||||
- name: config
|
|
||||||
mountPath: "/config"
|
|
||||||
- name: home-dir
|
|
||||||
{{- /* Abort rendering with a readable message when env.HOME_DIR is not set. */}}
mountPath: "{{ required "A .Values.env.HOME_DIR entry required!" .Values.env.HOME_DIR }}"
|
|
||||||
{{- with .Values.nodeSelector }}
|
|
||||||
nodeSelector:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- with .Values.affinity }}
|
|
||||||
affinity:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- with .Values.tolerations }}
|
|
||||||
tolerations:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
volumes:
|
|
||||||
- name: config
|
|
||||||
configMap:
|
|
||||||
name: {{ include "clearml-agent.fullname" . }}
|
|
||||||
- name: home-dir
|
|
||||||
emptyDir: {}
|
|
||||||
- name: clearml-conf
|
|
||||||
configMap:
|
|
||||||
name: {{ include "clearml-agent.fullname" . }}-clearml-conf
|
|
||||||
items:
|
|
||||||
- key: clearml.conf
|
|
||||||
path: clearml.conf
|
|
@ -1,13 +0,0 @@
|
|||||||
{{- if .Values.rbac.create -}}
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
{{- include "clearml-agent.labels" . | nindent 4 }}
|
|
||||||
name: {{ include "clearml-agent.fullname" . }}
|
|
||||||
{{- /* Quoted so an all-digit namespace name is still rendered as a string. */}}
namespace: {{ .Release.Namespace | quote }}
|
|
||||||
rules:
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources: ["pods"]
|
|
||||||
verbs: ["*"]
|
|
||||||
{{- end }}
|
|
@ -1,17 +0,0 @@
|
|||||||
{{- if .Values.rbac.create -}}
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
{{- include "clearml-agent.labels" . | nindent 4 }}
|
|
||||||
name: {{ include "clearml-agent.fullname" . }}
|
|
||||||
{{- /* Quoted so an all-digit namespace name is still rendered as a string. */}}
namespace: {{ .Release.Namespace | quote }}
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: Role
|
|
||||||
name: {{ include "clearml-agent.fullname" . }}
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: {{ include "clearml-agent.serviceAccountName" . }}
|
|
||||||
namespace: {{ .Release.Namespace | quote }}
|
|
||||||
{{- end }}
|
|
@ -1,10 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: Secret
|
|
||||||
metadata:
|
|
||||||
name: {{ include "clearml-agent.fullname" . }}
|
|
||||||
labels:
|
|
||||||
{{- include "clearml-agent.labels" . | nindent 4 }}
|
|
||||||
type: Opaque
|
|
||||||
data:
|
|
||||||
api-access-key: "{{ required "A .Values.apiAccessKey entry required!" .Values.apiAccessKey | b64enc }}"
|
|
||||||
api-secret-key: "{{ required "A .Values.apiSecretKey entry required!" .Values.apiSecretKey | b64enc }}"
|
|
@ -1,12 +0,0 @@
|
|||||||
{{- if .Values.serviceAccount.create -}}
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
name: {{ include "clearml-agent.serviceAccountName" . }}
|
|
||||||
labels:
|
|
||||||
{{- include "clearml-agent.labels" . | nindent 4 }}
|
|
||||||
{{- with .Values.serviceAccount.annotations }}
|
|
||||||
annotations:
|
|
||||||
{{- toYaml . | nindent 4 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
@ -1,96 +0,0 @@
|
|||||||
# Default values for clearml-agent.
|
|
||||||
# This is a YAML-formatted file.
|
|
||||||
# Declare variables to be passed into your templates.
|
|
||||||
|
|
||||||
replicaCount: 1
|
|
||||||
|
|
||||||
image:
|
|
||||||
repository: allegroai/clearml-agent-k8s
|
|
||||||
pullPolicy: IfNotPresent
|
|
||||||
# Overrides the image tag whose default is the chart appVersion.
|
|
||||||
tag: ""
|
|
||||||
|
|
||||||
imagePullSecrets: []
|
|
||||||
nameOverride: ""
|
|
||||||
fullnameOverride: ""
|
|
||||||
|
|
||||||
command:
|
|
||||||
- python3
|
|
||||||
args:
|
|
||||||
- k8s_glue_example.py
|
|
||||||
- --queue
|
|
||||||
- $(K8S_GLUE_QUEUE)
|
|
||||||
- --max-pods
|
|
||||||
- $(K8S_GLUE_MAX_PODS)
|
|
||||||
- --template-yaml
|
|
||||||
- $(K8S_GLUE_TEMPLATE_YAML)
|
|
||||||
- --namespace
|
|
||||||
- $(POD_NAMESPACE)
|
|
||||||
# - --pod-clearml-conf
|
|
||||||
# - $(K8S_GLUE_CLEARML_CONF)
|
|
||||||
|
|
||||||
env:
|
|
||||||
PYTHONUNBUFFERED: "1"
|
|
||||||
CLEARML_API_HOST: "http://10.0.0.10:8008"
|
|
||||||
CLEARML_WEB_HOST: "http://10.0.0.10:8080"
|
|
||||||
CLEARML_FILES_HOST: "http://10.0.0.10:8081"
|
|
||||||
K8S_GLUE_MAX_PODS: "2"
|
|
||||||
K8S_GLUE_QUEUE: "k8s-queue"
|
|
||||||
K8S_GLUE_TEMPLATE_YAML: "/config/pod-template.yaml"
|
|
||||||
# K8S_GLUE_CLEARML_CONF: "/root/clearml.conf"
|
|
||||||
CLEARML_WORKER_ID: "k8s-glue-agent"
|
|
||||||
CLEARML_AGENT_UPDATE_REPO: ""
|
|
||||||
FORCE_CLEARML_AGENT_REPO: ""
|
|
||||||
CLEARML_DOCKER_IMAGE: "nvidia/cuda:11.6.0-runtime-ubuntu20.04"
|
|
||||||
HOME_DIR: /root
|
|
||||||
|
|
||||||
envFromSecrets: {}
|
|
||||||
|
|
||||||
extraEnv:
|
|
||||||
- name: POD_NAMESPACE
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: metadata.namespace
|
|
||||||
|
|
||||||
serviceAccount:
|
|
||||||
# Specifies whether a service account should be created
|
|
||||||
create: true
|
|
||||||
# Annotations to add to the service account
|
|
||||||
annotations: {}
|
|
||||||
# The name of the service account to use.
|
|
||||||
# If not set and create is true, a name is generated using the fullname template
|
|
||||||
name: ""
|
|
||||||
|
|
||||||
podAnnotations: {}
|
|
||||||
|
|
||||||
podSecurityContext: {}
|
|
||||||
# fsGroup: 2000
|
|
||||||
|
|
||||||
securityContext: {}
|
|
||||||
# capabilities:
|
|
||||||
# drop:
|
|
||||||
# - ALL
|
|
||||||
# readOnlyRootFilesystem: true
|
|
||||||
# runAsNonRoot: true
|
|
||||||
# runAsUser: 1000
|
|
||||||
|
|
||||||
resources: {}
|
|
||||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
|
||||||
# choice for the user. This also increases chances charts run on environments with little
|
|
||||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
|
||||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
|
||||||
# limits:
|
|
||||||
# cpu: 100m
|
|
||||||
# memory: 128Mi
|
|
||||||
# requests:
|
|
||||||
# cpu: 100m
|
|
||||||
# memory: 128Mi
|
|
||||||
|
|
||||||
nodeSelector: {}
|
|
||||||
|
|
||||||
tolerations: []
|
|
||||||
|
|
||||||
affinity: {}
|
|
||||||
|
|
||||||
rbac:
|
|
||||||
create: true
|
|
Loading…
Reference in New Issue
Block a user