# CLEARML-AGENT configuration file
api {
    api_server: https://demoapi.demo.clear.ml
    web_server: https://demoapp.demo.clear.ml
    files_server: https://demofiles.demo.clear.ml

    # Credentials are generated in the webapp, https://app.clear.ml/settings/workspace-configuration
    # Overridden with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY
    credentials {"access_key": "EGRTCO8JMSIGI6S39GTP43NFWXDQOW", "secret_key": "x!XTov_G-#vspE*Y(h$Anm&DIc5Ou-F)jsl$PdOyj5wG1&E!Z8"}

    # Verify the host SSL certificate; set to False only if you have a very good reason
    verify_certificate: True
}

agent {
    # Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)
    # Leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)
    # **Notice**: a GitHub personal access token is equivalent to a password; you can put it directly into `git_pass`
    # To learn how to generate a git token for GitHub/Bitbucket/GitLab, see:
    # https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token
    # https://support.atlassian.com/bitbucket-cloud/docs/app-passwords/
    # https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html
    git_user=""
    git_pass=""
    # Limit credentials to a single domain, for example: github.com;
    # all other domains will use public access (no user/pass). Default: always send user/pass for any VCS domain
    git_host=""
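    # For instance, the following (commented) setting would restrict the credentials above to GitHub only:
    # git_host: "github.com"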

    # Force GIT protocol to use SSH regardless of the git url (Assumes GIT user/pass are blank)
    force_git_ssh_protocol: false
    # Force a specific SSH port when converting http to ssh links (the domain is kept the same)
    # force_git_ssh_port: 0
    # Force a specific SSH username when converting http to ssh links (the default username is 'git')
    # force_git_ssh_user: git
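    # As an illustration (hypothetical repository), with force_git_ssh_protocol enabled, a link such as
    # https://github.com/user/repo.git would be converted to git@github.com:user/repo.git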

    # Unique name of this worker; if not set, it is created based on hostname:process_id
    # Overridden with os environment: CLEARML_WORKER_ID
    # worker_id: "clearml-agent-machine1:gpu0"
    worker_id: ""

    # Worker name, replaces the hostname when creating a unique name for this worker
    # Overridden with os environment: CLEARML_WORKER_NAME
    # worker_name: "clearml-agent-machine1"
    worker_name: ""

    # Set the python version to use when creating the virtual environment and launching the experiment
    # Example values: "/usr/bin/python3" or "/usr/local/bin/python3.6"
    # The default is the python executing the clearml_agent
    python_binary: ""
    # Ignore any requested python version (Default: False; if a Task requested a specific python version
    # and the system supports multiple python versions, the agent will use the requested version)
    # ignore_requested_python_version: true

    # Select the python package manager:
    # currently supported: pip, conda and poetry.
    # If "pip" or "conda" are used, the agent installs the required packages
    # based on the "installed packages" section of the Task. If the "installed packages" section is empty,
    # it will revert to using `requirements.txt` from the repository's root directory.
    # If Poetry is selected and the root repository contains `poetry.lock` or `pyproject.toml`,
    # the "installed packages" section is ignored, and poetry is used.
    # If Poetry is selected and no lock file is found, it reverts to "pip" package manager behaviour.
    package_manager: {
        # supported options: pip, conda, poetry
        type: pip,

        # Specify the pip version to use (examples: "<20", "==19.3.1", ""; an empty string installs the latest version)
        # pip_version: "<20"
        # Specify the poetry version to use (examples: "<2", "==1.1.1", ""; an empty string installs the latest version)
        # poetry_version: "<2",

        # Virtual environment inherits packages from the system
        system_site_packages: false,
        # Install with --upgrade
        force_upgrade: false,

        # Additional artifact repositories to use when installing python packages
        # extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"]
        extra_index_url: []

        # Additional conda channels to use when installing with the conda package manager
        conda_channels: ["pytorch", "conda-forge", "defaults", ]
        # conda_full_env_update: false
        # conda_env_as_base_docker: false

        # Set the priority packages to be installed before the rest of the required packages
        # priority_packages: ["cython", "numpy", "setuptools", ]

        # Set the optional priority packages to be installed before the rest of the required packages;
        # if a package installation fails, the package will be ignored
        # and the virtual environment process will continue
        # priority_optional_packages: ["pygobject", ]

        # Set the post packages to be installed after all the rest of the required packages
        # post_packages: ["horovod", ]

        # Set the optional post packages to be installed after all the rest of the required packages;
        # if a package installation fails, the package will be ignored
        # and the virtual environment process will continue
        # post_optional_packages: []

        # Set to True to support torch nightly build installation;
        # notice: torch nightly builds are ephemeral and are deleted from time to time
        torch_nightly: false,
    },

    # Target folder for virtual environment builds, created when executing an experiment
    venvs_dir = ~/.clearml/venvs-builds

    # Cached virtual environment folder
    venvs_cache: {
        # maximum number of cached venvs
        max_entries: 10
        # minimum required free space to allow for a cache entry; disable by passing 0 or a negative value
        free_space_threshold_gb: 2.0
        # uncomment to enable virtual environment caching
        # path: ~/.clearml/venvs-cache
    },

    # Cached git clone folder
    vcs_cache: {
        enabled: true,
        path: ~/.clearml/vcs-cache
    },

    # DEPRECATED: please use `venvs_cache` and set `venvs_cache.path`
    # Use venv-update in order to accelerate python virtual environment building
    # Still in beta, turned off by default
    # venv_update: {
    #     enabled: false,
    # },

    # Cached folder for specific python package downloads (mostly pytorch versions)
    pip_download_cache {
        enabled: true,
        path: ~/.clearml/pip-download-cache
    },

    translate_ssh: true,
    # Reload the configuration file on every daemon execution
    reload_config: false,

    # pip cache folder mapped into docker, used for python package caching
    docker_pip_cache = ~/.clearml/pip-cache
    # apt cache folder mapped into docker, used for ubuntu package caching
    docker_apt_cache = ~/.clearml/apt-cache

    # Optional arguments to pass to the docker image;
    # these are local for this agent and will not be updated in the experiment's docker_cmd section
    # extra_docker_arguments: ["--ipc=host", "-v", "/mnt/host/data:/mnt/data"]

    # Optional shell script to run inside the docker container when it starts, before the experiment is started
    # extra_docker_shell_script: ["apt-get install -y bindfs", ]

    # Install the packages required by opencv libraries (libsm6 libxext6 libxrender-dev libglib2.0-0).
    # For backwards compatibility reasons this defaults to true;
    # change to false to skip the installation and decrease docker spin-up time
    # docker_install_opencv_libs: true

    # Set to true in order to force "docker pull" before running an experiment using a docker image.
    # This makes sure the docker image is up to date.
    docker_force_pull: false

    default_docker: {
        # default docker image to use when running in docker mode
        image: "nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04"

        # optional arguments to pass to the docker image
        # arguments: ["--ipc=host"]

        # Lookup-table rules for the default container;
        # the first matched rule will be picked, according to rule order.
        # Enterprise version only
        # match_rules: [
        #     {
        #         image: "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
        #         arguments: "-e define=value"
        #         match: {
        #             script {
        #                 # Optional: must match all requirements (not partial)
        #                 requirements: {
        #                     # version selection matching PEP-440
        #                     pip: {
        #                         tensorflow: "~=2.6"
        #                     },
        #                 }
        #                 # Optional: matching based on regular expression, example: "^exact_match$"
        #                 repository: "/my_repository/"
        #                 branch: "main"
        #                 binary: "python3.6"
        #             }
        #             # Optional: matching based on regular expression, example: "^exact_match$"
        #             project: "project/sub_project"
        #         }
        #     },
        #     {
        #         image: "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
        #         arguments: "-e define=value"
        #         match: {
        #             # must match all requirements (not partial)
        #             script {
        #                 requirements: {
        #                     conda: {
        #                         torch: ">=2.6,<2.8"
        #                     }
        #                 }
        #                 # no repository matching required
        #                 repository: ""
        #             }
        #             # no container image matching required (allows replacing one requested container with another)
        #             container: ""
        #             # no project matching required
        #             project: ""
        #         }
        #     },
        # ]
    }

    # Set the OS environment variables based on the Task's Environment section before launching the Task process.
    enable_task_env: false

    # CUDA versions used for Conda setup & solving PyTorch wheel packages.
    # These should be detected automatically. Override with os environment CUDA_VERSION / CUDNN_VERSION
    # cuda_version: 10.1
    # cudnn_version: 7.6

    # Hide docker environment variables containing secrets when printing out the docker command by replacing their
    # values with "********". Turning this feature on will hide the values of the following environment variables:
    # CLEARML_API_SECRET_KEY, CLEARML_AGENT_GIT_PASS, AWS_SECRET_ACCESS_KEY, AZURE_STORAGE_KEY
    # To include more environment variables, add their keys to the "extra_keys" list. E.g. to make sure the value of
    # your custom environment variable named MY_SPECIAL_PASSWORD will not show in the logs when included in the
    # docker command, set:
    # extra_keys: ["MY_SPECIAL_PASSWORD"]
    hide_docker_command_env_vars {
        enabled: true
        extra_keys: []
        parse_embedded_urls: true
    }

    # Allows setting internal mount points inside the docker container,
    # especially useful for non-root docker container images.
    # docker_internal_mounts {
    #     sdk_cache: "/clearml_agent_cache"
    #     apt_cache: "/var/cache/apt/archives"
    #     ssh_folder: "/root/.ssh"
    #     pip_cache: "/root/.cache/pip"
    #     poetry_cache: "/root/.cache/pypoetry"
    #     vcs_cache: "/root/.clearml/vcs-cache"
    #     venv_build: "~/.clearml/venvs-builds"
    #     pip_download: "/root/.clearml/pip-download-cache"
    # }

    # Name docker containers created by the daemon using the following string format (supported from Docker 0.6.5).
    # Allowed variables are task_id, worker_id and rand_string (random lower-case letters string, up to 32 characters).
    # Note: the resulting name must start with an alphanumeric character and
    # continue with alphanumeric characters, underscores (_), dots (.) and/or dashes (-)
    # docker_container_name_format: "clearml-id-{task_id}-{rand_string:.8}"
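    # As an illustration (hypothetical task id and random string), the format above would yield a container
    # name such as clearml-id-aa11bb22cc33-kqwxyzab, with the rand_string placeholder truncated to 8 characters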
}

sdk {
    # CLEARML - default SDK configuration

    storage {
        cache {
            # Defaults to system temp folder / cache
            default_base_dir: "~/.clearml/cache"
        }

        direct_access: [
            # Matching objects are considered to be available for direct access, i.e. they will not be downloaded
            # or cached, and any download request will return a direct reference.
            # Objects are specified in glob format, available for url and content_type.
            { url: "file://*" }  # file-urls are always directly referenced
        ]
    }

    metrics {
        # History size for debug files per metric/variant. For each metric/variant combination with an attached file
        # (e.g. a debug image event), file names for the uploaded files will be recycled so that no more than
        # file_history_size files are stored in the upload destination for each metric/variant combination.
        file_history_size: 100

        # Max history size for matplotlib imshow files per plot title.
        # File names for the uploaded images will be recycled so that no more than
        # matplotlib_untitled_history_size images are stored in the upload destination for each matplotlib plot title.
        matplotlib_untitled_history_size: 100

        # Limit the number of digits after the dot in plot reporting (reduces the plot report size)
        # plot_max_num_digits: 5

        # Settings for generated debug images
        images {
            format: JPEG
            quality: 87
            subsampling: 0
        }

        # Support plot-per-graph, fully matching Tensorboard behavior (i.e. if this is set to True, each series gets its own graph)
        tensorboard_single_series_per_graph: False
    }

    network {
        metrics {
            # Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
            # a specific iteration
            file_upload_threads: 4

            # Warn about upload starvation if no uploads were made in the specified period while file-bearing events keep
            # being sent for upload
            file_upload_starvation_warning_sec: 120
        }

        iteration {
            # Max number of retries when getting frames if the server returned an error (http code 500)
            max_retries_on_server_error: 5
            # Backoff factor for consecutive retry attempts.
            # The SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) seconds between retries.
            retry_backoff_factor_sec: 10
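            # For example, with the factor of 10 above, consecutive retries wait
            # 10 * 2^0 = 10s, 10 * 2^1 = 20s, then 40s, 80s, and so on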
        }
    }

    aws {
        s3 {
            # S3 credentials, used for read/write access by various SDK elements

            # Default, used for any bucket not specified below
            key: ""
            secret: ""
            region: ""
            # Or enable the credentials chain to let Boto3 pick the right credentials.
            # This includes picking credentials from environment variables,
            # the credentials file and an IAM role using the metadata service.
            # Refer to the latest Boto3 docs
            use_credentials_chain: false

            credentials: [
                # Specifies key/secret credentials to use when handling s3 urls (read or write)
                # {
                #     bucket: "my-bucket-name"
                #     key: "my-access-key"
                #     secret: "my-secret-key"
                # },
                # {
                #     # This will apply to all buckets in this host (unless key/secret is specifically provided for a given bucket)
                #     host: "my-minio-host:9000"
                #     key: "12345678"
                #     secret: "12345678"
                #     multipart: false
                #     secure: false
                #     verify: /path/to/ca/bundle.crt OR false to not verify
                # }
            ]
        }
        boto3 {
            pool_connections: 512
            max_multipart_concurrency: 16
        }
    }

    google.storage {
        # # Default project and credentials file
        # # Will be used when no bucket configuration is found
        # project: "clearml"
        # credentials_json: "/path/to/credentials.json"

        # # Specific credentials per bucket and sub directory
        # credentials = [
        #     {
        #         bucket: "my-bucket"
        #         subdir: "path/in/bucket"  # Not required
        #         project: "clearml"
        #         credentials_json: "/path/to/credentials.json"
        #     },
        # ]
    }
    azure.storage {
        # containers: [
        #     {
        #         account_name: "clearml"
        #         account_key: "secret"
        #         # container_name:
        #     }
        # ]
    }

    log {
        # Debugging feature: set this to true to make the null log propagate messages to the root logger (so they appear in stdout)
        null_log_propagate: False
        task_log_buffer_capacity: 66

        # Disable urllib info and lower log levels
        disable_urllib3_info: True
    }

    development {
        # Development-mode options

        # dev task reuse window
        task_reuse_time_window_in_hours: 72.0

        # Run VCS repository detection asynchronously
        vcs_repo_detect_async: True

        # Store uncommitted git/hg source code diff in the experiment manifest when training in development mode.
        # This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
        store_uncommitted_code_diff_on_train: True

        # Support stopping an experiment in case it was externally stopped, its status was changed, or the task was reset
        support_stopping: True

        # Default Task output_uri. If output_uri is not provided to Task.init, default_output_uri will be used instead.
        default_output_uri: ""

        # Development-mode worker
        worker {
            # Status report period in seconds
            report_period_sec: 2

            # Ping the server to check connectivity
            ping_period_sec: 30

            # Log all stdout & stderr
            log_stdout: True
        }
    }

    # Apply the top-level environment section from the configuration into os.environ
    apply_environment: true
    # The top-level environment section is in the form of:
    # environment {
    #     key: value
    #     ...
    # }
    # and is applied to the OS environment as `key=value` for each key/value pair
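    # For instance (hypothetical variable name), the following section would result in MY_VAR=value in os.environ:
    # environment {
    #     MY_VAR: "value"
    # }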

    # Apply the top-level files section from the configuration into the local file system
    apply_files: true
    # The top-level files section allows auto-generating files at designated paths with predefined contents
    # and target format. Options include:
    #   contents: the target file's content, typically a string (or any base type int/float/list/dict etc.)
    #   format: a custom format for the contents. Currently the only supported value is `base64`, to automatically
    #           decode a base64-encoded contents string; otherwise ignored
    #   path: the target file's path, may include ~ and in-place env vars
    #   target_format: format used to encode contents before writing into the target file. Supported values are json,
    #                  yaml, yml and bytes (in which case the file will be written in binary mode). Default is text mode.
    #   overwrite: overwrite the target file in case it exists. Default is true.
    #
    # Example:
    # files {
    #     myfile1 {
    #         contents: "The quick brown fox jumped over the lazy dog"
    #         path: "/tmp/fox.txt"
    #     }
    #     myjsonfile {
    #         contents: {
    #             some {
    #                 nested {
    #                     value: [1, 2, 3, 4]
    #                 }
    #             }
    #         }
    #         path: "/tmp/test.json"
    #         target_format: json
    #     }
    # }
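    # A hypothetical entry using the `format: base64` option described above might look like:
    # files {
    #     mydecodedfile {
    #         contents: "VGhlIHF1aWNrIGJyb3duIGZveA=="  # base64 for "The quick brown fox"
    #         format: base64
    #         path: "/tmp/fox_decoded.txt"
    #     }
    # }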
}