# CLEARML-AGENT configuration file

api {
    api_server: https://demoapi.demo.clear.ml
    web_server: https://demoapp.demo.clear.ml
    files_server: https://demofiles.demo.clear.ml

    # Credentials are generated in the webapp, https://app.clear.ml/settings/workspace-configuration
    # Overridden with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY
    credentials {"access_key": "EGRTCO8JMSIGI6S39GTP43NFWXDQOW", "secret_key": "x!XTov_G-#vspE*Y(h$Anm&DIc5Ou-F)jsl$PdOyj5wG1&E!Z8"}
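    # Illustration only (not part of the original file): the same credentials can be supplied from the
    # shell instead of this file, using the environment variables mentioned above, e.g.
    #   export CLEARML_API_ACCESS_KEY="<access_key>"
    #   export CLEARML_API_SECRET_KEY="<secret_key>"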

    # verify host ssl certificate, set to False only if you have a very good reason
    verify_certificate: True
}

agent {
    # unique name of this worker, if None, created based on hostname:process_id
    # Override with os environment: CLEARML_WORKER_ID
    # worker_id: "clearml-agent-machine1:gpu0"
    worker_id: ""

    # worker name, replaces the hostname when creating a unique name for this worker
    # Override with os environment: CLEARML_WORKER_NAME
    # worker_name: "clearml-agent-machine1"
    worker_name: ""

    # Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)
    # leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)
    # **Notice**: GitHub personal token is equivalent to password, you can put it directly into `git_pass`
    # To learn how to generate git token GitHub/Bitbucket/GitLab:
    # https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token
    # https://support.atlassian.com/bitbucket-cloud/docs/app-passwords/
    # https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html
    # git_user: ""
    # git_pass: ""
    # Limit credentials to a single domain, for example: github.com,
    # all other domains will use public access (no user/pass). Default: always send user/pass for any VCS domain
    # git_host: ""

    # Force GIT protocol to use SSH regardless of the git url (Assumes GIT user/pass are blank)
    force_git_ssh_protocol: false
    # Force a specific SSH port when converting http to ssh links (the domain is kept the same)
    # force_git_ssh_port: 0
    # Force a specific SSH username when converting http to ssh links (the default username is 'git')
    # force_git_ssh_user: git
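    # Illustration only (hypothetical repository URL): with force_git_ssh_protocol enabled, an http(s) link such as
    #   https://github.com/some-org/some-repo.git
    # is converted to the equivalent SSH form
    #   git@github.com:some-org/some-repo.git
    # force_git_ssh_user / force_git_ssh_port only change the SSH username ('git' by default) and the SSH port of the converted link.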

    # Set the python version to use when creating the virtual environment and launching the experiment
    # Example values: "/usr/bin/python3" or "/usr/local/bin/python3.6"
    # The default is the python executing the clearml_agent
    python_binary: ""
    # ignore any requested python version (Default: False, if a Task was using a
    # specific python version and the system supports multiple python versions, the agent will use the requested python version)
    # ignore_requested_python_version: true

    # Force the root folder of the git repository (instead of the working directory) into the PYTHONPATH
    # default false, only the working directory will be added to the PYTHONPATH
    # force_git_root_python_path: false

    # if set, use GIT_ASKPASS to pass user/pass when cloning / fetching repositories
    # it solves passing user/token to git submodules.
    # this is a safer way to ensure multiple users using the same repository will
    # not accidentally leak credentials
    # Only supported on Linux systems, it will be the default in future releases
    # enable_git_ask_pass: false

    # in docker mode, if the container's entrypoint automatically activates a virtual environment,
    # use the activated virtual environment and install everything there
    # set to False to disable, and always create a new venv inheriting from the system_site_packages
    # docker_use_activated_venv: true

    # select python package manager:
    # currently supported: pip, conda and poetry
    # if "pip" or "conda" are used, the agent installs the required packages
    # based on the "installed packages" section of the Task. If the "installed packages" is empty,
    # it will revert to using `requirements.txt` from the repository's root directory.
    # If Poetry is selected and the root repository contains `poetry.lock` or `pyproject.toml`,
    # the "installed packages" section is ignored, and poetry is used.
    # If Poetry is selected and no lock file is found, it reverts to "pip" package manager behaviour.
    package_manager: {
        # supported options: pip, conda, poetry
        type: pip,

        # specify pip version to use (examples "<20.2", "==19.3.1", "", empty string will install the latest version)
        # pip_version: ["<20.2 ; python_version < '3.10'", "<22.3 ; python_version >= '3.10'"]
        # specify poetry version to use (examples "<2", "==1.1.1", "", empty string will install the latest version)
        # poetry_version: "<2",
        # poetry_install_extra_args: ["-v"]

        # virtual environment inherits packages from the system
        system_site_packages: false,
        # install with --upgrade
        force_upgrade: false,

        # additional artifact repositories to use when installing python packages
        # extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"]
        extra_index_url: []

        # control the pytorch wheel resolving algorithm, options are: "pip", "direct"
        # "pip" (default): would automatically detect the cuda version, and supply pip with the correct
        #                  extra-index-url, based on pytorch.org tables
        # "direct": would resolve a direct link to the pytorch wheel by parsing the pytorch.org pip repository
        #           and matching the automatically detected cuda version with the required pytorch wheel.
        #           if the exact cuda version is not found for the required pytorch wheel, it will try
        #           a lower cuda version until a match is found
        #
        # pytorch_resolve: "pip"
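        # Illustration only (assumed CUDA version, not part of the original file): with "pip" resolving
        # and CUDA 11.8 detected on the worker, pip would typically be pointed at an extra index such as
        #   --extra-index-url https://download.pytorch.org/whl/cu118
        # whereas "direct" would pin the matching wheel URL from that repository instead.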

        # additional conda channels to use when installing with conda package manager
        conda_channels: ["pytorch", "conda-forge", "defaults", ]
        # conda_full_env_update: false
        # conda_env_as_base_docker: false

        # set the priority packages to be installed before the rest of the required packages
        # Note: this only controls the installation order of existing requirement packages (and does not add additional packages)
        # priority_packages: ["cython", "numpy", "setuptools", ]

        # set the optional priority packages to be installed before the rest of the required packages,
        # in case a package installation fails, the package will be ignored,
        # and the virtual environment process will continue
        # Note: this only controls the installation order of existing requirement packages (and does not add additional packages)
        # priority_optional_packages: ["pygobject", ]

        # set the post packages to be installed after all the rest of the required packages
        # Note: this only controls the installation order of existing requirement packages (and does not add additional packages)
        # post_packages: ["horovod", ]

        # set the optional post packages to be installed after all the rest of the required packages,
        # in case a package installation fails, the package will be ignored,
        # and the virtual environment process will continue
        # Note: this only controls the installation order of existing requirement packages (and does not add additional packages)
        # post_optional_packages: []

        # set to True to support torch nightly build installation,
        # notice: torch nightly builds are ephemeral and are deleted from time to time
        torch_nightly: false,
    },

    # target folder for virtual environments builds, created when executing experiment
    venvs_dir = ~/.clearml/venvs-builds

    # cached virtual environment folder
    venvs_cache: {
        # maximum number of cached venvs
        max_entries: 10
        # minimum required free space to allow for cache entry, disable by passing 0 or negative value
        free_space_threshold_gb: 2.0
        # unmark to enable virtual environment caching
        path: ~/.clearml/venvs-cache
    },

    # cached git clone folder
    vcs_cache: {
        enabled: true,
        path: ~/.clearml/vcs-cache
    },

    # DEPRECATED: please use `venvs_cache` and set `venvs_cache.path`
    # use venv-update in order to accelerate python virtual environment building
    # Still in beta, turned off by default
    # venv_update: {
    #     enabled: false,
    # },

    # cached folder for specific python package download (mostly pytorch versions)
    pip_download_cache {
        enabled: true,
        path: ~/.clearml/pip-download-cache
    },

    translate_ssh: true,

    # set "disable_ssh_mount: true" to disable the automatic mount of the ~/.ssh folder into the docker containers
    # default is false, automatically mounts ~/.ssh
    # Must be set to True if using "clearml-session" with this agent!
    # disable_ssh_mount: false

    # reload configuration file every daemon execution
    reload_config: false,

    # pip cache folder mapped into docker, used for python package caching
    docker_pip_cache = ~/.clearml/pip-cache
    # apt cache folder mapped into docker, used for ubuntu package caching
    docker_apt_cache = ~/.clearml/apt-cache

    # optional arguments to pass to docker image
    # these are local for this agent and will not be updated in the experiment's docker_cmd section
    # extra_docker_arguments: ["--ipc=host", "-v", "/mnt/host/data:/mnt/data"]

    # optional shell script to run in docker when started, before the experiment is started
    # extra_docker_shell_script: ["apt-get install -y bindfs", ]

    # Install the required packages for opencv libraries (libsm6 libxext6 libxrender-dev libglib2.0-0);
    # for backwards compatibility reasons this defaults to true,
    # change to false to skip installation and decrease docker spin-up time
    # docker_install_opencv_libs: true

    # set to true in order to force "docker pull" before running an experiment using a docker image.
    # This makes sure the docker image is updated.
    docker_force_pull: false

    default_docker: {
        # default docker image to use when running in docker mode
        image: "nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04"

        # optional arguments to pass to docker image
        # arguments: ["--ipc=host"]

        # lookup table rules for default container
        # first matched rule will be picked, according to rule order
        # enterprise version only
        # match_rules: [
        #     {
        #         image: "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
        #         arguments: "-e define=value"
        #         match: {
        #             script {
        #                 # Optional: must match all requirements (not partial)
        #                 requirements: {
        #                     # version selection matching PEP-440
        #                     pip: {
        #                         tensorflow: "~=2.6"
        #                     },
        #                 }
        #                 # Optional: matching based on regular expression, example: "^exact_match$"
        #                 repository: "/my_repository/"
        #                 branch: "main"
        #                 binary: "python3.6"
        #             }
        #             # Optional: matching based on regular expression, example: "^exact_match$"
        #             project: "project/sub_project"
        #         }
        #     },
        #     {
        #         image: "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
        #         arguments: "-e define=value"
        #         match: {
        #             # must match all requirements (not partial)
        #             script {
        #                 requirements: {
        #                     conda: {
        #                         torch: ">=2.6,<2.8"
        #                     }
        #                 }
        #                 # no repository matching required
        #                 repository: ""
        #             }
        #             # no project matching required
        #             project: ""
        #         }
        #     },
        # ]
    }

    # set the OS environment variables based on the Task's Environment section before launching the Task process.
    enable_task_env: false

    # CUDA versions used for Conda setup & solving PyTorch wheel packages
    # Should be detected automatically. Override with os environment CUDA_VERSION / CUDNN_VERSION
    # cuda_version: 10.1
    # cudnn_version: 7.6

    # Hide docker environment variables containing secrets when printing out the docker command by replacing their
    # values with "********". Turning this feature on will hide the following environment variables values:
    # CLEARML_API_SECRET_KEY, CLEARML_AGENT_GIT_PASS, AWS_SECRET_ACCESS_KEY, AZURE_STORAGE_KEY
    # To include more environment variables, add their keys to the "extra_keys" list. E.g. to make sure the value of
    # your custom environment variable named MY_SPECIAL_PASSWORD will not show in the logs when included in the
    # docker command, set:
    # extra_keys: ["MY_SPECIAL_PASSWORD"]
    hide_docker_command_env_vars {
        enabled: true
        extra_keys: []
        parse_embedded_urls: true
    }
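    # Illustration only: with this feature enabled, a printed docker command would show, e.g.,
    #   -e CLEARML_API_SECRET_KEY=********
    # instead of the real secret value.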

    # allows setting internal mount points inside the docker container,
    # especially useful for non-root docker container images.
    # docker_internal_mounts {
    #     sdk_cache: "/clearml_agent_cache"
    #     apt_cache: "/var/cache/apt/archives"
    #     ssh_folder: "/root/.ssh"
    #     pip_cache: "/root/.cache/pip"
    #     poetry_cache: "/root/.cache/pypoetry"
    #     vcs_cache: "/root/.clearml/vcs-cache"
    #     venv_build: "~/.clearml/venvs-builds"
    #     pip_download: "/root/.clearml/pip-download-cache"
    # }

    # Name docker containers created by the daemon using the following string format (supported from Docker 0.6.5)
    # Allowed variables are task_id, worker_id and rand_string (random lower-case letters string, up to 32 characters)
    # Note: resulting name must start with an alphanumeric character and
    # continue with alphanumeric characters, underscores (_), dots (.) and/or dashes (-)
    # docker_container_name_format: "clearml-id-{task_id}-{rand_string:.8}"
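    # Illustration only (hypothetical task id): with the format above, a task id of
    # "40a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6" could yield a container name such as
    # "clearml-id-40a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6-kqzwvbnm", where the suffix is the random
    # lower-case string truncated to 8 characters by "{rand_string:.8}".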
}

sdk {
    # CLEARML - default SDK configuration

    storage {
        cache {
            # Defaults to system temp folder / cache
            default_base_dir: "~/.clearml/cache"
        }

        direct_access: [
            # Objects matching these rules are considered to be available for direct access, i.e. they will not be downloaded
            # or cached, and any download request will return a direct reference.
            # Objects are specified in glob format, available for url and content_type.
            { url: "file://*" }  # file-urls are always directly referenced
        ]
    }

    metrics {
        # History size for debug files per metric/variant. For each metric/variant combination with an attached file
        # (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than
        # X files are stored in the upload destination for each metric/variant combination.
        file_history_size: 100

        # Max history size for matplotlib imshow files per plot title.
        # File names for the uploaded images will be recycled in such a way that no more than
        # X images are stored in the upload destination for each matplotlib plot title.
        matplotlib_untitled_history_size: 100

        # Limit the number of digits after the dot in plot reporting (reducing plot report size)
        # plot_max_num_digits: 5

        # Settings for generated debug images
        images {
            format: JPEG
            quality: 87
            subsampling: 0
        }

        # Support plot-per-graph fully matching Tensorboard behavior (i.e. if this is set to True, each series should have its own graph)
        tensorboard_single_series_per_graph: False
    }

    network {
        metrics {
            # Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
            # a specific iteration
            file_upload_threads: 4

            # Warn about upload starvation if no uploads were made in specified period while file-bearing events keep
            # being sent for upload
            file_upload_starvation_warning_sec: 120
        }

        iteration {
            # Max number of retries when getting frames if the server returned an error (http code 500)
            max_retries_on_server_error: 5
            # Backoff factor for consecutive retry attempts.
            # SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.
            retry_backoff_factor_sec: 10
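            # Illustration only, derived from the formula above: with retry_backoff_factor_sec set to 10,
            # the waits between consecutive retries are 10 * 2^0 = 10s, 10 * 2^1 = 20s, 10 * 2^2 = 40s, and so on.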
        }
    }

    aws {
        s3 {
            # S3 credentials, used for read/write access by various SDK elements

            # default, used for any bucket not specified below
            key: ""
            secret: ""
            region: ""
            # Or enable credentials chain to let Boto3 pick the right credentials.
            # This includes picking credentials from environment variables,
            # credential file and IAM role using metadata service.
            # Refer to the latest Boto3 docs
            use_credentials_chain: false

            credentials: [
                # specifies key/secret credentials to use when handling s3 urls (read or write)
                # {
                #     bucket: "my-bucket-name"
                #     key: "my-access-key"
                #     secret: "my-secret-key"
                # },
                # {
                #     # This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)
                #     host: "my-minio-host:9000"
                #     key: "12345678"
                #     secret: "12345678"
                #     multipart: false
                #     secure: false
                #     verify: /path/to/ca/bundle.crt OR false to not verify
                # }
            ]
        }
        boto3 {
            pool_connections: 512
            max_multipart_concurrency: 16
        }
    }
    google.storage {
        # # Default project and credentials file
        # # Will be used when no bucket configuration is found
        # project: "clearml"
        # credentials_json: "/path/to/credentials.json"

        # # Specific credentials per bucket and sub directory
        # credentials = [
        #     {
        #         bucket: "my-bucket"
        #         subdir: "path/in/bucket"  # Not required
        #         project: "clearml"
        #         credentials_json: "/path/to/credentials.json"
        #     },
        # ]
    }
    azure.storage {
        # containers: [
        #     {
        #         account_name: "clearml"
        #         account_key: "secret"
        #         # container_name:
        #     }
        # ]
    }

    log {
        # debugging feature: set this to true to make null log propagate messages to root logger (so they appear in stdout)
        null_log_propagate: False
        task_log_buffer_capacity: 66

        # disable urllib info and lower levels
        disable_urllib3_info: True
    }

    development {
        # Development-mode options

        # dev task reuse window
        task_reuse_time_window_in_hours: 72.0

        # Run VCS repository detection asynchronously
        vcs_repo_detect_async: True

        # Store uncommitted git/hg source code diff in experiment manifest when training in development mode
        # This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
        store_uncommitted_code_diff_on_train: True

        # Support stopping an experiment in case it was externally stopped, status was changed or task was reset
        support_stopping: True

        # Default Task output_uri. If output_uri is not provided to Task.init, default_output_uri will be used instead.
        default_output_uri: ""

        # Development mode worker
        worker {
            # Status report period in seconds
            report_period_sec: 2

            # ping to the server - check connectivity
            ping_period_sec: 30

            # Log all stdout & stderr
            log_stdout: True
        }
    }

    # Apply top-level environment section from configuration into os.environ
    apply_environment: true
    # Apply top-level files section from configuration into local file system
    apply_files: true
}

# Environment section (top-level) is applied to the OS environment as `key=value` for each key/value pair
# * enable/disable with `agent.apply_environment` OR `sdk.apply_environment`
# Example:
#
# environment {
#     key_a: value_a
#     key_b: value_b
# }

# Files section (top-level) allows auto-generating files at designated paths with
# predefined content and target format.
# * enable/disable with `agent.apply_files` OR `sdk.apply_files`
# Files content options include:
#   contents: the target file's content, typically a string (or any base type int/float/list/dict etc.)
#   format: a custom format for the contents. Currently supported value is `base64` to automatically decode a
#           base64-encoded contents string, otherwise ignored
#   path: the target file's path, may include ~ and inplace env vars
#   target_format: format used to encode contents before writing into the target file. Supported values are json,
#                  yaml, yml and bytes (in which case the file will be written in binary mode). Default is text mode.
#   overwrite: overwrite the target file in case it exists. Default is true.
#
# Example:
# files {
#     myfile1 {
#         contents: "The quick brown fox jumped over the lazy dog"
#         path: "/tmp/fox.txt"
#     }
#     myjsonfile {
#         contents: {
#             some {
#                 nested {
#                     value: [1, 2, 3, 4]
#                 }
#             }
#         }
#         path: "/tmp/test.json"
#         target_format: json
#     }
# }