clearml-agent/docs/clearml.conf

# CLEARML-AGENT configuration file
api {
    # Addresses of the API server, web application and files server this configuration connects to
    api_server: https://demoapi.demo.clear.ml
    web_server: https://demoapp.demo.clear.ml
    files_server: https://demofiles.demo.clear.ml

    # Credentials are generated in the webapp, https://demoapp.demo.clear.ml/profile
    # Overridden with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY
    # NOTE(review): the key pair below is stored in plain text and appears to belong to the demo server
    # configured above - replace it with your own credentials (or use the environment variables)
    # and avoid committing real secrets to version control.
    credentials {"access_key": "EGRTCO8JMSIGI6S39GTP43NFWXDQOW", "secret_key": "x!XTov_G-#vspE*Y(h$Anm&DIc5Ou-F)jsl$PdOyj5wG1&E!Z8"}

    # verify host ssl certificate, set to False only if you have a very good reason
    verify_certificate: True
}

agent {
    # Agent (worker) settings: git access, worker identity, python environment setup, caches and docker

    # Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)
    # leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)
    git_user=""
    git_pass=""
    # Limit credentials to a single domain, for example: github.com,
    # all other domains will use public access (no user/pass). Default: always send user/pass for any VCS domain
    git_host=""

    # Force GIT protocol to use SSH regardless of the git url (Assumes GIT user/pass are blank)
    force_git_ssh_protocol: false
    # Force a specific SSH port when converting http to ssh links (the domain is kept the same)
    # force_git_ssh_port: ""

    # unique name of this worker, if None, created based on hostname:process_id
    # Overridden with os environment: CLEARML_WORKER_ID
    # worker_id: "clearml-agent-machine1:gpu0"
    worker_id: ""

    # worker name, replaces the hostname when creating a unique name for this worker
    # Overridden with os environment: CLEARML_WORKER_NAME
    # worker_name: "clearml-agent-machine1"
    worker_name: ""

    # Set the python version to use when creating the virtual environment and launching the experiment
    # Example values: "/usr/bin/python3" or "/usr/local/bin/python3.6"
    # The default is the python executing the clearml_agent
    python_binary: ""

    # select python package manager:
    # currently supported: pip and conda
    # poetry is used if pip is selected and the repository contains a poetry.lock file
    package_manager: {
        # supported options: pip, conda, poetry
        type: pip,

        # specify pip version to use (examples "<20", "==19.3.1", "", empty string will install the latest version)
        # pip_version: "<20"

        # virtual environment inherits packages from system
        system_site_packages: false,
        # install with --upgrade
        force_upgrade: false,

        # additional artifact repositories to use when installing python packages
        # extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"]
        extra_index_url: []

        # additional conda channels to use when installing with conda package manager
        conda_channels: ["pytorch", "conda-forge", ]
        # conda_full_env_update: false
        # conda_env_as_base_docker: false

        # set the priority packages to be installed before the rest of the required packages
        # priority_packages: ["cython", "numpy", "setuptools", ]

        # set the optional priority packages to be installed before the rest of the required packages,
        # In case a package installation fails, the package will be ignored,
        # and the virtual environment process will continue
        # priority_optional_packages: ["pygobject", ]

        # set the post packages to be installed after all the rest of the required packages
        # post_packages: ["horovod", ]

        # set the optional post packages to be installed after all the rest of the required packages,
        # In case a package installation fails, the package will be ignored,
        # and the virtual environment process will continue
        # post_optional_packages: []

        # set to True to support torch nightly build installation,
        # notice: torch nightly builds are ephemeral and are deleted from time to time
        torch_nightly: false,
    },

    # target folder for virtual environment builds, created when executing an experiment
    venvs_dir = ~/.clearml/venvs-builds

    # cached git clone folder
    vcs_cache: {
        enabled: true,
        path: ~/.clearml/vcs-cache
    },

    # use venv-update in order to accelerate python virtual environment building
    # Still in beta, turned off by default
    venv_update: {
        enabled: false,
    },

    # cached folder for specific python package download (mostly pytorch versions)
    pip_download_cache {
        enabled: true,
        path: ~/.clearml/pip-download-cache
    },

    # NOTE(review): no description is given for translate_ssh - presumably it toggles translating
    # ssh git urls to https when git_user/git_pass are set; confirm against the agent documentation
    translate_ssh: true,
    # reload configuration file every daemon execution
    reload_config: false,

    # pip cache folder mapped into docker, used for python package caching
    docker_pip_cache = ~/.clearml/pip-cache
    # apt cache folder mapped into docker, used for ubuntu package caching
    docker_apt_cache = ~/.clearml/apt-cache

    # optional arguments to pass to docker image
    # these are local for this agent and will not be updated in the experiment's docker_cmd section
    # extra_docker_arguments: ["--ipc=host", ]

    # optional shell script to run in docker when started before the experiment is started
    # extra_docker_shell_script: ["apt-get install -y bindfs", ]

    # set to true in order to force "docker pull" before running an experiment using a docker image.
    # This makes sure the docker image is updated.
    docker_force_pull: false

    default_docker: {
        # default docker image to use when running in docker mode
        image: "nvidia/cuda:10.1-runtime-ubuntu18.04"

        # optional arguments to pass to docker image
        # arguments: ["--ipc=host"]
    }

    # CUDA versions used for Conda setup & solving PyTorch wheel packages
    # Should be detected automatically. Override with os environment CUDA_VERSION / CUDNN_VERSION
    # cuda_version: 10.1
    # cudnn_version: 7.6
}

sdk {
    # CLEARML - default SDK configuration

    storage {
        cache {
            # Defaults to system temp folder / cache
            default_base_dir: "~/.clearml/cache"
        }

        direct_access: [
            # Objects matching are considered to be available for direct access, i.e. they will not be downloaded
            # or cached, and any download request will return a direct reference.
            # Objects are specified in glob format, available for url and content_type.
            { url: "file://*" }  # file-urls are always directly referenced
        ]
    }

    metrics {
        # History size for debug files per metric/variant. For each metric/variant combination with an attached file
        # (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than
        # X files are stored in the upload destination for each metric/variant combination.
        file_history_size: 100

        # Max history size for matplotlib imshow files per plot title.
        # File names for the uploaded images will be recycled in such a way that no more than
        # X images are stored in the upload destination for each matplotlib plot title.
        matplotlib_untitled_history_size: 100

        # Limit the number of digits after the dot in plot reporting (reducing plot report size)
        # plot_max_num_digits: 5

        # Settings for generated debug images
        images {
            format: JPEG
            quality: 87
            subsampling: 0
        }

        # Support plot-per-graph fully matching Tensorboard behavior
        # (i.e. if this is set to True, each series should have its own graph)
        tensorboard_single_series_per_graph: False
    }

    network {
        metrics {
            # Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
            # a specific iteration
            file_upload_threads: 4

            # Warn about upload starvation if no uploads were made in specified period while file-bearing events keep
            # being sent for upload
            file_upload_starvation_warning_sec: 120
        }

        iteration {
            # Max number of retries when getting frames if the server returned an error (http code 500)
            max_retries_on_server_error: 5
            # Backoff factor for consecutive retry attempts.
            # SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.
            retry_backoff_factor_sec: 10
        }
    }
    aws {
        s3 {
            # S3 credentials, used for read/write access by various SDK elements

            # default, used for any bucket not specified below
            key: ""
            secret: ""
            region: ""

            credentials: [
                # specifies key/secret credentials to use when handling s3 urls (read or write)
                # {
                #     bucket: "my-bucket-name"
                #     key: "my-access-key"
                #     secret: "my-secret-key"
                # },
                # {
                #     # This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)
                #     host: "my-minio-host:9000"
                #     key: "12345678"
                #     secret: "12345678"
                #     multipart: false
                #     secure: false
                # }
            ]
        }
        boto3 {
            pool_connections: 512
            max_multipart_concurrency: 16
        }
    }
    google.storage {
        # # Default project and credentials file
        # # Will be used when no bucket configuration is found
        # project: "clearml"
        # credentials_json: "/path/to/credentials.json"

        # # Specific credentials per bucket and sub directory
        # credentials = [
        #     {
        #         bucket: "my-bucket"
        #         subdir: "path/in/bucket" # Not required
        #         project: "clearml"
        #         credentials_json: "/path/to/credentials.json"
        #     },
        # ]
    }
    azure.storage {
        # containers: [
        #     {
        #         account_name: "clearml"
        #         account_key: "secret"
        #         # container_name:
        #     }
        # ]
    }

    log {
        # debugging feature: set this to true to make null log propagate messages to root logger (so they appear in stdout)
        null_log_propagate: False
        task_log_buffer_capacity: 66

        # disable urllib3 info and lower log levels
        disable_urllib3_info: True
    }

    development {
        # Development-mode options

        # dev task reuse window (in hours)
        task_reuse_time_window_in_hours: 72.0

        # Run VCS repository detection asynchronously
        vcs_repo_detect_async: True

        # Store uncommitted git/hg source code diff in experiment manifest when training in development mode
        # This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
        store_uncommitted_code_diff_on_train: True

        # Support stopping an experiment in case it was externally stopped, status was changed or task was reset
        support_stopping: True

        # Default Task output_uri. If output_uri is not provided to Task.init, default_output_uri will be used instead.
        default_output_uri: ""

        # Development mode worker
        worker {
            # Status report period in seconds
            report_period_sec: 2

            # ping to the server - check connectivity
            ping_period_sec: 30

            # Log all stdout & stderr
            log_stdout: True
        }
    }
}
initial clearml-agent v0.17.0 2020-12-22 21:00:57 +00:00			`# CLEARML-AGENT configuration file`
			`api {`
			`api_server: https://demoapi.demo.clear.ml`
			`web_server: https://demoapp.demo.clear.ml`
			`files_server: https://demofiles.demo.clear.ml`

			`# Credentials are generated in the webapp, https://demoapp.demo.clear.ml/profile`
			`# Overridden with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY`
			`credentials {"access_key": "EGRTCO8JMSIGI6S39GTP43NFWXDQOW", "secret_key": "x!XTov_G-#vspE*Y(h$Anm&DIc5Ou-F)jsl$PdOyj5wG1&E!Z8"}`

			`# verify host ssl certificate, set to False only if you have a very good reason`
			`verify_certificate: True`
			`}`

			`agent {`
			`# Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)`
			`# leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)`
			`git_user=""`
			`git_pass=""`
			`# Limit credentials to a single domain, for example: github.com,`
			`# all other domains will use public access (no user/pass). Default: always send user/pass for any VCS domain`
			`git_host=""`

			`# Force GIT protocol to use SSH regardless of the git url (Assumes GIT user/pass are blank)`
			`force_git_ssh_protocol: false`
			`# Force a specific SSH port when converting http to ssh links (the domain is kept the same)`
			`# force_git_ssh_port: ""`

			`# unique name of this worker, if None, created based on hostname:process_id`
			`# Overridden with os environment: CLEARML_WORKER_NAME`
			`# worker_id: "clearml-agent-machine1:gpu0"`
			`worker_id: ""`

			`# worker name, replaces the hostname when creating a unique name for this worker`
			`# Overridden with os environment: CLEARML_WORKER_ID`
			`# worker_name: "clearml-agent-machine1"`
			`worker_name: ""`

			`# Set the python version to use when creating the virtual environment and launching the experiment`
			`# Example values: "/usr/bin/python3" or "/usr/local/bin/python3.6"`
			`# The default is the python executing the clearml_agent`
			`python_binary: ""`

			`# select python package manager:`
			`# currently supported pip and conda`
			`# poetry is used if pip selected and repository contains poetry.lock file`
			`package_manager: {`
			`# supported options: pip, conda, poetry`
			`type: pip,`

			`# specify pip version to use (examples "<20", "==19.3.1", "", empty string will install the latest version)`
			`# pip_version: "<20"`

			`# virtual environment inheres packages from system`
			`system_site_packages: false,`
			`# install with --upgrade`
			`force_upgrade: false,`

			`# additional artifact repositories to use when installing python packages`
			`# extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"]`
			`extra_index_url: []`

			`# additional conda channels to use when installing with conda package manager`
			`conda_channels: ["pytorch", "conda-forge", ]`
			`# conda_full_env_update: false`
			`# conda_env_as_base_docker: false`

			`# set the priority packages to be installed before the rest of the required packages`
			`# priority_packages: ["cython", "numpy", "setuptools", ]`

			`# set the optional priority packages to be installed before the rest of the required packages,`
			`# In case a package installation fails, the package will be ignored,`
			`# and the virtual environment process will continue`
			`# priority_optional_packages: ["pygobject", ]`

			`# set the post packages to be installed after all the rest of the required packages`
			`# post_packages: ["horovod", ]`

			`# set the optional post packages to be installed after all the rest of the required packages,`
			`# In case a package installation fails, the package will be ignored,`
			`# and the virtual environment process will continue`
			`# post_optional_packages: []`

			`# set to True to support torch nightly build installation,`
			`# notice: torch nightly builds are ephemeral and are deleted from time to time`
			`torch_nightly: false,`
			`},`

			`# target folder for virtual environments builds, created when executing experiment`
			`venvs_dir = ~/.clearml/venvs-builds`

			`# cached git clone folder`
			`vcs_cache: {`
			`enabled: true,`
			`path: ~/.clearml/vcs-cache`
			`},`

			`# use venv-update in order to accelerate python virtual environment building`
			`# Still in beta, turned off by default`
			`venv_update: {`
			`enabled: false,`
			`},`

			`# cached folder for specific python package download (mostly pytorch versions)`
			`pip_download_cache {`
			`enabled: true,`
			`path: ~/.clearml/pip-download-cache`
			`},`

			`translate_ssh: true,`
			`# reload configuration file every daemon execution`
			`reload_config: false,`

			`# pip cache folder mapped into docker, used for python package caching`
			`docker_pip_cache = ~/.clearml/pip-cache`
			`# apt cache folder mapped into docker, used for ubuntu package caching`
			`docker_apt_cache = ~/.clearml/apt-cache`

			`# optional arguments to pass to docker image`
			`# these are local for this agent and will not be updated in the experiment's docker_cmd section`
			`# extra_docker_arguments: ["--ipc=host", ]`

			`# optional shell script to run in docker when started before the experiment is started`
			`# extra_docker_shell_script: ["apt-get install -y bindfs", ]`

			`# set to true in order to force "docker pull" before running an experiment using a docker image.`
			`# This makes sure the docker image is updated.`
			`docker_force_pull: false`

			`default_docker: {`
			`# default docker image to use when running in docker mode`
			`image: "nvidia/cuda:10.1-runtime-ubuntu18.04"`

			`# optional arguments to pass to docker image`
			`# arguments: ["--ipc=host"]`
			`}`

			`# CUDA versions used for Conda setup & solving PyTorch wheel packages`
			`# it Should be detected automatically. Override with os environment CUDA_VERSION / CUDNN_VERSION`
			`# cuda_version: 10.1`
			`# cudnn_version: 7.6`
			`}`

			`sdk {`
			`# CLEARML - default SDK configuration`

			`storage {`
			`cache {`
			`# Defaults to system temp folder / cache`
			`default_base_dir: "~/.clearml/cache"`
			`}`

			`direct_access: [`
			`# Objects matching are considered to be available for direct access, i.e. they will not be downloaded`
			`# or cached, and any download request will return a direct reference.`
			`# Objects are specified in glob format, available for url and content_type.`
			`{ url: "file://*" } # file-urls are always directly referenced`
			`]`
			`}`

			`metrics {`
			`# History size for debug files per metric/variant. For each metric/variant combination with an attached file`
			`# (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than`
			`# X files are stored in the upload destination for each metric/variant combination.`
			`file_history_size: 100`

			`# Max history size for matplotlib imshow files per plot title.`
			`# File names for the uploaded images will be recycled in such a way that no more than`
			`# X images are stored in the upload destination for each matplotlib plot title.`
			`matplotlib_untitled_history_size: 100`

			`# Limit the number of digits after the dot in plot reporting (reducing plot report size)`
			`# plot_max_num_digits: 5`

			`# Settings for generated debug images`
			`images {`
			`format: JPEG`
			`quality: 87`
			`subsampling: 0`
			`}`

			`# Support plot-per-graph fully matching Tensorboard behavior (i.e. if this is set to True, each series should have its own graph)`
			`tensorboard_single_series_per_graph: False`
			`}`

			`network {`
			`metrics {`
			`# Number of threads allocated to uploading files (typically debug images) when transmitting metrics for`
			`# a specific iteration`
			`file_upload_threads: 4`

			`# Warn about upload starvation if no uploads were made in specified period while file-bearing events keep`
			`# being sent for upload`
			`file_upload_starvation_warning_sec: 120`
			`}`

			`iteration {`
			`# Max number of retries when getting frames if the server returned an error (http code 500)`
			`max_retries_on_server_error: 5`
			`# Backoff factory for consecutive retry attempts.`
			`# SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.`
			`retry_backoff_factor_sec: 10`
			`}`
			`}`
			`aws {`
			`s3 {`
			`# S3 credentials, used for read/write access by various SDK elements`

			`# default, used for any bucket not specified below`
			`key: ""`
			`secret: ""`
			`region: ""`

			`credentials: [`
			`# specifies key/secret credentials to use when handling s3 urls (read or write)`
			`# {`
			`# bucket: "my-bucket-name"`
			`# key: "my-access-key"`
			`# secret: "my-secret-key"`
			`# },`
			`# {`
			`# # This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)`
			`# host: "my-minio-host:9000"`
			`# key: "12345678"`
			`# secret: "12345678"`
			`# multipart: false`
			`# secure: false`
			`# }`
			`]`
			`}`
			`boto3 {`
			`pool_connections: 512`
			`max_multipart_concurrency: 16`
			`}`
			`}`
			`google.storage {`
			`# # Default project and credentials file`
			`# # Will be used when no bucket configuration is found`
			`# project: "clearml"`
			`# credentials_json: "/path/to/credentials.json"`

			`# # Specific credentials per bucket and sub directory`
			`# credentials = [`
			`# {`
			`# bucket: "my-bucket"`
			`# subdir: "path/in/bucket" # Not required`
			`# project: "clearml"`
			`# credentials_json: "/path/to/credentials.json"`
			`# },`
			`# ]`
			`}`
			`azure.storage {`
			`# containers: [`
			`# {`
			`# account_name: "clearml"`
			`# account_key: "secret"`
			`# # container_name:`
			`# }`
			`# ]`
			`}`

			`log {`
			`# debugging feature: set this to true to make null log propagate messages to root logger (so they appear in stdout)`
			`null_log_propagate: False`
			`task_log_buffer_capacity: 66`

			`# disable urllib info and lower levels`
			`disable_urllib3_info: True`
			`}`

			`development {`
			`# Development-mode options`

			`# dev task reuse window`
			`task_reuse_time_window_in_hours: 72.0`

			`# Run VCS repository detection asynchronously`
			`vcs_repo_detect_async: True`

			`# Store uncommitted git/hg source code diff in experiment manifest when training in development mode`
			`# This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section`
			`store_uncommitted_code_diff_on_train: True`

			`# Support stopping an experiment in case it was externally stopped, status was changed or task was reset`
			`support_stopping: True`

			`# Default Task output_uri. if output_uri is not provided to Task.init, default_output_uri will be used instead.`
			`default_output_uri: ""`

			`# Development mode worker`
			`worker {`
			`# Status report period in seconds`
			`report_period_sec: 2`

			`# ping to the server - check connectivity`
			`ping_period_sec: 30`

			`# Log all stdout & stderr`
			`log_stdout: True`
			`}`
			`}`
			`}`