# CLEARML-AGENT configuration file

api {
    # Notice: 'host' is the api server (default port 8008), not the web server.
    api_server: ""
    web_server: ""
    files_server: ""

    # Override with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY
    credentials {"access_key": "", "secret_key": ""}
}

# Set GIT user/pass credentials
# leave blank for GIT SSH credentials
agent.git_user=""
agent.git_pass=""

# extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"]
agent.package_manager.extra_index_url= [
]

agent {
    # unique name of this worker, if None, created based on hostname:process_id
    # Override with os environment: CLEARML_WORKER_ID
    # worker_id: "clearml-agent-machine1:gpu0"
    worker_id: ""

    # worker name, replaces the hostname when creating a unique name for this worker
    # Override with os environment: CLEARML_WORKER_NAME
    # worker_name: "clearml-agent-machine1"
    worker_name: ""

    # Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)
    # leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)
    # git_user: ""
    # git_pass: ""
    # git_host: ""

    # Force GIT protocol to use SSH regardless of the git url (Assumes GIT user/pass are blank)
    force_git_ssh_protocol: false

    # Force a specific SSH port when converting http to ssh links (the domain is kept the same)
    # force_git_ssh_port: 0
    # Force a specific SSH username when converting http to ssh links (the default username is 'git')
    # force_git_ssh_user: git

    # Set the python version to use when creating the virtual environment and launching the experiment
    # Example values: "/usr/bin/python3" or "/usr/local/bin/python3.6"
    # The default is the python executing the clearml_agent
    python_binary: ""
    # ignore any requested python version (Default: False; if a Task requested a specific python version
    # and the system supports multiple pythons, the agent will use the requested python version)
    # ignore_requested_python_version: true

    # select python package manager:
    # currently supported: pip and conda
    # poetry is used if pip is selected and the repository contains a poetry.lock file
    package_manager: {
        # supported options: pip, conda, poetry
        type: pip,

        # specify pip version to use (examples: "<20.2", "==19.3.1", ""; an empty string will install the latest version)
        pip_version: ["<20.2 ; python_version < '3.10'", "<22.3 ; python_version >= '3.10'"],

        # virtual environment inherits packages from system
        system_site_packages: false,
        # install with --upgrade
        force_upgrade: false,

        # additional artifact repositories to use when installing python packages
        # extra_index_url: ["https://allegroai.jfrog.io/clearmlai/api/pypi/public/simple"]

        # additional conda channels to use when installing with conda package manager
        conda_channels: ["pytorch", "conda-forge", "defaults", ]

        # If set to true, the Task's "installed packages" are ignored,
        # and the repository's "requirements.txt" is used instead
        # force_repo_requirements_txt: false

        # set the priority packages to be installed before the rest of the required packages
        # priority_packages: ["cython", "numpy", "setuptools", ]

        # set the optional priority packages to be installed before the rest of the required packages.
        # In case a package installation fails, the package will be ignored,
        # and the virtual environment process will continue
        # priority_optional_packages: ["pygobject", ]

        # set the post packages to be installed after all the rest of the required packages
        # post_packages: ["horovod", ]

        # set the optional post packages to be installed after all the rest of the required packages.
        # In case a package installation fails, the package will be ignored,
        # and the virtual environment process will continue
        # post_optional_packages: []

        # set to True to support torch nightly build installation,
        # notice: torch nightly builds are ephemeral and are deleted from time to time
        torch_nightly: false,
    },
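    # Illustrative example only (the index URL and package names below are
    # placeholders, not defaults): a pip-based package_manager that pulls from
    # a private mirror and installs numpy before the remaining requirements:
    # package_manager: {
    #     type: pip,
    #     extra_index_url: ["https://pypi.internal.example.com/simple"]
    #     priority_packages: ["numpy", ]
    # }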
    # target folder for virtual environments builds, created when executing experiment
    venvs_dir = ~/.clearml/venvs-builds

    # cached virtual environment folder
    venvs_cache: {
        # maximum number of cached venvs
        max_entries: 10
        # minimum required free space to allow for cache entry, disable by passing 0 or negative value
        free_space_threshold_gb: 2.0
        # uncomment to enable virtual environment caching
        # path: ~/.clearml/venvs-cache
    },

    # cached git clone folder
    vcs_cache: {
        enabled: true,
        path: ~/.clearml/vcs-cache
    },

    # use venv-update in order to accelerate python virtual environment building
    # Still in beta, turned off by default
    venv_update: {
        enabled: false,
    },

    # cached folder for specific python package download (used for pytorch package caching)
    pip_download_cache {
        enabled: true,
        path: ~/.clearml/pip-download-cache
    },

    translate_ssh: true,
    # reload configuration file every daemon execution
    reload_config: false,

    # pip cache folder mapped into docker, used for python package caching
    docker_pip_cache = ~/.clearml/pip-cache
    # apt cache folder mapped into docker, used for ubuntu package caching
    docker_apt_cache = ~/.clearml/apt-cache

    # optional arguments to pass to docker image
    # these are local for this agent and will not be updated in the experiment's docker_cmd section
    # extra_docker_arguments: ["--ipc=host", ]

    # optional shell script to run in docker when started before the experiment is started
    # extra_docker_shell_script: ["apt-get install -y bindfs", ]

    # Install the required packages for opencv libraries (libsm6 libxext6 libxrender-dev libglib2.0-0);
    # for backwards compatibility reasons this defaults to true,
    # change to false to skip installation and decrease docker spin-up time
    # docker_install_opencv_libs: true

    # optional uptime configuration, make sure to use only one of 'uptime/downtime' and not both.
    # If uptime is specified, the agent will actively poll (and execute) tasks in the time-spans defined here.
    # Outside of the specified time-spans, the agent will be idle.
    # Defined using a list of items of the format: "<hours> <days>".
    # hours - use values 0-23; a single value counts as the start hour, with the span ending at midnight.
    # days - use days in abbreviated format (SUN-SAT)
    # use '-' for ranges and ',' to separate singular values.
    # for example, to enable the workers every Sunday and Tuesday between 17:00-20:00 set uptime to:
    # uptime: ["17-20 SUN,TUE"]

    # optional downtime configuration, can be used only when uptime is not used.
    # If downtime is specified, the agent will be idle in the time-spans defined here.
    # Outside of the specified time-spans, the agent will actively poll (and execute) tasks.
    # Use the same format as described above for uptime
    # downtime: []

    # set to true in order to force "docker pull" before running an experiment using a docker image.
    # This makes sure the docker image is updated.
    docker_force_pull: false

    default_docker: {
        # default docker image to use when running in docker mode
        image: "nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04"
        # optional arguments to pass to docker image
        # arguments: ["--ipc=host", ]
    }
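    # Illustrative example (the image tag is a sample; verify it exists in your
    # registry before use): a default docker section with a newer CUDA image
    # and a shared-memory flag:
    # default_docker: {
    #     image: "nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04"
    #     arguments: ["--ipc=host", ]
    # }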
    # set the OS environments based on the Task's Environment section before launching the Task process.
    enable_task_env: false

    # set the initial bash script to execute at the startup of any docker.
    # all lines will be executed regardless of their exit code.
    # {python_single_digit} is translated to 'python3' or 'python2' according to requested python version
    # docker_init_bash_script = [
    #     "echo 'Binary::apt::APT::Keep-Downloaded-Packages \"true\";' > /etc/apt/apt.conf.d/docker-clean",
    #     "chown -R root /root/.cache/pip",
    #     "apt-get update",
    #     "apt-get install -y git libsm6 libxext6 libxrender-dev libglib2.0-0",
    #     "(which {python_single_digit} && {python_single_digit} -m pip --version) || apt-get install -y {python_single_digit}-pip",
    # ]

    # set the preprocessing bash script to execute at the startup of any docker.
    # all lines will be executed regardless of their exit code.
    # docker_preprocess_bash_script = [
    #     "echo \"starting docker\"",
    # ]

    # If False, replace \r with \n and display the full console output.
    # The default is True: report a single \r line in a sequence of consecutive lines, per 5 seconds.
    # suppress_carriage_return: true

    # cuda versions used for solving pytorch wheel packages
    # should be detected automatically. Override with os environment CUDA_VERSION / CUDNN_VERSION
    # cuda_version: 10.1
    # cudnn_version: 7.6

    # Hide docker environment variables containing secrets when printing out the docker command by replacing their
    # values with "********". Turning this feature on will hide the values of the following environment variables:
    # CLEARML_API_SECRET_KEY, CLEARML_AGENT_GIT_PASS, AWS_SECRET_ACCESS_KEY, AZURE_STORAGE_KEY
    # To include more environment variables, add their keys to the "extra_keys" list. E.g. to make sure the value of
    # your custom environment variable named MY_SPECIAL_PASSWORD will not show in the logs when included in the
    # docker command, set:
    # extra_keys: ["MY_SPECIAL_PASSWORD"]
    hide_docker_command_env_vars {
        enabled: true
        extra_keys: []
    }
}

sdk {
    # ClearML - default SDK configuration

    storage {
        cache {
            # Defaults to <system_temp_folder>/clearml_cache
            default_base_dir: "~/.clearml/cache"
            size {
                # max_used_bytes = -1
                min_free_bytes = 10GB
                # cleanup_margin_percent = 5%
            }
        }

        direct_access: [
            # Objects matching are considered to be available for direct access, i.e. they will not be downloaded
            # or cached, and any download request will return a direct reference.
            # Objects are specified in glob format, available for url and content_type.
            { url: "file://*" }  # file-urls are always directly referenced
        ]
    }
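    # Illustrative example (sample values only, not defaults): cap the local
    # cache size and keep a safety margin of free disk space:
    # storage.cache.size {
    #     max_used_bytes = 50GB
    #     min_free_bytes = 5GB
    # }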
    metrics {
        # History size for debug files per metric/variant. For each metric/variant combination with an attached file
        # (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than
        # X files are stored in the upload destination for each metric/variant combination.
        file_history_size: 100

        # Max history size for matplotlib imshow files per plot title.
        # File names for the uploaded images will be recycled in such a way that no more than
        # X images are stored in the upload destination for each matplotlib plot title.
        matplotlib_untitled_history_size: 100

        # Limit the number of digits after the dot in plot reporting (reducing plot report size)
        # plot_max_num_digits: 5

        # Settings for generated debug images
        images {
            format: JPEG
            quality: 87
            subsampling: 0
        }

        # Support plot-per-graph fully matching Tensorboard behavior
        # (i.e. if this is set to true, each series should have its own graph)
        tensorboard_single_series_per_graph: false
    }

    network {
        metrics {
            # Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
            # a specific iteration
            file_upload_threads: 4

            # Warn about upload starvation if no uploads were made in specified period while file-bearing events keep
            # being sent for upload
            file_upload_starvation_warning_sec: 120
        }

        iteration {
            # Max number of retries when getting frames if the server returned an error (http code 500)
            max_retries_on_server_error: 5
            # Backoff factor for consecutive retry attempts.
            # SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.
            retry_backoff_factor_sec: 10
        }
    }
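    # Illustrative worked example of the backoff formula above: with
    # retry_backoff_factor_sec: 10, the wait before retry N is 10 * 2^(N-1)
    # seconds, i.e. 10s, 20s, 40s, 80s, 160s for five consecutive retries.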
    aws {
        s3 {
            # S3 credentials, used for read/write access by various SDK elements

            # default, used for any bucket not specified below
            key: ""
            secret: ""
            region: ""

            credentials: [
                # specifies key/secret credentials to use when handling s3 urls (read or write)
                # {
                #     bucket: "my-bucket-name"
                #     key: "my-access-key"
                #     secret: "my-secret-key"
                # },
                # {
                #     # This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)
                #     host: "my-minio-host:9000"
                #     key: "12345678"
                #     secret: "12345678"
                #     multipart: false
                #     secure: false
                # }
            ]
        }
        boto3 {
            pool_connections: 512
            max_multipart_concurrency: 16
        }
    }

    google.storage {
        # # Default project and credentials file
        # # Will be used when no bucket configuration is found
        # project: "clearml"
        # credentials_json: "/path/to/credentials.json"

        # # Specific credentials per bucket and sub directory
        # credentials = [
        #     {
        #         bucket: "my-bucket"
        #         subdir: "path/in/bucket"  # Not required
        #         project: "clearml"
        #         credentials_json: "/path/to/credentials.json"
        #     },
        # ]
    }

    azure.storage {
        # containers: [
        #     {
        #         account_name: "clearml"
        #         account_key: "secret"
        #         # container_name:
        #     }
        # ]
    }

    log {
        # debugging feature: set this to true to make the null log propagate messages to the root logger (so they appear in stdout)
        null_log_propagate: false
        task_log_buffer_capacity: 66

        # disable urllib info and lower levels
        disable_urllib3_info: true
    }

    development {
        # Development-mode options

        # dev task reuse window
        task_reuse_time_window_in_hours: 72.0

        # Run VCS repository detection asynchronously
        vcs_repo_detect_async: true

        # Store uncommitted git/hg source code diff in experiment manifest when training in development mode
        # This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
        store_uncommitted_code_diff: true

        # Support stopping an experiment in case it was externally stopped, its status was changed, or the task was reset
        support_stopping: true

        # Default Task output_uri. If output_uri is not provided to Task.init, default_output_uri will be used instead.
        default_output_uri: ""

        # By default, auto-generated requirements are optimized for a smaller requirements set.
        # If True, analyze the entire repository regardless of the entry point.
        # If False, first analyze the entry point script; if it does not reference other local files,
        # do not analyze the entire repository.
        force_analyze_entire_repo: false

        # If set to true, the *clearml* update message will not be printed to the console
        # this value can be overwritten with os environment variable CLEARML_SUPPRESS_UPDATE_MESSAGE=1
        suppress_update_message: false

        # If this flag is true (default is false), instead of analyzing the code with Pigar, analyze with `pip freeze`
        detect_with_pip_freeze: false

        # Development mode worker
        worker {
            # Status report period in seconds
            report_period_sec: 2

            # ping to the server - check connectivity
            ping_period_sec: 30

            # Log all stdout & stderr
            log_stdout: true

            # compatibility feature: report memory usage for the entire machine
            # default (false), report only on the running process and its sub-processes
            report_global_mem_used: false
        }
    }
}
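# Illustrative example (bucket name is a placeholder): route all Task models
# and artifacts to S3 by default, using HOCON dotted-path syntax as in the
# agent.* overrides at the top of this file:
# sdk.development.default_output_uri: "s3://my-bucket/clearml-output"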