mirror of
https://github.com/clearml/clearml
synced 2025-01-31 09:07:00 +00:00
62bc54d7be
Fix sub-process support; fix delete_after_upload option when uploading images; add loguru support; fix subsample plots if they are too big; fix requests for over 15mb
144 lines
4.8 KiB
Plaintext
144 lines
4.8 KiB
Plaintext
# TRAINS SDK configuration file
|
|
api {
|
|
# web_server on port 8080
|
|
web_server: "http://localhost:8080"
|
|
|
|
# Notice: 'api_server' is the api server (default port 8008), not the web server.
|
|
api_server: "http://localhost:8008"
|
|
|
|
# file server on port 8081
|
|
files_server: "http://localhost:8081"
|
|
|
|
# Credentials are generated in the webapp, http://localhost:8080/admin
|
|
credentials {"access_key": "EGRTCO8JMSIGI6S39GTP43NFWXDQOW", "secret_key": "x!XTov_G-#vspE*Y(h$Anm&DIc5Ou-F)jsl$PdOyj5wG1&E!Z8"}
|
|
|
|
# verify host ssl certificate, set to False only if you have a very good reason
|
|
verify_certificate: True
|
|
}
|
|
sdk {
|
|
# TRAINS - default SDK configuration
|
|
|
|
storage {
|
|
cache {
|
|
# Defaults to system temp folder / cache
|
|
default_base_dir: "~/.trains/cache"
|
|
}
|
|
}
|
|
|
|
metrics {
|
|
# History size for debug files per metric/variant. For each metric/variant combination with an attached file
|
|
# (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than
|
|
# X files are stored in the upload destination for each metric/variant combination.
|
|
file_history_size: 100
|
|
|
|
# Settings for generated debug images
|
|
images {
|
|
format: JPEG
|
|
quality: 87
|
|
subsampling: 0
|
|
}
|
|
}
|
|
|
|
network {
|
|
metrics {
|
|
# Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
|
|
# a specific iteration
|
|
file_upload_threads: 4
|
|
|
|
# Warn about upload starvation if no uploads were made in specified period while file-bearing events keep
|
|
# being sent for upload
|
|
file_upload_starvation_warning_sec: 120
|
|
}
|
|
|
|
iteration {
|
|
# Max number of retries when getting frames if the server returned an error (http code 500)
|
|
max_retries_on_server_error: 5
|
|
# Backoff factor for consecutive retry attempts.
|
|
# SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.
|
|
retry_backoff_factor_sec: 10
|
|
}
|
|
}
|
|
aws {
|
|
s3 {
|
|
# S3 credentials, used for read/write access by various SDK elements
|
|
|
|
# default, used for any bucket not specified below
|
|
key: ""
|
|
secret: ""
|
|
region: ""
|
|
|
|
credentials: [
|
|
# specifies key/secret credentials to use when handling s3 urls (read or write)
|
|
# {
|
|
# bucket: "my-bucket-name"
|
|
# key: "my-access-key"
|
|
# secret: "my-secret-key"
|
|
# },
|
|
# {
|
|
# # This will apply to all buckets in this host (unless key/secret is specifically provided for a given bucket)
|
|
# host: "my-minio-host:9000"
|
|
# key: "12345678"
|
|
# secret: "12345678"
|
|
# multipart: false
|
|
# secure: false
|
|
# }
|
|
]
|
|
}
|
|
boto3 {
|
|
pool_connections: 512
|
|
max_multipart_concurrency: 16
|
|
}
|
|
}
|
|
google.storage {
|
|
# # Default project and credentials file
|
|
# # Will be used when no bucket configuration is found
|
|
# project: "trains"
|
|
# credentials_json: "/path/to/credentials.json"
|
|
|
|
# # Specific credentials per bucket and sub directory
|
|
# credentials = [
|
|
# {
|
|
# bucket: "my-bucket"
|
|
# subdir: "path/in/bucket" # Not required
|
|
# project: "trains"
|
|
# credentials_json: "/path/to/credentials.json"
|
|
# },
|
|
# ]
|
|
}
|
|
|
|
log {
|
|
# debugging feature: set this to true to make null log propagate messages to root logger (so they appear in stdout)
|
|
null_log_propagate: False
|
|
task_log_buffer_capacity: 66
|
|
|
|
# disable urllib3 log messages at info level and below
|
|
disable_urllib3_info: True
|
|
}
|
|
|
|
development {
|
|
# Development-mode options
|
|
|
|
# dev task reuse window
|
|
task_reuse_time_window_in_hours: 72.0
|
|
|
|
# Run VCS repository detection asynchronously
|
|
vcs_repo_detect_async: True
|
|
|
|
# Store uncommitted git/hg source code diff in experiment manifest when training in development mode
|
|
# This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
|
|
store_uncommitted_code_diff_on_train: True
|
|
|
|
# Support stopping an experiment in case it was externally stopped, status was changed or task was reset
|
|
support_stopping: True
|
|
|
|
# Development mode worker
|
|
worker {
|
|
# Status report period in seconds
|
|
report_period_sec: 2
|
|
|
|
# Log all stdout & stderr
|
|
log_stdout: True
|
|
}
|
|
}
|
|
}
|