2019-06-10 17:00:28 +00:00
|
|
|
# TRAINS SDK configuration file
|
|
|
|
api {
|
2019-07-20 19:01:27 +00:00
|
|
|
# web_server on port 8080
|
|
|
|
web_server: "http://localhost:8080"
|
|
|
|
|
|
|
|
# Notice: 'api_server' is the api server (default port 8008), not the web server.
|
|
|
|
api_server: "http://localhost:8008"
|
|
|
|
|
|
|
|
# file server on port 8081
|
|
|
|
files_server: "http://localhost:8081"
|
2019-06-19 12:14:28 +00:00
|
|
|
|
2019-06-11 11:36:36 +00:00
|
|
|
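# Credentials are generated in the webapp, http://localhost:8080/admin
# NOTE(review): the key pair below looks like a sample/default value - replace it with your own credentials and avoid committing real secrets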
|
|
|
|
credentials {"access_key": "EGRTCO8JMSIGI6S39GTP43NFWXDQOW", "secret_key": "x!XTov_G-#vspE*Y(h$Anm&DIc5Ou-F)jsl$PdOyj5wG1&E!Z8"}
|
2019-06-19 12:14:28 +00:00
|
|
|
|
|
|
|
# verify host ssl certificate, set to False only if you have a very good reason
|
|
|
|
verify_certificate: True
|
2019-06-10 17:00:28 +00:00
|
|
|
}
|
|
|
|
sdk {
|
|
|
|
# TRAINS - default SDK configuration
|
|
|
|
|
|
|
|
storage {
|
|
|
|
cache {
|
|
|
|
# Defaults to system temp folder / cache
|
|
|
|
default_base_dir: "~/.trains/cache"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
metrics {
|
|
|
|
# History size for debug files per metric/variant. For each metric/variant combination with an attached file
|
|
|
|
# (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than
|
|
|
|
# X files are stored in the upload destination for each metric/variant combination.
|
|
|
|
file_history_size: 100
|
|
|
|
|
|
|
|
# Settings for generated debug images
|
|
|
|
images {
|
|
|
|
format: JPEG
|
|
|
|
quality: 87
|
|
|
|
subsampling: 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
network {
|
|
|
|
metrics {
|
|
|
|
# Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
|
|
|
|
# a specific iteration
|
|
|
|
file_upload_threads: 4
|
|
|
|
|
|
|
|
# Warn about upload starvation if no uploads were made in specified period while file-bearing events keep
|
|
|
|
# being sent for upload
|
|
|
|
file_upload_starvation_warning_sec: 120
|
|
|
|
}
|
|
|
|
|
|
|
|
iteration {
|
|
|
|
# Max number of retries when getting frames if the server returned an error (http code 500)
|
|
|
|
max_retries_on_server_error: 5
|
|
|
|
# Backoff factor for consecutive retry attempts.
|
|
|
|
# SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.
|
|
|
|
retry_backoff_factor_sec: 10
|
|
|
|
}
|
|
|
|
}
|
|
|
|
aws {
|
|
|
|
s3 {
|
|
|
|
# S3 credentials, used for read/write access by various SDK elements
|
|
|
|
|
|
|
|
# default, used for any bucket not specified below
|
|
|
|
key: ""
|
|
|
|
secret: ""
|
|
|
|
region: ""
|
|
|
|
|
|
|
|
credentials: [
|
|
|
|
# specifies key/secret credentials to use when handling s3 urls (read or write)
|
|
|
|
# {
|
|
|
|
# bucket: "my-bucket-name"
|
|
|
|
# key: "my-access-key"
|
|
|
|
# secret: "my-secret-key"
|
|
|
|
# },
|
|
|
|
# {
|
|
|
|
# # This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)
|
|
|
|
# host: "my-minio-host:9000"
|
|
|
|
# key: "12345678"
|
|
|
|
# secret: "12345678"
|
|
|
|
# multipart: false
|
|
|
|
# secure: false
|
|
|
|
# }
|
|
|
|
]
|
|
|
|
}
|
|
|
|
boto3 {
|
|
|
|
pool_connections: 512
|
|
|
|
max_multipart_concurrency: 16
|
|
|
|
}
|
|
|
|
}
|
|
|
|
google.storage {
|
|
|
|
# # Default project and credentials file
|
|
|
|
# # Will be used when no bucket configuration is found
|
|
|
|
# project: "trains"
|
|
|
|
# credentials_json: "/path/to/credentials.json"
|
|
|
|
|
|
|
|
# # Specific credentials per bucket and sub directory
|
|
|
|
# credentials = [
|
|
|
|
# {
|
|
|
|
# bucket: "my-bucket"
|
|
|
|
# subdir: "path/in/bucket" # Not required
|
|
|
|
# project: "trains"
|
|
|
|
# credentials_json: "/path/to/credentials.json"
|
|
|
|
# },
|
|
|
|
# ]
|
|
|
|
}
|
|
|
|
|
|
|
|
log {
|
|
|
|
# debugging feature: set this to true to make null log propagate messages to root logger (so they appear in stdout)
|
|
|
|
null_log_propagate: False
|
|
|
|
task_log_buffer_capacity: 66
|
|
|
|
|
|
|
|
# disable urllib3 info and lower levels
|
|
|
|
disable_urllib3_info: True
|
|
|
|
}
|
|
|
|
|
|
|
|
development {
|
|
|
|
# Development-mode options
|
|
|
|
|
|
|
|
# dev task reuse window
|
|
|
|
task_reuse_time_window_in_hours: 72.0
|
|
|
|
|
|
|
|
# Run VCS repository detection asynchronously
|
2019-07-06 20:02:24 +00:00
|
|
|
vcs_repo_detect_async: True
|
2019-06-10 17:00:28 +00:00
|
|
|
|
|
|
|
# Store uncommitted git/hg source code diff in experiment manifest when training in development mode
|
|
|
|
# This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
|
|
|
|
store_uncommitted_code_diff_on_train: True
|
|
|
|
|
|
|
|
# Support stopping an experiment in case it was externally stopped, status was changed or task was reset
|
|
|
|
support_stopping: True
|
|
|
|
|
|
|
|
# Development mode worker
|
|
|
|
worker {
|
|
|
|
# Status report period in seconds
|
|
|
|
report_period_sec: 2
|
|
|
|
|
|
|
|
# Log all stdout & stderr
|
|
|
|
log_stdout: True
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|