mirror of
https://github.com/clearml/clearml-agent
synced 2025-02-07 05:19:17 +00:00
Add auto-terminate, an increased polling interval, and a default Docker image to the AWS dynamic cluster management service
This commit is contained in:
parent
facbee0005
commit
23668a403a
@@ -166,7 +166,11 @@
|
||||
"# echo \"This is the second line\"\n",
|
||||
"# \"\"\"\n",
|
||||
"EXTRA_BASH_SCRIPT = \"\"\"\n",
|
||||
"\"\"\""
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Default docker for trains-agent when running in docker mode (requires docker v19.03 and above). \n",
|
||||
"# Leave empty to run trains-agent in non-docker mode.\n",
|
||||
"DEFAULT_DOCKER_IMAGE = \"nvidia/cuda\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -180,7 +184,8 @@
|
||||
"# maximum idle time in minutes, after which the instance will be shutdown\n",
|
||||
"MAX_IDLE_TIME_MIN = 15\n",
|
||||
"# polling interval in minutes\n",
|
||||
"POLLING_INTERVAL_MIN = 2.0"
|
||||
"# make sure to increase in case bash commands were added in EXTRA_BASH_SCRIPT\n",
|
||||
"POLLING_INTERVAL_MIN = 5.0"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -284,7 +289,9 @@
|
||||
" export TRAINS_API_SECRET_KEY='{secret_key}'\n",
|
||||
" screen\n",
|
||||
" {bash_script}\n",
|
||||
" python3 -m trains_agent --config-file '/root/trains.conf' daemon --queue '{queue}' --docker\"\"\".format(\n",
|
||||
" python3 -m trains_agent --config-file '/root/trains.conf' daemon --queue '{queue}' {docker}\n",
|
||||
" shutdown\n",
|
||||
" \"\"\".format(\n",
|
||||
" api_server=TRAINS_SERVER_API_SERVER,\n",
|
||||
" web_server=TRAINS_SERVER_WEB_SERVER,\n",
|
||||
" files_server=TRAINS_SERVER_FILES_SERVER,\n",
|
||||
@@ -295,7 +302,8 @@
|
||||
" git_user=TRAINS_GIT_USER,\n",
|
||||
" git_pass=TRAINS_GIT_PASS,\n",
|
||||
" trains_conf=EXTRA_TRAINS_CONF_ENCODED,\n",
|
||||
" bash_script=EXTRA_BASH_SCRIPT\n",
|
||||
" bash_script=EXTRA_BASH_SCRIPT,\n",
|
||||
" docker=\"--docker '{}'\".format(DEFAULT_DOCKER_IMAGE) if DEFAULT_DOCKER_IMAGE else \"\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" ec2 = boto3.client(\n",
|
||||
@@ -344,6 +352,7 @@
|
||||
" MaxCount=1,\n",
|
||||
" InstanceType=resource_conf[\"instance_type\"],\n",
|
||||
" UserData=user_data,\n",
|
||||
" InstanceInitiatedShutdownBehavior='terminate',\n",
|
||||
" BlockDeviceMappings=[\n",
|
||||
" {\n",
|
||||
" \"DeviceName\": resource_conf[\"ebs_device_name\"],\n",
|
||||
|
Loading…
Reference in New Issue
Block a user