Add support for "execute --docker" and for cloning an experiment before execution

This commit is contained in:
allegroai 2020-03-12 18:33:07 +02:00
parent 00eb2f10ec
commit 757210d5b3
2 changed files with 45 additions and 13 deletions

View File

@ -663,9 +663,7 @@ class Worker(ServiceCommandSection):
# print docker image
if docker is not False and docker is not None:
self._force_current_version = kwargs.get('force_current_version', False)
temp_config, docker_image_func = self.get_docker_config_cmd(docker)
self.dump_config(temp_config)
self.docker_image_func = docker_image_func
self.set_docker_variables(docker)
else:
self.dump_config()
@ -1050,6 +1048,8 @@ class Worker(ServiceCommandSection):
require_queue=False,
log_file=None,
standalone_mode=None,
docker=False,
clone=False,
**_
):
if not task_id:
@ -1062,17 +1062,33 @@ class Worker(ServiceCommandSection):
except Exception:
raise ValueError("Could not find task id={}".format(task_id))
# make sure this task is not stuck in an execution queue, it shouldn't have been, but just in case.
try:
res = self._session.api_client.tasks.dequeue(task=current_task.id)
if require_queue and res.meta.result_code != 200:
raise ValueError("Execution required enqueued task, "
"but task id={} is not queued.".format(current_task.id))
except Exception:
if require_queue:
raise
if clone:
try:
print("Cloning task id={}".format(task_id))
current_task = self._session.api_client.tasks.get_by_id(
self._session.send_api(
tasks_api.CloneRequest(task=current_task.id, new_task_name='Clone of {}'.format(current_task.name))
).id
)
print("Task cloned, new task id={}".format(current_task.id))
except Exception:
raise CommandFailedError("Cloning failed")
else:
# make sure this task is not stuck in an execution queue, it shouldn't have been, but just in case.
try:
res = self._session.api_client.tasks.dequeue(task=current_task.id)
if require_queue and res.meta.result_code != 200:
raise ValueError("Execution required enqueued task, "
"but task id={} is not queued.".format(current_task.id))
except Exception:
if require_queue:
raise
if full_monitoring:
if docker is not False and docker is not None:
self.set_docker_variables(docker)
# We expect the same behaviour in case full_monitoring was set, and in case docker mode is used
if full_monitoring or docker is not False:
worker_params = WorkerParams(
log_level=log_level,
config_file=self._session.config_file,
@ -1255,6 +1271,11 @@ class Worker(ServiceCommandSection):
return 1 if exit_code is None else exit_code
def set_docker_variables(self, docker):
    """
    Set up this worker's docker-related state for the given docker argument.

    Calls ``self.get_docker_config_cmd(docker)``, which is expected to return a
    ``(config, image_func)`` pair, passes the config to ``self.dump_config`` and
    stores the function on ``self.docker_image_func``.

    :param docker: docker argument, forwarded unchanged to ``get_docker_config_cmd``
    :return: None
    """
    temp_config, docker_image_func = self.get_docker_config_cmd(docker)
    # Dump the config before touching instance state: if dumping raises,
    # ``docker_image_func`` keeps whatever value it had before the call.
    self.dump_config(temp_config)
    self.docker_image_func = docker_image_func
def get_execution_info(self, current_task):
# type: (...) -> ExecutionInfo
try:

View File

@ -105,6 +105,17 @@ COMMANDS = {
'help': 'Do not use any network connects, assume everything is pre-installed',
'action': 'store_true',
},
'--docker': {
'help': 'Run execution task inside a docker (v19.03 and above). Optional args <image> <arguments> or '
'specify default docker image in agent.default_docker.image / agent.default_docker.arguments'
'use --gpus/--cpu-only (or set NVIDIA_VISIBLE_DEVICES) to limit gpu visibility for docker',
'nargs': '*',
'default': False,
},
'--clone': {
'help': 'Clone the experiment before execution, and execute the cloned experiment',
'action': 'store_true',
},
}, **WORKER_ARGS),
},
'build': {