Add support for "execute --docker" and for cloning an experiment before execution

2025-06-26 18:16:15 +00:00 · 2020-03-12 18:33:07 +02:00 · 2020-03-12 18:33:07 +02:00 · 757210d5b3
commit 757210d5b3
parent 00eb2f10ec
2 changed files with 45 additions and 13 deletions
--- a/trains_agent/commands/worker.py
+++ b/trains_agent/commands/worker.py
@ -663,9 +663,7 @@ class Worker(ServiceCommandSection):
        # print docker image
        if docker is not False and docker is not None:
            self._force_current_version = kwargs.get('force_current_version', False)
-            temp_config, docker_image_func = self.get_docker_config_cmd(docker)
+            self.set_docker_variables(docker)
            self.dump_config(temp_config)
            self.docker_image_func = docker_image_func
        else:
            self.dump_config()
@ -1050,6 +1048,8 @@ class Worker(ServiceCommandSection):
        require_queue=False,
        log_file=None,
        standalone_mode=None,
        docker=False,
        clone=False,
        **_
    ):
        if not task_id:
@ -1062,6 +1062,18 @@ class Worker(ServiceCommandSection):
        except Exception:
            raise ValueError("Could not find task id={}".format(task_id))
        if clone:
            try:
                print("Cloning task id={}".format(task_id))
                current_task = self._session.api_client.tasks.get_by_id(
                    self._session.send_api(
                        tasks_api.CloneRequest(task=current_task.id, new_task_name='Clone of {}'.format(current_task.name))
                    ).id
                )
                print("Task cloned, new task id={}".format(current_task.id))
            except Exception:
                raise CommandFailedError("Cloning failed")
        else:
            # make sure this task is not stuck in an execution queue, it shouldn't have been, but just in case.
            try:
                res = self._session.api_client.tasks.dequeue(task=current_task.id)
@ -1072,7 +1084,11 @@ class Worker(ServiceCommandSection):
                if require_queue:
                    raise
-        if full_monitoring:
+        if docker is not False and docker is not None:
            self.set_docker_variables(docker)
        # We expect the same behaviour in case full_monitoring was set, and in case docker mode is used
        if full_monitoring or docker is not False:
            worker_params = WorkerParams(
                log_level=log_level,
                config_file=self._session.config_file,
@ -1255,6 +1271,11 @@ class Worker(ServiceCommandSection):
        return 1 if exit_code is None else exit_code
    def set_docker_variables(self, docker):
        """
        Apply the docker-related settings derived from ``docker`` to this worker.

        ``get_docker_config_cmd`` is called with ``docker`` and must return a pair:
        the first item is handed to ``dump_config``, the second item is stored on
        ``self.docker_image_func``.

        :param docker: docker specification, forwarded untouched to ``get_docker_config_cmd``
        """
        docker_settings = self.get_docker_config_cmd(docker)
        # Unpack before touching any state, so a malformed result fails early
        docker_config, image_func = docker_settings
        self.dump_config(docker_config)
        self.docker_image_func = image_func
    def get_execution_info(self, current_task):
        # type: (...) -> ExecutionInfo
        try:
--- a/trains_agent/interface/worker.py
+++ b/trains_agent/interface/worker.py
@ -105,6 +105,17 @@ COMMANDS = {
                'help': 'Do not use any network connects, assume everything is pre-installed',
                'action': 'store_true',
            },
            '--docker': {
                'help': 'Run execution task inside a docker (v19.03 and above). Optional args <image> <arguments> or '
                        'specify default docker image in agent.default_docker.image / agent.default_docker.arguments'
                        'use --gpus/--cpu-only (or set NVIDIA_VISIBLE_DEVICES) to limit gpu visibility for docker',
                'nargs': '*',
                'default': False,
            },
            '--clone': {
                'help': 'Clone the experiment before execution, and execute the cloned experiment',
                'action': 'store_true',
            },
        }, **WORKER_ARGS),
    },
    'build': {