diff --git a/clearml_agent/commands/resolver.py b/clearml_agent/commands/resolver.py index e6b0f41..1e7aaed 100644 --- a/clearml_agent/commands/resolver.py +++ b/clearml_agent/commands/resolver.py @@ -13,19 +13,19 @@ from clearml_agent.helper.package.requirements import ( def resolve_default_container(session, task_id, container_config, ignore_match_rules=False): container_lookup = session.config.get('agent.default_docker.match_rules', None) if not session.check_min_api_version("2.13") or not container_lookup: - return container_config + return container_config, None # check backend support before sending any more requests (because they will fail and crash the Task) try: session.verify_feature_set('advanced') except ValueError: # ignoring matching rules only supported in higher tiers - return container_config + return container_config, None if ignore_match_rules: print("INFO: default docker command line override, ignoring default docker container match rules") # ignoring matching rules only supported in higher tiers - return container_config + return container_config, None result = session.send_request( service='tasks', @@ -42,7 +42,7 @@ def resolve_default_container(session, task_id, container_config, ignore_match_r try: task_info = result.json()['data']['tasks'][0] if result.ok else {} except (ValueError, TypeError): - return container_config + return container_config, None from clearml_agent.external.requirements_parser.requirement import Requirement @@ -173,8 +173,7 @@ def resolve_default_container(session, task_id, container_config, ignore_match_r container_config['update_back_task'] = update_back_task - print('INFO: Matching default container with rule:\n{}'.format(json.dumps(entry))) - return container_config + return container_config, entry - return container_config + return container_config, None diff --git a/clearml_agent/commands/worker.py b/clearml_agent/commands/worker.py index 22f0a5d..45237c6 100644 --- a/clearml_agent/commands/worker.py +++ b/clearml_agent/commands/worker.py @@ -392,6 +392,7 @@ def get_task_container(session, task_id, ignore_match_rules=False, allow_force_c """ Returns dict with Task docker container setup {container: '', arguments: '', setup_shell_script: ''} """ + raw_container = {} if session.check_min_api_version("2.13"): result = session.send_request( service='tasks', @@ -403,6 +404,7 @@ def get_task_container(session, task_id, ignore_match_rules=False, allow_force_c ) try: container = result.json()['data']['tasks'][0]['container'] if result.ok else {} + raw_container = copy(container or {}) if container.get('arguments'): container['arguments'] = shlex.split(str(container.get('arguments')).strip()) if container.get('image'): @@ -422,55 +424,70 @@ def get_task_container(session, task_id, ignore_match_rules=False, allow_force_c ) except (ValueError, TypeError): pass + raw_container = copy(container or {}) + if raw_container.get("arguments"): + raw_container["arguments"] = ' '.join(shlex.quote(x) for x in raw_container["arguments"]) no_default_container = not container or not container.get('image') if no_default_container or allow_force_container_rules and session.check_min_api_version("2.13"): - original_container = copy(container) or {} - updated_container = resolve_default_container( + original_container = copy(container or {}) + updated_container, entry = resolve_default_container( session=session, task_id=task_id, container_config=original_container, ignore_match_rules=ignore_match_rules and not no_default_container, ) - if no_default_container and not ignore_match_rules: + if entry and no_default_container and not ignore_match_rules: # if we do not have a default container image / args, use the defaults from the resolver container = updated_container - elif allow_force_container_rules and updated_container.get('force_container_rules'): + updated = True + elif entry and allow_force_container_rules and updated_container.get('force_container_rules'): # if we allow to force rules (and we have requested container) # and the 'force_container_rules' is turned on in the rule, then overwrite container container = updated_container + updated = True + else: + updated = False - # make sure we pop the new added fields - container.pop("force_container_rules", None) + if updated: + print('INFO: Updating Task Container with matched rule:\n{}'.format(json.dumps(entry or {}))) - # check if we need to update the Task based on the new matched container defaults or overrides - update_back_task = container.pop("update_back_task", None) or updated_container.pop("update_back_task", None) - if update_back_task: - # update back the task - print('INFO: Updating the Task with the selected container with rule') - try: - res = session.send_request( - service='tasks', action='edit', method=Request.def_method, - version='2.13', - json={ - "task": task_id, - "force": True, - "container": { - "image": container.get('image') or "", - "arguments": container.get('arguments') or "", - "setup_shell_script": container.get('setup_shell_script') or "", - } - }, - ) - if not res.ok: - raise Exception("failed setting runtime property") - except Exception as ex: - print("WARNING: failed setting container properties for task '{}': {}".format(task_id, ex)) + # make sure we pop the new added fields + container.pop("force_container_rules", None) - # make sure we preserve backwards compatibility with the expected entries types - if isinstance(container.get('arguments'), str): - container['arguments'] = shlex.split(str(container.get('arguments') or '').strip()) - if container.get('image'): - container['image'] = container.get('image').strip() + # check if we need to update the Task based on the new matched container defaults or overrides + update_back_task = container.pop("update_back_task", None) or updated_container.pop("update_back_task", None) + if update_back_task and container != original_container and session.check_min_api_version("2.13"): + # update back the task + print('INFO: Updating the Task with the selected container with rule') + try: + res = session.send_request( + service='tasks', action='edit', method=Request.def_method, + version='2.13', + json={ + "task": task_id, + "force": True, + "container": { + "image": str(container.get('image') or ""), + # fix if we are left with the list of arguments, take the original arguments text, + # because combining back the list is a bit off + "arguments": + str(raw_container.get('arguments') or "") + if isinstance(container.get('arguments'), (list, tuple)) + else str(container.get('arguments') or ""), + "setup_shell_script": str(container.get('setup_shell_script') or ""), + } + }, + ) + if not res.ok: + raise Exception("failed setting runtime property") + except Exception as ex: + print("WARNING: failed setting container properties for task '{}': {}".format(task_id, ex)) + + # make sure we preserve backwards compatibility with the expected entries types + if isinstance(container.get('arguments'), str): + container['arguments'] = shlex.split(str(container.get('arguments') or '').strip()) + if container.get('image'): + container['image'] = container.get('image').strip() return container