Fix container default arguments should never be a list

This commit is contained in:
clearml 2025-02-24 13:44:52 +02:00
parent d87521c36c
commit ee286e2fb7
2 changed files with 57 additions and 41 deletions

View File

@ -13,19 +13,19 @@ from clearml_agent.helper.package.requirements import (
def resolve_default_container(session, task_id, container_config, ignore_match_rules=False): def resolve_default_container(session, task_id, container_config, ignore_match_rules=False):
container_lookup = session.config.get('agent.default_docker.match_rules', None) container_lookup = session.config.get('agent.default_docker.match_rules', None)
if not session.check_min_api_version("2.13") or not container_lookup: if not session.check_min_api_version("2.13") or not container_lookup:
return container_config return container_config, None
# check backend support before sending any more requests (because they will fail and crash the Task) # check backend support before sending any more requests (because they will fail and crash the Task)
try: try:
session.verify_feature_set('advanced') session.verify_feature_set('advanced')
except ValueError: except ValueError:
# ignoring matching rules only supported in higher tiers # ignoring matching rules only supported in higher tiers
return container_config return container_config, None
if ignore_match_rules: if ignore_match_rules:
print("INFO: default docker command line override, ignoring default docker container match rules") print("INFO: default docker command line override, ignoring default docker container match rules")
# ignoring matching rules only supported in higher tiers # ignoring matching rules only supported in higher tiers
return container_config return container_config, None
result = session.send_request( result = session.send_request(
service='tasks', service='tasks',
@ -42,7 +42,7 @@ def resolve_default_container(session, task_id, container_config, ignore_match_r
try: try:
task_info = result.json()['data']['tasks'][0] if result.ok else {} task_info = result.json()['data']['tasks'][0] if result.ok else {}
except (ValueError, TypeError): except (ValueError, TypeError):
return container_config return container_config, None
from clearml_agent.external.requirements_parser.requirement import Requirement from clearml_agent.external.requirements_parser.requirement import Requirement
@ -173,8 +173,7 @@ def resolve_default_container(session, task_id, container_config, ignore_match_r
container_config['update_back_task'] = update_back_task container_config['update_back_task'] = update_back_task
print('INFO: Matching default container with rule:\n{}'.format(json.dumps(entry))) return container_config, entry
return container_config
return container_config return container_config, None

View File

@ -392,6 +392,7 @@ def get_task_container(session, task_id, ignore_match_rules=False, allow_force_c
""" """
Returns dict with Task docker container setup {container: '', arguments: '', setup_shell_script: ''} Returns dict with Task docker container setup {container: '', arguments: '', setup_shell_script: ''}
""" """
raw_container = {}
if session.check_min_api_version("2.13"): if session.check_min_api_version("2.13"):
result = session.send_request( result = session.send_request(
service='tasks', service='tasks',
@ -403,6 +404,7 @@ def get_task_container(session, task_id, ignore_match_rules=False, allow_force_c
) )
try: try:
container = result.json()['data']['tasks'][0]['container'] if result.ok else {} container = result.json()['data']['tasks'][0]['container'] if result.ok else {}
raw_container = copy(container or {})
if container.get('arguments'): if container.get('arguments'):
container['arguments'] = shlex.split(str(container.get('arguments')).strip()) container['arguments'] = shlex.split(str(container.get('arguments')).strip())
if container.get('image'): if container.get('image'):
@ -422,29 +424,39 @@ def get_task_container(session, task_id, ignore_match_rules=False, allow_force_c
) )
except (ValueError, TypeError): except (ValueError, TypeError):
pass pass
raw_container = copy(container or {})
if raw_container.get("arguments"):
raw_container["arguments"] = ' '.join(shlex.quote(x) for x in raw_container["arguments"])
no_default_container = not container or not container.get('image') no_default_container = not container or not container.get('image')
if no_default_container or allow_force_container_rules and session.check_min_api_version("2.13"): if no_default_container or allow_force_container_rules and session.check_min_api_version("2.13"):
original_container = copy(container) or {} original_container = copy(container or {})
updated_container = resolve_default_container( updated_container, entry = resolve_default_container(
session=session, task_id=task_id, session=session, task_id=task_id,
container_config=original_container, container_config=original_container,
ignore_match_rules=ignore_match_rules and not no_default_container, ignore_match_rules=ignore_match_rules and not no_default_container,
) )
if no_default_container and not ignore_match_rules: if entry and no_default_container and not ignore_match_rules:
# if we do not have a default container image / args, use the defaults from the resolver # if we do not have a default container image / args, use the defaults from the resolver
container = updated_container container = updated_container
elif allow_force_container_rules and updated_container.get('force_container_rules'): updated = True
elif entry and allow_force_container_rules and updated_container.get('force_container_rules'):
# if we allow to force rules (and we have requested container) # if we allow to force rules (and we have requested container)
# and the 'force_container_rules' is turned on in the rule, then overwrite container # and the 'force_container_rules' is turned on in the rule, then overwrite container
container = updated_container container = updated_container
updated = True
else:
updated = False
if updated:
print('INFO: Updating Task Container with matched rule:\n{}'.format(json.dumps(entry or {})))
# make sure we pop the new added fields # make sure we pop the new added fields
container.pop("force_container_rules", None) container.pop("force_container_rules", None)
# check if we need to update the Task based on the new matched container defaults or overrides # check if we need to update the Task based on the new matched container defaults or overrides
update_back_task = container.pop("update_back_task", None) or updated_container.pop("update_back_task", None) update_back_task = container.pop("update_back_task", None) or updated_container.pop("update_back_task", None)
if update_back_task: if update_back_task and container != original_container and session.check_min_api_version("2.13"):
# update back the task # update back the task
print('INFO: Updating the Task with the selected container with rule') print('INFO: Updating the Task with the selected container with rule')
try: try:
@ -455,9 +467,14 @@ def get_task_container(session, task_id, ignore_match_rules=False, allow_force_c
"task": task_id, "task": task_id,
"force": True, "force": True,
"container": { "container": {
"image": container.get('image') or "", "image": str(container.get('image') or ""),
"arguments": container.get('arguments') or "", # fix if we are left with the list of arguments, take the original arguments text,
"setup_shell_script": container.get('setup_shell_script') or "", # because combining back the list is a bit off
"arguments":
str(raw_container.get('arguments') or "")
if isinstance(container.get('arguments'), (list, tuple))
else str(container.get('arguments') or ""),
"setup_shell_script": str(container.get('setup_shell_script') or ""),
} }
}, },
) )