mirror of
https://github.com/clearml/clearml-agent
synced 2025-05-28 08:58:28 +00:00
Add support for container rulebook overrides ('force_container_rules: true') and container rulebook task update ('update_back_task: true').
This addition allows users to override container arguments forcefully based on the tasks properties (repo, tags, project, user etc.), as well as offer additional defaults based on python required packages or python versions
This commit is contained in:
parent
8887453328
commit
d87521c36c
@ -236,20 +236,35 @@
|
||||
docker_force_pull: false
|
||||
|
||||
default_docker: {
|
||||
# default docker image to use when running in docker mode
|
||||
image: "nvidia/cuda:11.0.3-cudnn8-runtime-ubuntu20.04"
|
||||
# default container image to use when running in docker mode
|
||||
image: "nvidia/cuda:12.6.3-cudnn-runtime-ubuntu20.04"
|
||||
|
||||
# optional arguments to pass to docker image
|
||||
# arguments: ["--ipc=host", ]
|
||||
|
||||
# Choose the default docker based on the Task properties,
|
||||
# Notice: Enterprise feature, ignored otherwise
|
||||
# Examples: 'script.requirements', 'script.binary', 'script.repository', 'script.branch', 'project'
|
||||
# Notice: Matching is done via regular expression, for example "^searchme$" will match exactly "searchme" string
|
||||
# if true update the Task container section based on the selected rule/default
|
||||
# can also be set/override per specific rule
|
||||
update_back_task: true
|
||||
|
||||
# **Notice**: Enterprise feature, ignored otherwise
|
||||
# Choose the default container based on the Task properties,
|
||||
# container values that can be changed: "image", "arguments" , "setup_shell_script"
|
||||
# Notice: adding "force_container_rules: true" to a rule,
|
||||
# will cause it to IGNORE the Task's requested container and use the configuration of the rule,
|
||||
# including all its entries (image, arguments, setup_shell_script)!
|
||||
# Rule selector keys: 'script.requirements', 'script.binary',
|
||||
# 'script.repository', 'script.branch',
|
||||
# 'project', 'project_id', 'user_id', 'container', 'tags'
|
||||
# Notice: Matching is done via regular expression and needs to match ALL entries inside the rule,
|
||||
# matching re example: "^searchme$" will match exactly "searchme" string inside an entry
|
||||
# specifically for tags single tag match is enough:
|
||||
# "^my_tag$" will match a Task that has
|
||||
# multiple tags one of them matches ["general_tag", "my_tag$"]
|
||||
"match_rules": [
|
||||
{
|
||||
"image": "python:3.6-bullseye",
|
||||
"arguments": "--ipc=host",
|
||||
"update_back_task": true,
|
||||
"match": {
|
||||
"script": {
|
||||
"binary": "python3.6$",
|
||||
@ -259,6 +274,7 @@
|
||||
{
|
||||
"image": "python:3.7-bullseye",
|
||||
"arguments": "--ipc=host",
|
||||
"update_back_task": true,
|
||||
"match": {
|
||||
"script": {
|
||||
"binary": "python3.7$",
|
||||
@ -310,6 +326,15 @@
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
"image": "python:3.13-bullseye",
|
||||
"arguments": "--ipc=host",
|
||||
"match": {
|
||||
"script": {
|
||||
"binary": "python3.13$",
|
||||
},
|
||||
}
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,7 @@ def resolve_default_container(session, task_id, container_config, ignore_match_r
|
||||
json={'id': [task_id],
|
||||
'only_fields': ['script.requirements', 'script.binary',
|
||||
'script.repository', 'script.branch',
|
||||
'project', 'container'],
|
||||
'project', 'container', 'tags', 'user'],
|
||||
'search_hidden': True},
|
||||
method=Request.def_method,
|
||||
async_enable=False,
|
||||
@ -72,62 +72,50 @@ def resolve_default_container(session, task_id, container_config, ignore_match_r
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
match_term_lookup = {
|
||||
"project": project_full_name,
|
||||
"project_id": task_info.get('project', ''),
|
||||
"script.repository": repository,
|
||||
"script.branch": branch,
|
||||
"script.binary": binary,
|
||||
"user_id": task_info.get('user', ""),
|
||||
"container": requested_container.get('image', ''),
|
||||
"tags": task_info.get('tags', []),
|
||||
}
|
||||
|
||||
task_packages_lookup = {}
|
||||
for entry in container_lookup:
|
||||
match = entry.get('match', None)
|
||||
if not match:
|
||||
continue
|
||||
if match.get('project', None):
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
if not re.search(match.get('project', None), project_full_name):
|
||||
continue
|
||||
except Exception:
|
||||
print('Failed parsing regular expression \"{}\" in rule: {}'.format(
|
||||
match.get('project', None), entry))
|
||||
continue
|
||||
|
||||
if match.get('script.repository', None):
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
if not re.search(match.get('script.repository', None), repository):
|
||||
continue
|
||||
except Exception:
|
||||
print('Failed parsing regular expression \"{}\" in rule: {}'.format(
|
||||
match.get('script.repository', None), entry))
|
||||
continue
|
||||
|
||||
if match.get('script.branch', None):
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
if not re.search(match.get('script.branch', None), branch):
|
||||
continue
|
||||
except Exception:
|
||||
print('Failed parsing regular expression \"{}\" in rule: {}'.format(
|
||||
match.get('script.branch', None), entry))
|
||||
continue
|
||||
|
||||
if match.get('script.binary', None):
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
if not re.search(match.get('script.binary', None), binary):
|
||||
continue
|
||||
except Exception:
|
||||
print('Failed parsing regular expression \"{}\" in rule: {}'.format(
|
||||
match.get('script.binary', None), entry))
|
||||
continue
|
||||
|
||||
# if match.get('image', None):
|
||||
# # noinspection PyBroadException
|
||||
# try:
|
||||
# if not re.search(match.get('image', None), requested_container.get('image', '')):
|
||||
# continue
|
||||
# except Exception:
|
||||
# print('Failed parsing regular expression \"{}\" in rule: {}'.format(
|
||||
# match.get('image', None), entry))
|
||||
# continue
|
||||
|
||||
matched = True
|
||||
for key, value in match_term_lookup.items():
|
||||
term = match.get(key, None)
|
||||
if not term:
|
||||
continue
|
||||
values = [value] if not isinstance(value, (list, tuple)) else value
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
terms = [term] if not isinstance(term, (list, tuple)) else term
|
||||
# we fail if we didn't find ANY match in the list
|
||||
if all(any(bool(re.search(t, v)) for v in values) for t in terms):
|
||||
# we found a match, go to the next match term
|
||||
pass
|
||||
else:
|
||||
# no match, stop, and we should go to the next rule
|
||||
matched = False
|
||||
break
|
||||
except Exception:
|
||||
print('Failed parsing regular expression \"{}\" in rule: {}'.format(term, entry))
|
||||
matched = False
|
||||
break
|
||||
|
||||
# we had at least a single key that was Not matched in this rule, go to the next one
|
||||
if not matched:
|
||||
continue
|
||||
|
||||
# look for the complicated stuff (i.e. requirements)
|
||||
for req_section in ['script.requirements.pip', 'script.requirements.conda']:
|
||||
if not match.get(req_section, None):
|
||||
continue
|
||||
@ -163,13 +151,28 @@ def resolve_default_container(session, task_id, container_config, ignore_match_r
|
||||
matched = False
|
||||
break
|
||||
|
||||
# if we found a match in the rulebook
|
||||
if matched:
|
||||
if not container_config.get('image'):
|
||||
container_config['image'] = entry.get('image', None)
|
||||
if not container_config.get('arguments'):
|
||||
allow_override = bool(entry.get('force_container_rules', None))
|
||||
container_config["force_container_rules"] = allow_override
|
||||
|
||||
if not container_config.get('image') or allow_override:
|
||||
container_config['image'] = entry.get('image', None) or ''
|
||||
|
||||
if not container_config.get('arguments') or allow_override:
|
||||
container_config['arguments'] = entry.get('arguments', None) or ''
|
||||
if isinstance(container_config.get('arguments'), str):
|
||||
container_config['arguments'] = shlex.split(str(container_config.get('arguments') or '').strip())
|
||||
|
||||
if not container_config.get('setup_shell_script') or allow_override:
|
||||
container_config['setup_shell_script'] = entry.get('setup_shell_script', None) or ''
|
||||
if isinstance(container_config['setup_shell_script'], (list, tuple)):
|
||||
container_config['setup_shell_script'] = "\n".join(container_config['setup_shell_script'])
|
||||
|
||||
update_back_task = entry.get('update_back_task', None)
|
||||
if update_back_task is None:
|
||||
update_back_task = session.config.get('agent.default_docker.update_back_task', None)
|
||||
|
||||
container_config['update_back_task'] = update_back_task
|
||||
|
||||
print('INFO: Matching default container with rule:\n{}'.format(json.dumps(entry)))
|
||||
return container_config
|
||||
|
||||
|
@ -388,7 +388,7 @@ def get_task_fields(session, task_id, fields: list, log=None) -> dict:
|
||||
return {}
|
||||
|
||||
|
||||
def get_task_container(session, task_id, ignore_match_rules=False):
|
||||
def get_task_container(session, task_id, ignore_match_rules=False, allow_force_container_rules=True):
|
||||
"""
|
||||
Returns dict with Task docker container setup {container: '', arguments: '', setup_shell_script: ''}
|
||||
"""
|
||||
@ -423,12 +423,54 @@ def get_task_container(session, task_id, ignore_match_rules=False):
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
if (not container or not container.get('image')) and session.check_min_api_version("2.13"):
|
||||
container = resolve_default_container(
|
||||
no_default_container = not container or not container.get('image')
|
||||
if no_default_container or allow_force_container_rules and session.check_min_api_version("2.13"):
|
||||
original_container = copy(container) or {}
|
||||
updated_container = resolve_default_container(
|
||||
session=session, task_id=task_id,
|
||||
container_config=container,
|
||||
ignore_match_rules=ignore_match_rules,
|
||||
container_config=original_container,
|
||||
ignore_match_rules=ignore_match_rules and not no_default_container,
|
||||
)
|
||||
if no_default_container and not ignore_match_rules:
|
||||
# if we do not have a default container image / args, use the defaults from the resolver
|
||||
container = updated_container
|
||||
elif allow_force_container_rules and updated_container.get('force_container_rules'):
|
||||
# if we allow to force rules (and we have requested container)
|
||||
# and the 'force_container_rules' is turned on in the rule, then overwrite container
|
||||
container = updated_container
|
||||
|
||||
# make sure we pop the new added fields
|
||||
container.pop("force_container_rules", None)
|
||||
|
||||
# check if we need to update the Task based on the new matched container defaults or overrides
|
||||
update_back_task = container.pop("update_back_task", None) or updated_container.pop("update_back_task", None)
|
||||
if update_back_task:
|
||||
# update back the task
|
||||
print('INFO: Updating the Task with the selected container with rule')
|
||||
try:
|
||||
res = session.send_request(
|
||||
service='tasks', action='edit', method=Request.def_method,
|
||||
version='2.13',
|
||||
json={
|
||||
"task": task_id,
|
||||
"force": True,
|
||||
"container": {
|
||||
"image": container.get('image') or "",
|
||||
"arguments": container.get('arguments') or "",
|
||||
"setup_shell_script": container.get('setup_shell_script') or "",
|
||||
}
|
||||
},
|
||||
)
|
||||
if not res.ok:
|
||||
raise Exception("failed setting runtime property")
|
||||
except Exception as ex:
|
||||
print("WARNING: failed setting container properties for task '{}': {}".format(task_id, ex))
|
||||
|
||||
# make sure we preserve backwards compatibility with the expected entries types
|
||||
if isinstance(container.get('arguments'), str):
|
||||
container['arguments'] = shlex.split(str(container.get('arguments') or '').strip())
|
||||
if container.get('image'):
|
||||
container['image'] = container.get('image').strip()
|
||||
|
||||
return container
|
||||
|
||||
|
@ -230,20 +230,39 @@ agent {
|
||||
docker_force_pull: false
|
||||
|
||||
default_docker: {
|
||||
# default docker image to use when running in docker mode
|
||||
image: "nvidia/cuda:11.0.3-cudnn8-runtime-ubuntu20.04"
|
||||
# default container image to use when running in docker mode
|
||||
image: "nvidia/cuda:12.6.3-cudnn-runtime-ubuntu20.04"
|
||||
|
||||
# optional arguments to pass to docker image
|
||||
# arguments: ["--ipc=host"]
|
||||
|
||||
# lookup table rules for default container
|
||||
# first matched rule will be picked, according to rule order
|
||||
# enterprise version only
|
||||
# **Notice**: Enterprise feature, ignored otherwise
|
||||
# Choose the default container based on the Task properties,
|
||||
# container values that can be changed: "image", "arguments" , "setup_shell_script"
|
||||
# Notice: adding "force_container_rules: true" to a rule,
|
||||
# will cause it to IGNORE the Task's requested container and use the configuration of the rule,
|
||||
# including all its entries (image, arguments, setup_shell_script)!
|
||||
# Rule selector keys: 'script.requirements', 'script.binary',
|
||||
# 'script.repository', 'script.branch',
|
||||
# 'project', 'project_id', 'user_id', 'container', 'tags'
|
||||
# Notice: Matching is done via regular expression and needs to match ALL entries inside the rule,
|
||||
# matching re example: "^searchme$" will match exactly "searchme" string inside an entry
|
||||
# specifically for tags single tag match is enough:
|
||||
# "^my_tag$" will match a Task that has
|
||||
# multiple tags one of them matches ["general_tag", "my_tag$"]
|
||||
#
|
||||
# match_rules: [
|
||||
# {
|
||||
# # default container image to use when running in docker mode
|
||||
# image: "nvidia/cuda:11.0.3-cudnn8-runtime-ubuntu20.04"
|
||||
# # optional arguments to pass to docker image
|
||||
# arguments: "-e define=value"
|
||||
# # if true update the Task container section based on the selected rule/default
|
||||
# # can also be set/override per specific rule
|
||||
# update_back_task: true
|
||||
#
|
||||
# match: {
|
||||
# force_container_rules: true
|
||||
# script{
|
||||
# # Optional: must match all requirements (not partial)
|
||||
# requirements: {
|
||||
|
Loading…
Reference in New Issue
Block a user