Add support for ClearML Task options --force-no-requirements, --skip-repo-detection and --skip-python-env-install

clearml 2025-05-18 11:04:37 +03:00
parent 37ea48b619
commit afbbd3975a
4 changed files with 99 additions and 46 deletions


@@ -1590,7 +1590,8 @@ class PipelineController(object):
         version: Optional[str] = None,
         add_run_number: bool = True,
         binary: Optional[str] = None,
-        module: Optional[str] = None
+        module: Optional[str] = None,
+        detect_repository: bool = True
     ) -> "PipelineController":
         """
         Manually create and populate a new Pipeline in the system.
@@ -1613,6 +1614,7 @@ class PipelineController(object):
         :param packages: Manually specify a list of required packages. Example: ``["tqdm>=2.1", "scikit-learn"]``
             or `True` to automatically create requirements
             based on locally installed packages (repository must be local).
+            Pass an empty string to not install any packages (not even from the repository)
         :param requirements_file: Specify requirements.txt file to install when setting the session.
             If not provided, the requirements.txt from the repository will be used.
         :param docker: Select the docker image to be executed in by the remote session
@@ -1626,6 +1628,8 @@ class PipelineController(object):
         :param module: If specified instead of executing `script`, a module named `module` is executed.
             Implies script is empty. Module can contain multiple argument for execution,
             for example: module="my.module arg1 arg2"
+        :param detect_repository: If True, detect the repository if no repository has been specified.
+            If False, don't detect repository under any circumstance. Ignored if `repo` is specified
         :return: The newly created PipelineController
         """
@@ -1648,7 +1652,8 @@ class PipelineController(object):
             add_task_init_call=False,
             force_single_script_file=force_single_script_file,
             binary=binary,
-            module=module
+            module=module,
+            detect_repository=detect_repository
         )
         cls._create_pipeline_projects(
             task=pipeline_controller,
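A minimal usage sketch of the pipeline-side change, assuming the classmethod patched above is PipelineController.create (the call clearml-task makes for --pipeline); the project, task, and script names are placeholders, and the project/task parameter names are assumed to match Task.create:

    from clearml import PipelineController

    # Sketch only, not taken from the commit: placeholder names throughout.
    pipeline = PipelineController.create(
        project_name="pipelines",
        task_name="standalone-pipeline",
        script="pipeline.py",
        packages="",               # per the new docstring: install nothing, not even repo requirements
        detect_repository=False,   # new in this commit: never auto-detect a repository
    )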


@@ -49,6 +49,7 @@ class CreateAndPopulate(object):
         raise_on_missing_entries: bool = False,
         verbose: bool = False,
         binary: Optional[str] = None,
+        detect_repository: bool = True
     ) -> None:
         """
         Create a new Task from an existing code base.
@@ -76,6 +77,7 @@ class CreateAndPopulate(object):
         :param packages: Manually specify a list of required packages. Example: ["tqdm>=2.1", "scikit-learn"]
             or `True` to automatically create requirements
             based on locally installed packages (repository must be local).
+            Pass an empty string to not install any packages (not even from the repository)
         :param requirements_file: Specify requirements.txt file to install when setting the session.
             If not provided, the requirements.txt from the repository will be used.
         :param docker: Select the docker image to be executed in by the remote session
@@ -91,6 +93,8 @@ class CreateAndPopulate(object):
         :param raise_on_missing_entries: If True, raise ValueError on missing entries when populating
         :param verbose: If True, print verbose logging
         :param binary: Binary used to launch the entry point
+        :param detect_repository: If True, detect the repository if no repository has been specified.
+            If False, don't detect repository under any circumstance. Ignored if `repo` is specified
         """
         if repo and len(urlparse(repo).scheme) <= 1 and not re.compile(self._VCS_SSH_REGEX).match(repo):
             folder = repo
@@ -124,9 +128,12 @@ class CreateAndPopulate(object):
         self.module = module
         self.cwd = working_directory
         assert not packages or isinstance(packages, (tuple, list, bool))
-        self.packages = (
-            list(packages) if packages is not None and not isinstance(packages, bool) else (packages or None)
-        )
+        if isinstance(packages, bool):
+            self.packages = True if packages else None
+        elif packages:
+            self.packages = list(packages)
+        else:
+            self.packages = packages
         self.requirements_file = Path(requirements_file) if requirements_file else None
         self.base_task_id = base_task_id
         self.docker = dict(image=docker, args=docker_args, bash_script=docker_bash_setup_script)
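To make the rewritten packages normalization above concrete, a small self-contained sketch (not library code) of what self.packages ends up as for each input:

    def normalize_packages(packages):
        # Mirrors the branch added in CreateAndPopulate.__init__ above.
        if isinstance(packages, bool):
            return True if packages else None
        elif packages:
            return list(packages)
        return packages

    assert normalize_packages(True) is True                      # auto-detect requirements later
    assert normalize_packages(False) is None                      # nothing explicitly requested
    assert normalize_packages(["tqdm>=2.1"]) == ["tqdm>=2.1"]     # explicit list kept as a list
    assert normalize_packages("") == ""                           # new: "" survives, meaning "install nothing"
    assert normalize_packages(None) is None

The key difference from the old one-liner is that an empty string is no longer collapsed to None, so the "install nothing" request can still be detected later when the requirements are written to the task.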
@@ -140,6 +147,7 @@ class CreateAndPopulate(object):
         self.raise_on_missing_entries = raise_on_missing_entries
         self.verbose = verbose
         self.binary = binary
+        self.detect_repository = detect_repository

     def create_task(self, dry_run: bool = False) -> Union[Task, Dict]:
         """
""" """
@@ -196,45 +204,49 @@ class CreateAndPopulate(object):
         local_entry_file = entry_point

-        repo_info, requirements = ScriptInfo.get(
-            filepaths=[local_entry_file],
-            log=getLogger(),
-            create_requirements=self.packages is True,
-            uncommitted_from_remote=True,
-            detect_jupyter_notebook=False,
-            add_missing_installed_packages=True,
-            detailed_req_report=False,
-            force_single_script=self.force_single_script_file,
-        )
+        if self.detect_repository:
+            repo_info, requirements = ScriptInfo.get(
+                filepaths=[local_entry_file],
+                log=getLogger(),
+                create_requirements=self.packages is True,
+                uncommitted_from_remote=True,
+                detect_jupyter_notebook=False,
+                add_missing_installed_packages=True,
+                detailed_req_report=False,
+                force_single_script=self.force_single_script_file,
+            )
+        else:
+            repo_info, requirements = None, None
+
         if stand_alone_script_outside_repo:
             # if we have a standalone script and a local repo we skip[ the local diff and store it
             local_entry_file = Path(self.script).as_posix()
-            a_create_requirements = self.packages is True
-            a_repo_info, a_requirements = ScriptInfo.get(
-                filepaths=[Path(self.script).as_posix()],
-                log=getLogger(),
-                create_requirements=a_create_requirements,
-                uncommitted_from_remote=True,
-                detect_jupyter_notebook=False,
-                add_missing_installed_packages=True,
-                detailed_req_report=False,
-                force_single_script=True,
-            )
-            if repo_info.script["diff"]:
-                print(
-                    "Warning: local git repo diff is ignored, "
-                    "storing only the standalone script form {}".format(self.script)
-                )
-            repo_info.script["diff"] = a_repo_info.script["diff"] or ""
-            repo_info.script["entry_point"] = a_repo_info.script["entry_point"]
-            if a_create_requirements:
-                repo_info.script["requirements"] = a_repo_info.script.get("requirements") or {}
+            if self.detect_repository:
+                a_create_requirements = self.packages is True
+                a_repo_info, a_requirements = ScriptInfo.get(
+                    filepaths=[Path(self.script).as_posix()],
+                    log=getLogger(),
+                    create_requirements=a_create_requirements,
+                    uncommitted_from_remote=True,
+                    detect_jupyter_notebook=False,
+                    add_missing_installed_packages=True,
+                    detailed_req_report=False,
+                    force_single_script=True,
+                )
+                if repo_info.script["diff"]:
+                    print(
+                        "Warning: local git repo diff is ignored, "
+                        "storing only the standalone script form {}".format(self.script)
+                    )
+                repo_info.script["diff"] = a_repo_info.script["diff"] or ""
+                repo_info.script["entry_point"] = a_repo_info.script["entry_point"]
+                if a_create_requirements:
+                    repo_info.script["requirements"] = a_repo_info.script.get("requirements") or {}

         # check if we have no repository and no requirements raise error
         if (
             self.raise_on_missing_entries
-            and (not self.requirements_file and not self.packages)
+            and (self.requirements_file is None and self.packages is None)
             and not self.repo
             and (not repo_info or not repo_info.script or not repo_info.script.get("repository"))
             and (not entry_point or not entry_point.endswith(".sh"))
@@ -418,6 +430,8 @@ class CreateAndPopulate(object):
                 reqs = [line.strip() for line in f.readlines()]
             if self.packages and self.packages is not True:
                 reqs += self.packages
+            if self.packages == "" and len(reqs) == 0:
+                reqs = [""]
             if reqs:
                 # make sure we have clearml.
                 clearml_found = False
@@ -428,7 +442,7 @@ class CreateAndPopulate(object):
                     if package == "clearml":
                         clearml_found = True
                         break
-                if not clearml_found:
+                if not clearml_found and reqs != [""]:
                     reqs.append("clearml")
                 task_state["script"]["requirements"] = {"pip": "\n".join(reqs)}
         elif not self.repo and repo_info and not repo_info.script.get("requirements"):
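The two hunks above are what make packages="" effective: a lone empty entry becomes an "install nothing" marker, and clearml is then not force-appended. A rough, simplified sketch of the resulting requirements, assuming no requirements file contributed anything:

    packages = ""
    reqs = []                                   # nothing read from a requirements file
    if packages and packages is not True:
        reqs += packages
    if packages == "" and len(reqs) == 0:
        reqs = [""]                             # marker: install nothing
    if "clearml" not in reqs and reqs != [""]:  # simplified from the loop above
        reqs.append("clearml")                  # skipped for the marker case
    print({"pip": "\n".join(reqs)})             # -> {'pip': ''}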
@@ -537,7 +551,7 @@ class CreateAndPopulate(object):
             )
         if self.verbose:
-            if task_state["script"]["repository"]:
+            if task_state["script"].get("repository"):
                 repo_details = {
                     k: v for k, v in task_state["script"].items() if v and k not in ("diff", "requirements", "binary")
                 }


@@ -180,6 +180,24 @@ def setup_parser(parser: ArgumentParser) -> None:
         default=None,
         help="Specify the path to the offline session you want to import.",
     )
+    parser.add_argument(
+        "--force-no-requirements",
+        action="store_true",
+        help="If specified, no requirements will be installed, nor do they need to be specified"
+    )
+    parser.add_argument(
+        "--skip-repo-detection",
+        action="store_true",
+        help="If specified, skip repository detection when no repository is specified. "
+        "No repository will be set in remote execution"
+    )
+    parser.add_argument(
+        "--skip-python-env-install",
+        action="store_true",
+        help="If specified, the agent will not install any required Python packages when running the task. "
+        "Instead, it will use the preexisting Python environment to run the task. "
+        "Only relevant when the agent is running in Docker mode or is running the task in Kubernetes"
+    )
     parser.add_argument(
         "--pipeline",
         action="store_true",
@@ -231,6 +249,11 @@ def cli() -> None:
         print("Importing offline session: {}".format(args.import_offline_session))
         Task.import_offline_session(args.import_offline_session)
     else:
+        docker_args = args.docker_args
+        if args.skip_python_env_install:
+            docker_args = ((docker_args or "") + " -e CLEARML_AGENT_SKIP_PYTHON_ENV_INSTALL=1").lstrip(" ")
+        packages = "" if args.force_no_requirements else args.packages
+        requirements = "" if args.force_no_requirements else args.requirements
         if args.script and args.script.endswith(".sh") and not args.binary:
             print("Detected shell script. Binary will be set to '/bin/bash'")
         if args.pipeline:
@@ -249,15 +272,16 @@ def cli() -> None:
                 script=args.script,
                 module=args.module,
                 working_directory=args.cwd,
-                packages=args.packages,
-                requirements_file=args.requirements,
+                packages=packages,
+                requirements_file=requirements,
                 docker=args.docker,
-                docker_args=args.docker_args,
+                docker_args=docker_args,
                 docker_bash_setup_script=bash_setup_script,
                 version=args.pipeline_version,
                 add_run_number=False if args.pipeline_dont_add_run_number else True,
                 binary=args.binary,
-                argparse_args=argparse_args or None
+                argparse_args=argparse_args or None,
+                detect_repository=not args.skip_repo_detection
             )
             created_task = pipeline._task
         else:
@@ -271,17 +295,18 @@ def cli() -> None:
                 script=args.script,
                 module=args.module,
                 working_directory=args.cwd,
-                packages=args.packages,
-                requirements_file=args.requirements,
+                packages=packages,
+                requirements_file=requirements,
                 docker=args.docker,
-                docker_args=args.docker_args,
+                docker_args=docker_args,
                 docker_bash_setup_script=bash_setup_script,
                 output_uri=args.output_uri,
                 base_task_id=args.base_task_id,
                 add_task_init_call=not args.skip_task_init,
                 raise_on_missing_entries=True,
                 verbose=True,
-                binary=args.binary
+                binary=args.binary,
+                detect_repository=not args.skip_repo_detection
             )
             # verify args before creating the Task
             create_and_populate.update_task_args(args.args)
@@ -299,7 +324,11 @@ def cli() -> None:
     print("New {} created id={}".format("pipeline" if args.pipeline else "task", created_task.id))

     if not args.queue:
-        print("Warning: No queue was provided, leaving {} in draft-mode.", "pipeline" if args.pipeline else "task")
+        print(
+            "Warning: No queue was provided, leaving {} in draft-mode.".format(
+                "pipeline" if args.pipeline else "task"
+            )
+        )
         exit(0)

     Task.enqueue(created_task, queue_name=args.queue)
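Putting the CLI changes together: the three new flags are plain store_true options, and cli() translates them into the parameters used above. A condensed sketch of that translation, using a plain namespace in place of the real parsed arguments (an invocation such as clearml-task --project examples --name test --script train.py --force-no-requirements --skip-repo-detection --skip-python-env-install would yield equivalent values):

    from types import SimpleNamespace

    # Hypothetical parsed arguments mirroring the new store_true flags above.
    args = SimpleNamespace(
        docker_args=None,
        packages=["tqdm>=2.1"],
        requirements="requirements.txt",
        force_no_requirements=True,
        skip_python_env_install=True,
        skip_repo_detection=True,
    )

    docker_args = args.docker_args
    if args.skip_python_env_install:
        # Ask the agent to reuse the preexisting Python environment instead of building one.
        docker_args = ((docker_args or "") + " -e CLEARML_AGENT_SKIP_PYTHON_ENV_INSTALL=1").lstrip(" ")
    packages = "" if args.force_no_requirements else args.packages
    requirements = "" if args.force_no_requirements else args.requirements
    detect_repository = not args.skip_repo_detection

    print(docker_args)                                 # -e CLEARML_AGENT_SKIP_PYTHON_ENV_INSTALL=1
    print(repr(packages), repr(requirements), detect_repository)  # '' '' False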


@@ -1212,7 +1212,8 @@ class Task(_Task):
         add_task_init_call: bool = True,
         force_single_script_file: bool = False,
         binary: Optional[str] = None,
-        module: Optional[str] = None
+        module: Optional[str] = None,
+        detect_repository: bool = True
     ) -> TaskInstance:
         """
         Manually create and populate a new Task (experiment) in the system.
@@ -1243,6 +1244,7 @@ class Task(_Task):
         :param packages: Manually specify a list of required packages. Example: ``["tqdm>=2.1", "scikit-learn"]``
             or `True` to automatically create requirements
             based on locally installed packages (repository must be local).
+            Pass an empty string to not install any packages (not even from the repository)
         :param requirements_file: Specify requirements.txt file to install when setting the session.
             If not provided, the requirements.txt from the repository will be used.
         :param docker: Select the docker image to be executed in by the remote session
@@ -1259,6 +1261,8 @@ class Task(_Task):
         :param module: If specified instead of executing `script`, a module named `module` is executed.
             Implies script is empty. Module can contain multiple argument for execution,
             for example: module="my.module arg1 arg2"
+        :param detect_repository: If True, detect the repository if no repository has been specified.
+            If False, don't detect repository under any circumstance. Ignored if `repo` is specified
         :return: The newly created Task (experiment)
         :rtype: Task
@@ -1293,7 +1297,8 @@ class Task(_Task):
             force_single_script_file=force_single_script_file,
             raise_on_missing_entries=False,
             module=module,
-            binary=binary
+            binary=binary,
+            detect_repository=detect_repository
         )
         task = manual_populate.create_task()
         if task and argparse_args:
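Finally, the same behavior is reachable directly from Task.create, which forwards the new parameter to CreateAndPopulate as shown above. A minimal sketch; the project, task, and script names are placeholders:

    from clearml import Task

    task = Task.create(
        project_name="examples",
        task_name="script-only-task",
        script="train.py",
        packages="",                # per the updated docstring: install no packages at all
        detect_repository=False,    # new in this commit: skip repository auto-detection
    )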