Add support for ClearML Task options --force-no-requirements, --skip-repo-detection and --skip-python-env-install

This commit is contained in:
clearml 2025-05-18 11:04:37 +03:00
parent 37ea48b619
commit afbbd3975a
4 changed files with 99 additions and 46 deletions

View File

@ -1590,7 +1590,8 @@ class PipelineController(object):
version: Optional[str] = None,
add_run_number: bool = True,
binary: Optional[str] = None,
module: Optional[str] = None
module: Optional[str] = None,
detect_repository: bool = True
) -> "PipelineController":
"""
Manually create and populate a new Pipeline in the system.
@ -1613,6 +1614,7 @@ class PipelineController(object):
:param packages: Manually specify a list of required packages. Example: ``["tqdm>=2.1", "scikit-learn"]``
or `True` to automatically create requirements
based on locally installed packages (repository must be local).
Pass an empty string to not install any packages (not even from the repository)
:param requirements_file: Specify requirements.txt file to install when setting the session.
If not provided, the requirements.txt from the repository will be used.
:param docker: Select the docker image to be executed in by the remote session
@ -1626,6 +1628,8 @@ class PipelineController(object):
:param module: If specified instead of executing `script`, a module named `module` is executed.
Implies script is empty. Module can contain multiple arguments for execution,
for example: module="my.module arg1 arg2"
:param detect_repository: If True, detect the repository if no repository has been specified.
If False, don't detect repository under any circumstance. Ignored if `repo` is specified
:return: The newly created PipelineController
"""
@ -1648,7 +1652,8 @@ class PipelineController(object):
add_task_init_call=False,
force_single_script_file=force_single_script_file,
binary=binary,
module=module
module=module,
detect_repository=detect_repository
)
cls._create_pipeline_projects(
task=pipeline_controller,

View File

@ -49,6 +49,7 @@ class CreateAndPopulate(object):
raise_on_missing_entries: bool = False,
verbose: bool = False,
binary: Optional[str] = None,
detect_repository: bool = True
) -> None:
"""
Create a new Task from an existing code base.
@ -76,6 +77,7 @@ class CreateAndPopulate(object):
:param packages: Manually specify a list of required packages. Example: ["tqdm>=2.1", "scikit-learn"]
or `True` to automatically create requirements
based on locally installed packages (repository must be local).
Pass an empty string to not install any packages (not even from the repository)
:param requirements_file: Specify requirements.txt file to install when setting the session.
If not provided, the requirements.txt from the repository will be used.
:param docker: Select the docker image to be executed in by the remote session
@ -91,6 +93,8 @@ class CreateAndPopulate(object):
:param raise_on_missing_entries: If True, raise ValueError on missing entries when populating
:param verbose: If True, print verbose logging
:param binary: Binary used to launch the entry point
:param detect_repository: If True, detect the repository if no repository has been specified.
If False, don't detect repository under any circumstance. Ignored if `repo` is specified
"""
if repo and len(urlparse(repo).scheme) <= 1 and not re.compile(self._VCS_SSH_REGEX).match(repo):
folder = repo
@ -124,9 +128,12 @@ class CreateAndPopulate(object):
self.module = module
self.cwd = working_directory
assert not packages or isinstance(packages, (tuple, list, bool))
self.packages = (
list(packages) if packages is not None and not isinstance(packages, bool) else (packages or None)
)
if isinstance(packages, bool):
self.packages = True if packages else None
elif packages:
self.packages = list(packages)
else:
self.packages = packages
self.requirements_file = Path(requirements_file) if requirements_file else None
self.base_task_id = base_task_id
self.docker = dict(image=docker, args=docker_args, bash_script=docker_bash_setup_script)
@ -140,6 +147,7 @@ class CreateAndPopulate(object):
self.raise_on_missing_entries = raise_on_missing_entries
self.verbose = verbose
self.binary = binary
self.detect_repository = detect_repository
def create_task(self, dry_run: bool = False) -> Union[Task, Dict]:
"""
@ -196,45 +204,49 @@ class CreateAndPopulate(object):
local_entry_file = entry_point
repo_info, requirements = ScriptInfo.get(
filepaths=[local_entry_file],
log=getLogger(),
create_requirements=self.packages is True,
uncommitted_from_remote=True,
detect_jupyter_notebook=False,
add_missing_installed_packages=True,
detailed_req_report=False,
force_single_script=self.force_single_script_file,
)
if stand_alone_script_outside_repo:
# if we have a standalone script and a local repo, we skip the local repo diff and store only the script
local_entry_file = Path(self.script).as_posix()
a_create_requirements = self.packages is True
a_repo_info, a_requirements = ScriptInfo.get(
filepaths=[Path(self.script).as_posix()],
if self.detect_repository:
repo_info, requirements = ScriptInfo.get(
filepaths=[local_entry_file],
log=getLogger(),
create_requirements=a_create_requirements,
create_requirements=self.packages is True,
uncommitted_from_remote=True,
detect_jupyter_notebook=False,
add_missing_installed_packages=True,
detailed_req_report=False,
force_single_script=True,
force_single_script=self.force_single_script_file,
)
if repo_info.script["diff"]:
print(
"Warning: local git repo diff is ignored, "
"storing only the standalone script form {}".format(self.script)
else:
repo_info, requirements = None, None
if stand_alone_script_outside_repo:
# if we have a standalone script and a local repo, we skip the local repo diff and store only the script
local_entry_file = Path(self.script).as_posix()
if self.detect_repository:
a_create_requirements = self.packages is True
a_repo_info, a_requirements = ScriptInfo.get(
filepaths=[Path(self.script).as_posix()],
log=getLogger(),
create_requirements=a_create_requirements,
uncommitted_from_remote=True,
detect_jupyter_notebook=False,
add_missing_installed_packages=True,
detailed_req_report=False,
force_single_script=True,
)
repo_info.script["diff"] = a_repo_info.script["diff"] or ""
repo_info.script["entry_point"] = a_repo_info.script["entry_point"]
if a_create_requirements:
repo_info.script["requirements"] = a_repo_info.script.get("requirements") or {}
if repo_info.script["diff"]:
print(
"Warning: local git repo diff is ignored, "
"storing only the standalone script form {}".format(self.script)
)
repo_info.script["diff"] = a_repo_info.script["diff"] or ""
repo_info.script["entry_point"] = a_repo_info.script["entry_point"]
if a_create_requirements:
repo_info.script["requirements"] = a_repo_info.script.get("requirements") or {}
# check if we have no repository and no requirements raise error
if (
self.raise_on_missing_entries
and (not self.requirements_file and not self.packages)
and (self.requirements_file is None and self.packages is None)
and not self.repo
and (not repo_info or not repo_info.script or not repo_info.script.get("repository"))
and (not entry_point or not entry_point.endswith(".sh"))
@ -418,6 +430,8 @@ class CreateAndPopulate(object):
reqs = [line.strip() for line in f.readlines()]
if self.packages and self.packages is not True:
reqs += self.packages
if self.packages == "" and len(reqs) == 0:
reqs = [""]
if reqs:
# make sure we have clearml.
clearml_found = False
@ -428,7 +442,7 @@ class CreateAndPopulate(object):
if package == "clearml":
clearml_found = True
break
if not clearml_found:
if not clearml_found and reqs != [""]:
reqs.append("clearml")
task_state["script"]["requirements"] = {"pip": "\n".join(reqs)}
elif not self.repo and repo_info and not repo_info.script.get("requirements"):
@ -537,7 +551,7 @@ class CreateAndPopulate(object):
)
if self.verbose:
if task_state["script"]["repository"]:
if task_state["script"].get("repository"):
repo_details = {
k: v for k, v in task_state["script"].items() if v and k not in ("diff", "requirements", "binary")
}

View File

@ -180,6 +180,24 @@ def setup_parser(parser: ArgumentParser) -> None:
default=None,
help="Specify the path to the offline session you want to import.",
)
parser.add_argument(
"--force-no-requirements",
action="store_true",
help="If specified, no requirements will be installed, nor do they need to be specified"
)
parser.add_argument(
"--skip-repo-detection",
action="store_true",
help="If specified, skip repository detection when no repository is specified. "
"No repository will be set in remote execution"
)
parser.add_argument(
"--skip-python-env-install",
action="store_true",
help="If specified, the agent will not install any required Python packages when running the task. "
"Instead, it will use the preexisting Python environment to run the task. "
"Only relevant when the agent is running in Docker mode or is running the task in Kubernetes"
)
parser.add_argument(
"--pipeline",
action="store_true",
@ -231,6 +249,11 @@ def cli() -> None:
print("Importing offline session: {}".format(args.import_offline_session))
Task.import_offline_session(args.import_offline_session)
else:
docker_args = args.docker_args
if args.skip_python_env_install:
docker_args = ((docker_args or "") + " -e CLEARML_AGENT_SKIP_PYTHON_ENV_INSTALL=1").lstrip(" ")
packages = "" if args.force_no_requirements else args.packages
requirements = "" if args.force_no_requirements else args.requirements
if args.script and args.script.endswith(".sh") and not args.binary:
print("Detected shell script. Binary will be set to '/bin/bash'")
if args.pipeline:
@ -249,15 +272,16 @@ def cli() -> None:
script=args.script,
module=args.module,
working_directory=args.cwd,
packages=args.packages,
requirements_file=args.requirements,
packages=packages,
requirements_file=requirements,
docker=args.docker,
docker_args=args.docker_args,
docker_args=docker_args,
docker_bash_setup_script=bash_setup_script,
version=args.pipeline_version,
add_run_number=False if args.pipeline_dont_add_run_number else True,
binary=args.binary,
argparse_args=argparse_args or None
argparse_args=argparse_args or None,
detect_repository=not args.skip_repo_detection
)
created_task = pipeline._task
else:
@ -271,17 +295,18 @@ def cli() -> None:
script=args.script,
module=args.module,
working_directory=args.cwd,
packages=args.packages,
requirements_file=args.requirements,
packages=packages,
requirements_file=requirements,
docker=args.docker,
docker_args=args.docker_args,
docker_args=docker_args,
docker_bash_setup_script=bash_setup_script,
output_uri=args.output_uri,
base_task_id=args.base_task_id,
add_task_init_call=not args.skip_task_init,
raise_on_missing_entries=True,
verbose=True,
binary=args.binary
binary=args.binary,
detect_repository=not args.skip_repo_detection
)
# verify args before creating the Task
create_and_populate.update_task_args(args.args)
@ -299,7 +324,11 @@ def cli() -> None:
print("New {} created id={}".format("pipeline" if args.pipeline else "task", created_task.id))
if not args.queue:
print("Warning: No queue was provided, leaving {} in draft-mode.", "pipeline" if args.pipeline else "task")
print(
"Warning: No queue was provided, leaving {} in draft-mode.".format(
"pipeline" if args.pipeline else "task"
)
)
exit(0)
Task.enqueue(created_task, queue_name=args.queue)

View File

@ -1212,7 +1212,8 @@ class Task(_Task):
add_task_init_call: bool = True,
force_single_script_file: bool = False,
binary: Optional[str] = None,
module: Optional[str] = None
module: Optional[str] = None,
detect_repository: bool = True
) -> TaskInstance:
"""
Manually create and populate a new Task (experiment) in the system.
@ -1243,6 +1244,7 @@ class Task(_Task):
:param packages: Manually specify a list of required packages. Example: ``["tqdm>=2.1", "scikit-learn"]``
or `True` to automatically create requirements
based on locally installed packages (repository must be local).
Pass an empty string to not install any packages (not even from the repository)
:param requirements_file: Specify requirements.txt file to install when setting the session.
If not provided, the requirements.txt from the repository will be used.
:param docker: Select the docker image to be executed in by the remote session
@ -1259,6 +1261,8 @@ class Task(_Task):
:param module: If specified instead of executing `script`, a module named `module` is executed.
Implies script is empty. Module can contain multiple arguments for execution,
for example: module="my.module arg1 arg2"
:param detect_repository: If True, detect the repository if no repository has been specified.
If False, don't detect repository under any circumstance. Ignored if `repo` is specified
:return: The newly created Task (experiment)
:rtype: Task
@ -1293,7 +1297,8 @@ class Task(_Task):
force_single_script_file=force_single_script_file,
raise_on_missing_entries=False,
module=module,
binary=binary
binary=binary,
detect_repository=detect_repository
)
task = manual_populate.create_task()
if task and argparse_args: