From afbbd3975a3cc8b24d0fa79e8b86b2488ef388d3 Mon Sep 17 00:00:00 2001 From: clearml <> Date: Sun, 18 May 2025 11:04:37 +0300 Subject: [PATCH] Add support for ClearML Task options `--force-no-requirements`, `--skip-repo-detection` and `--skip-python-env-install` --- clearml/automation/controller.py | 9 ++- clearml/backend_interface/task/populate.py | 80 +++++++++++++--------- clearml/cli/task/__main__.py | 47 ++++++++++--- clearml/task.py | 9 ++- 4 files changed, 99 insertions(+), 46 deletions(-) diff --git a/clearml/automation/controller.py b/clearml/automation/controller.py index 7c9ad1a7..9e9f951d 100755 --- a/clearml/automation/controller.py +++ b/clearml/automation/controller.py @@ -1590,7 +1590,8 @@ class PipelineController(object): version: Optional[str] = None, add_run_number: bool = True, binary: Optional[str] = None, - module: Optional[str] = None + module: Optional[str] = None, + detect_repository: bool = True ) -> "PipelineController": """ Manually create and populate a new Pipeline in the system. @@ -1613,6 +1614,7 @@ class PipelineController(object): :param packages: Manually specify a list of required packages. Example: ``["tqdm>=2.1", "scikit-learn"]`` or `True` to automatically create requirements based on locally installed packages (repository must be local). + Pass an empty string to not install any packages (not even from the repository) :param requirements_file: Specify requirements.txt file to install when setting the session. If not provided, the requirements.txt from the repository will be used. :param docker: Select the docker image to be executed in by the remote session @@ -1626,6 +1628,8 @@ class PipelineController(object): :param module: If specified instead of executing `script`, a module named `module` is executed. Implies script is empty. Module can contain multiple argument for execution, for example: module="my.module arg1 arg2" + :param detect_repository: If True, detect the repository if no repository has been specified. 
+ If False, don't detect repository under any circumstance. Ignored if `repo` is specified :return: The newly created PipelineController """ @@ -1648,7 +1652,8 @@ class PipelineController(object): add_task_init_call=False, force_single_script_file=force_single_script_file, binary=binary, - module=module + module=module, + detect_repository=detect_repository ) cls._create_pipeline_projects( task=pipeline_controller, diff --git a/clearml/backend_interface/task/populate.py b/clearml/backend_interface/task/populate.py index 0c6413b1..4962d258 100644 --- a/clearml/backend_interface/task/populate.py +++ b/clearml/backend_interface/task/populate.py @@ -49,6 +49,7 @@ class CreateAndPopulate(object): raise_on_missing_entries: bool = False, verbose: bool = False, binary: Optional[str] = None, + detect_repository: bool = True ) -> None: """ Create a new Task from an existing code base. @@ -76,6 +77,7 @@ class CreateAndPopulate(object): :param packages: Manually specify a list of required packages. Example: ["tqdm>=2.1", "scikit-learn"] or `True` to automatically create requirements based on locally installed packages (repository must be local). + Pass an empty string to not install any packages (not even from the repository) :param requirements_file: Specify requirements.txt file to install when setting the session. If not provided, the requirements.txt from the repository will be used. :param docker: Select the docker image to be executed in by the remote session @@ -91,6 +93,8 @@ class CreateAndPopulate(object): :param raise_on_missing_entries: If True, raise ValueError on missing entries when populating :param verbose: If True, print verbose logging :param binary: Binary used to launch the entry point + :param detect_repository: If True, detect the repository if no repository has been specified. + If False, don't detect repository under any circumstance. 
Ignored if `repo` is specified """ if repo and len(urlparse(repo).scheme) <= 1 and not re.compile(self._VCS_SSH_REGEX).match(repo): folder = repo @@ -124,9 +128,12 @@ class CreateAndPopulate(object): self.module = module self.cwd = working_directory assert not packages or isinstance(packages, (tuple, list, bool)) - self.packages = ( - list(packages) if packages is not None and not isinstance(packages, bool) else (packages or None) - ) + if isinstance(packages, bool): + self.packages = True if packages else None + elif packages: + self.packages = list(packages) + else: + self.packages = packages self.requirements_file = Path(requirements_file) if requirements_file else None self.base_task_id = base_task_id self.docker = dict(image=docker, args=docker_args, bash_script=docker_bash_setup_script) @@ -140,6 +147,7 @@ class CreateAndPopulate(object): self.raise_on_missing_entries = raise_on_missing_entries self.verbose = verbose self.binary = binary + self.detect_repository = detect_repository def create_task(self, dry_run: bool = False) -> Union[Task, Dict]: """ @@ -196,45 +204,49 @@ class CreateAndPopulate(object): local_entry_file = entry_point - repo_info, requirements = ScriptInfo.get( - filepaths=[local_entry_file], - log=getLogger(), - create_requirements=self.packages is True, - uncommitted_from_remote=True, - detect_jupyter_notebook=False, - add_missing_installed_packages=True, - detailed_req_report=False, - force_single_script=self.force_single_script_file, - ) - - if stand_alone_script_outside_repo: - # if we have a standalone script and a local repo we skip[ the local diff and store it - local_entry_file = Path(self.script).as_posix() - a_create_requirements = self.packages is True - a_repo_info, a_requirements = ScriptInfo.get( - filepaths=[Path(self.script).as_posix()], + if self.detect_repository: + repo_info, requirements = ScriptInfo.get( + filepaths=[local_entry_file], log=getLogger(), - create_requirements=a_create_requirements, + 
create_requirements=self.packages is True, uncommitted_from_remote=True, detect_jupyter_notebook=False, add_missing_installed_packages=True, detailed_req_report=False, - force_single_script=True, + force_single_script=self.force_single_script_file, ) - if repo_info.script["diff"]: - print( - "Warning: local git repo diff is ignored, " - "storing only the standalone script form {}".format(self.script) + else: + repo_info, requirements = None, None + + if stand_alone_script_outside_repo: + # if we have a standalone script and a local repo we skip the local diff and store it + local_entry_file = Path(self.script).as_posix() + if self.detect_repository: + a_create_requirements = self.packages is True + a_repo_info, a_requirements = ScriptInfo.get( + filepaths=[Path(self.script).as_posix()], + log=getLogger(), + create_requirements=a_create_requirements, + uncommitted_from_remote=True, + detect_jupyter_notebook=False, + add_missing_installed_packages=True, + detailed_req_report=False, + force_single_script=True, ) - repo_info.script["diff"] = a_repo_info.script["diff"] or "" - repo_info.script["entry_point"] = a_repo_info.script["entry_point"] - if a_create_requirements: - repo_info.script["requirements"] = a_repo_info.script.get("requirements") or {} + if repo_info.script["diff"]: + print( + "Warning: local git repo diff is ignored, " + "storing only the standalone script from {}".format(self.script) + ) + repo_info.script["diff"] = a_repo_info.script["diff"] or "" + repo_info.script["entry_point"] = a_repo_info.script["entry_point"] + if a_create_requirements: + repo_info.script["requirements"] = a_repo_info.script.get("requirements") or {} # check if we have no repository and no requirements raise error if ( self.raise_on_missing_entries - and (not self.requirements_file and not self.packages) + and (self.requirements_file is None and self.packages is None) and not self.repo and (not repo_info or not repo_info.script or not repo_info.script.get("repository")) and 
(not entry_point or not entry_point.endswith(".sh")) @@ -418,6 +430,8 @@ class CreateAndPopulate(object): reqs = [line.strip() for line in f.readlines()] if self.packages and self.packages is not True: reqs += self.packages + if self.packages == "" and len(reqs) == 0: + reqs = [""] if reqs: # make sure we have clearml. clearml_found = False @@ -428,7 +442,7 @@ class CreateAndPopulate(object): if package == "clearml": clearml_found = True break - if not clearml_found: + if not clearml_found and reqs != [""]: reqs.append("clearml") task_state["script"]["requirements"] = {"pip": "\n".join(reqs)} elif not self.repo and repo_info and not repo_info.script.get("requirements"): @@ -537,7 +551,7 @@ class CreateAndPopulate(object): ) if self.verbose: - if task_state["script"]["repository"]: + if task_state["script"].get("repository"): repo_details = { k: v for k, v in task_state["script"].items() if v and k not in ("diff", "requirements", "binary") } diff --git a/clearml/cli/task/__main__.py b/clearml/cli/task/__main__.py index 13326135..b3a9cb06 100644 --- a/clearml/cli/task/__main__.py +++ b/clearml/cli/task/__main__.py @@ -180,6 +180,24 @@ def setup_parser(parser: ArgumentParser) -> None: default=None, help="Specify the path to the offline session you want to import.", ) + parser.add_argument( + "--force-no-requirements", + action="store_true", + help="If specified, no requirements will be installed, nor do they need to be specified" + ) + parser.add_argument( + "--skip-repo-detection", + action="store_true", + help="If specified, skip repository detection when no repository is specified. " + "No repository will be set in remote execution" + ) + parser.add_argument( + "--skip-python-env-install", + action="store_true", + help="If specified, the agent will not install any required Python packages when running the task. " + "Instead, it will use the preexisting Python environment to run the task. 
" + "Only relevant when the agent is running in Docker mode or is running the task in Kubernetes" + ) parser.add_argument( "--pipeline", action="store_true", @@ -231,6 +249,11 @@ def cli() -> None: print("Importing offline session: {}".format(args.import_offline_session)) Task.import_offline_session(args.import_offline_session) else: + docker_args = args.docker_args + if args.skip_python_env_install: + docker_args = ((docker_args or "") + " -e CLEARML_AGENT_SKIP_PYTHON_ENV_INSTALL=1").lstrip(" ") + packages = "" if args.force_no_requirements else args.packages + requirements = "" if args.force_no_requirements else args.requirements if args.script and args.script.endswith(".sh") and not args.binary: print("Detected shell script. Binary will be set to '/bin/bash'") if args.pipeline: @@ -249,15 +272,16 @@ def cli() -> None: script=args.script, module=args.module, working_directory=args.cwd, - packages=args.packages, - requirements_file=args.requirements, + packages=packages, + requirements_file=requirements, docker=args.docker, - docker_args=args.docker_args, + docker_args=docker_args, docker_bash_setup_script=bash_setup_script, version=args.pipeline_version, add_run_number=False if args.pipeline_dont_add_run_number else True, binary=args.binary, - argparse_args=argparse_args or None + argparse_args=argparse_args or None, + detect_repository=not args.skip_repo_detection ) created_task = pipeline._task else: @@ -271,17 +295,18 @@ def cli() -> None: script=args.script, module=args.module, working_directory=args.cwd, - packages=args.packages, - requirements_file=args.requirements, + packages=packages, + requirements_file=requirements, docker=args.docker, - docker_args=args.docker_args, + docker_args=docker_args, docker_bash_setup_script=bash_setup_script, output_uri=args.output_uri, base_task_id=args.base_task_id, add_task_init_call=not args.skip_task_init, raise_on_missing_entries=True, verbose=True, - binary=args.binary + binary=args.binary, + detect_repository=not 
args.skip_repo_detection ) # verify args before creating the Task create_and_populate.update_task_args(args.args) @@ -299,7 +324,11 @@ def cli() -> None: print("New {} created id={}".format("pipeline" if args.pipeline else "task", created_task.id)) if not args.queue: - print("Warning: No queue was provided, leaving {} in draft-mode.", "pipeline" if args.pipeline else "task") + print( + "Warning: No queue was provided, leaving {} in draft-mode.".format( + "pipeline" if args.pipeline else "task" + ) + ) exit(0) Task.enqueue(created_task, queue_name=args.queue) diff --git a/clearml/task.py b/clearml/task.py index efb6647a..73b208f3 100644 --- a/clearml/task.py +++ b/clearml/task.py @@ -1212,7 +1212,8 @@ class Task(_Task): add_task_init_call: bool = True, force_single_script_file: bool = False, binary: Optional[str] = None, - module: Optional[str] = None + module: Optional[str] = None, + detect_repository: bool = True ) -> TaskInstance: """ Manually create and populate a new Task (experiment) in the system. @@ -1243,6 +1244,7 @@ class Task(_Task): :param packages: Manually specify a list of required packages. Example: ``["tqdm>=2.1", "scikit-learn"]`` or `True` to automatically create requirements based on locally installed packages (repository must be local). + Pass an empty string to not install any packages (not even from the repository) :param requirements_file: Specify requirements.txt file to install when setting the session. If not provided, the requirements.txt from the repository will be used. :param docker: Select the docker image to be executed in by the remote session @@ -1259,6 +1261,8 @@ class Task(_Task): :param module: If specified instead of executing `script`, a module named `module` is executed. Implies script is empty. Module can contain multiple argument for execution, for example: module="my.module arg1 arg2" + :param detect_repository: If True, detect the repository if no repository has been specified. 
+ If False, don't detect repository under any circumstance. Ignored if `repo` is specified :return: The newly created Task (experiment) :rtype: Task @@ -1293,7 +1297,8 @@ class Task(_Task): force_single_script_file=force_single_script_file, raise_on_missing_entries=False, module=module, - binary=binary + binary=binary, + detect_repository=detect_repository ) task = manual_populate.create_task() if task and argparse_args: