From cf1178ff5fd20b1839f273bd7d76d0c248299f96 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Wed, 14 Aug 2024 13:11:12 +0300 Subject: [PATCH] Add custom task binary support to clearml-task and CreateAndPopulate (allows bash script execution, requires agent version >=1.9.0) --- clearml/backend_interface/task/populate.py | 21 ++++++++++++++++----- clearml/cli/task/__main__.py | 18 +++++++++++++++--- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/clearml/backend_interface/task/populate.py b/clearml/backend_interface/task/populate.py index 76aaaab5..06d8a27c 100644 --- a/clearml/backend_interface/task/populate.py +++ b/clearml/backend_interface/task/populate.py @@ -48,6 +48,7 @@ class CreateAndPopulate(object): force_single_script_file=False, # type: bool raise_on_missing_entries=False, # type: bool verbose=False, # type: bool + binary=None # type: Optional[str] ): # type: (...) -> None """ @@ -90,6 +91,7 @@ class CreateAndPopulate(object): :param force_single_script_file: If True, do not auto-detect local repository :param raise_on_missing_entries: If True, raise ValueError on missing entries when populating :param verbose: If True, print verbose logging + :param binary: Binary used to launch the entry point """ if repo and len(urlparse(repo).scheme) <= 1 and not re.compile(self._VCS_SSH_REGEX).match(repo): folder = repo @@ -136,6 +138,7 @@ class CreateAndPopulate(object): self.force_single_script_file = bool(force_single_script_file) self.raise_on_missing_entries = raise_on_missing_entries self.verbose = verbose + self.binary = binary def create_task(self, dry_run=False): # type: (bool) -> Union[Task, Dict] @@ -148,6 +151,7 @@ class CreateAndPopulate(object): local_entry_file = None repo_info = None stand_alone_script_outside_repo = False + entry_point = "" # populate from local repository / script if self.folder or (self.script and Path(self.script).is_file() and not self.repo): self.folder = os.path.expandvars(os.path.expanduser(self.folder)) if 
self.folder else None @@ -222,7 +226,8 @@ class CreateAndPopulate(object): # check if we have no repository and no requirements raise error if self.raise_on_missing_entries and (not self.requirements_file and not self.packages) \ and not self.repo and ( - not repo_info or not repo_info.script or not repo_info.script.get('repository')): + not repo_info or not repo_info.script or not repo_info.script.get('repository')) \ + and (not entry_point or not entry_point.endswith(".sh")): raise ValueError("Standalone script detected \'{}\', but no requirements provided".format(self.script)) if dry_run: task = None @@ -266,10 +271,10 @@ class CreateAndPopulate(object): task_state['script']['diff'] = repo_info.script['diff'] or '' task_state['script']['working_dir'] = repo_info.script['working_dir'] task_state['script']['entry_point'] = repo_info.script['entry_point'] - task_state['script']['binary'] = '/bin/bash' if ( + task_state['script']['binary'] = self.binary or ('/bin/bash' if ( (repo_info.script['entry_point'] or '').lower().strip().endswith('.sh') and not (repo_info.script['entry_point'] or '').lower().strip().startswith('-m ')) \ - else repo_info.script['binary'] + else repo_info.script['binary']) task_state['script']['requirements'] = repo_info.script.get('requirements') or {} if self.cwd: cwd = self.cwd @@ -344,14 +349,20 @@ class CreateAndPopulate(object): detailed_req_report=False, force_single_script=True, ) - task_state['script']['binary'] = '/bin/bash' if ( + task_state['script']['binary'] = self.binary or ('/bin/bash' if ( (repo_info.script['entry_point'] or '').lower().strip().endswith('.sh') and not (repo_info.script['entry_point'] or '').lower().strip().startswith('-m ')) \ - else repo_info.script['binary'] + else repo_info.script['binary']) task_state['script']['diff'] = repo_info.script['diff'] or '' task_state['script']['entry_point'] = repo_info.script['entry_point'] if create_requirements: task_state['script']['requirements'] = 
repo_info.script.get('requirements') or {} + else: + if self.binary: + task_state["script"]["binary"] = self.binary + elif entry_point and entry_point.lower().strip().endswith(".sh") and not \ + entry_point.lower().strip().startswith("-m"): + task_state["script"]["binary"] = "/bin/bash" else: # standalone task task_state['script']['entry_point'] = self.script if self.script else \ diff --git a/clearml/cli/task/__main__.py b/clearml/cli/task/__main__.py index 10553d82..3e6d2ec7 100644 --- a/clearml/cli/task/__main__.py +++ b/clearml/cli/task/__main__.py @@ -59,11 +59,20 @@ def setup_parser(parser): type=str, default=None, help="Specify the entry point script for the remote execution. " - "Currently support .py .ipynb and .sh scripts (python, jupyter notebook, bash) " + "Currently supports .py .ipynb and .sh scripts (python, jupyter notebook, bash) " "When used in tandem with --repo the script should be a relative path inside " - "the repository, for example: --script source/train.py " + "the repository, for example: --script source/train.py. " "When used with --folder it supports a direct path to a file inside the local " - "repository itself, for example: --script ~/project/source/train.py", + "repository itself, for example: --script ~/project/source/train.py. " + "To run a bash script, simply specify the path of that script; the script should " + "have the .sh extension, for example: --script init.sh" + ) + parser.add_argument( + "--binary", + type=str, + default=None, + help="Binary used to launch the entry point. For example: '--binary python3', '--binary /bin/bash'. " + "By default, the binary will be auto-detected." ) parser.add_argument( "--module", @@ -186,6 +195,8 @@ def cli(): print("Importing offline session: {}".format(args.import_offline_session)) Task.import_offline_session(args.import_offline_session) else: + if args.script and args.script.endswith(".sh") and not args.binary: + print("Detected shell script. 
Binary will be set to '/bin/bash'") create_populate = CreateAndPopulate( project_name=args.project, task_name=args.name, @@ -206,6 +217,7 @@ def cli(): add_task_init_call=not args.skip_task_init, raise_on_missing_entries=True, verbose=True, + binary=args.binary ) # verify args before creating the Task create_populate.update_task_args(args.args)