From 81b4c49f8b27d21805d9a7b1275f07a12886eafd Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Thu, 4 Jul 2024 15:24:40 +0300 Subject: [PATCH] Add clearml-task and CreateAndPopulate support for bash scripts, ipynb and python modules. requires clearml-agent 1.9+ --- clearml/backend_interface/task/populate.py | 191 ++++++++++++++++----- clearml/cli/task/__main__.py | 8 +- 2 files changed, 155 insertions(+), 44 deletions(-) diff --git a/clearml/backend_interface/task/populate.py b/clearml/backend_interface/task/populate.py index 12dbb6fc..35af6d77 100644 --- a/clearml/backend_interface/task/populate.py +++ b/clearml/backend_interface/task/populate.py @@ -36,6 +36,7 @@ class CreateAndPopulate(object): commit=None, # type: Optional[str] script=None, # type: Optional[str] working_directory=None, # type: Optional[str] + module=None, # type: Optional[str] packages=None, # type: Optional[Union[bool, Sequence[str]]] requirements_file=None, # type: Optional[Union[str, Path]] docker=None, # type: Optional[str] @@ -67,6 +68,9 @@ class CreateAndPopulate(object): remote git repository the script should be a relative path inside the repository, for example: './source/train.py' . When used with local repository path it supports a direct path to a file inside the local repository itself, for example: '~/project/source/train.py' + :param module: If specified instead of executing `script`, a module named `module` is executed. + Implies script is empty. Module can contain multiple arguments for execution, + for example: module="my.module arg1 arg2" :param working_directory: Working directory to launch the script from. Default: repository root folder. Relative to repo root or local folder. :param packages: Manually specify a list of required packages. 
Example: ["tqdm>=2.1", "scikit-learn"] @@ -92,10 +96,14 @@ class CreateAndPopulate(object): repo = None else: folder = None + + if script and module: + raise ValueError("Entry point script or module need to be specified not both") + if raise_on_missing_entries and not base_task_id: - if not script: + if not script and not module: raise ValueError("Entry point script not provided") - if not repo and not folder and not Path(script).is_file(): + if not repo and not folder and (script and not Path(script).is_file()): raise ValueError("Script file \'{}\' could not be found".format(script)) if raise_on_missing_entries and commit and branch: raise ValueError( @@ -111,6 +119,7 @@ class CreateAndPopulate(object): self.branch = branch self.repo = repo self.script = script + self.module = module self.cwd = working_directory assert not packages or isinstance(packages, (tuple, list, bool)) self.packages = list(packages) if packages is not None and not isinstance(packages, bool) \ @@ -138,21 +147,47 @@ class CreateAndPopulate(object): """ local_entry_file = None repo_info = None + stand_alone_script_outside_repo = False + # populate from local repository / script if self.folder or (self.script and Path(self.script).is_file() and not self.repo): self.folder = os.path.expandvars(os.path.expanduser(self.folder)) if self.folder else None self.script = os.path.expandvars(os.path.expanduser(self.script)) if self.script else None self.cwd = os.path.expandvars(os.path.expanduser(self.cwd)) if self.cwd else None - if Path(self.script).is_file(): - entry_point = self.script - else: - entry_point = (Path(self.folder) / self.script).as_posix() - entry_point = os.path.abspath(entry_point) - if not os.path.isfile(entry_point): - raise ValueError("Script entrypoint file \'{}\' could not be found".format(entry_point)) - local_entry_file = entry_point + if self.module: + entry_point = "-m {}".format(self.module) + # we must have a folder if we are here + local_entry_file = 
self.folder.rstrip("/") + "/." + else: + if Path(self.script).is_file(): + entry_point = self.script + else: + entry_point = (Path(self.folder) / self.script).as_posix() + + entry_point = os.path.abspath(entry_point) + + try: + if entry_point and Path(entry_point).is_file() and self.folder and Path(self.folder).is_dir(): + # make sure we raise exception if this is outside the local repo folder + entry_point = (Path(entry_point) / (Path(entry_point).relative_to(self.folder))).as_posix() + except ValueError: + entry_point = self.folder + stand_alone_script_outside_repo = True + + if not os.path.isfile(entry_point) and not stand_alone_script_outside_repo: + if (not Path(self.script).is_absolute() and not Path(self.cwd).is_absolute() and + (Path(self.folder) / self.cwd / self.script).is_file()): + entry_point = (Path(self.folder) / self.cwd / self.script).as_posix() + elif (Path(self.cwd).is_absolute() and not Path(self.script).is_absolute() and + (Path(self.cwd) / self.script).is_file()): + entry_point = (Path(self.cwd) / self.script).as_posix() + else: + raise ValueError("Script entrypoint file \'{}\' could not be found".format(entry_point)) + + local_entry_file = entry_point + repo_info, requirements = ScriptInfo.get( - filepaths=[entry_point], + filepaths=[local_entry_file], log=getLogger(), create_requirements=self.packages is True, uncommitted_from_remote=True, @@ -162,6 +197,28 @@ class CreateAndPopulate(object): force_single_script=self.force_single_script_file, ) + if stand_alone_script_outside_repo: + # if we have a standalone script and a local repo we skip the local diff and store it + local_entry_file = Path(self.script).as_posix() + a_create_requirements = self.packages is True + a_repo_info, a_requirements = ScriptInfo.get( + filepaths=[Path(self.script).as_posix()], + log=getLogger(), + create_requirements=a_create_requirements, + uncommitted_from_remote=True, + detect_jupyter_notebook=False, + add_missing_installed_packages=True, + 
detailed_req_report=False, + force_single_script=True, + ) + if repo_info.script['diff']: + print("Warning: local git repo diff is ignored, " + "storing only the standalone script form {}".format(self.script)) + repo_info.script['diff'] = a_repo_info.script['diff'] or '' + repo_info.script['entry_point'] = a_repo_info.script['entry_point'] + if a_create_requirements: + repo_info['requirements'] = a_repo_info.script.get('requirements') or {} + # check if we have no repository and no requirements raise error if self.raise_on_missing_entries and (not self.requirements_file and not self.packages) \ and not self.repo and ( @@ -195,7 +252,7 @@ class CreateAndPopulate(object): # if there is nothing to populate, return if not any([ - self.folder, self.commit, self.branch, self.repo, self.script, self.cwd, + self.folder, self.commit, self.branch, self.repo, self.script, self.module, self.cwd, self.packages, self.requirements_file, self.base_task_id] + (list(self.docker.values())) ): return task @@ -209,31 +266,63 @@ class CreateAndPopulate(object): task_state['script']['diff'] = repo_info.script['diff'] or '' task_state['script']['working_dir'] = repo_info.script['working_dir'] task_state['script']['entry_point'] = repo_info.script['entry_point'] - task_state['script']['binary'] = repo_info.script['binary'] + task_state['script']['binary'] = '/bin/bash' if ( + (repo_info.script['entry_point'] or '').lower().strip().endswith('.sh') and + not (repo_info.script['entry_point'] or '').lower().strip().startswith('-m ')) \ + else repo_info.script['binary'] task_state['script']['requirements'] = repo_info.script.get('requirements') or {} if self.cwd: - self.cwd = self.cwd - # cwd should be relative to the repo_root, but we need the full path - # (repo_root + cwd) in order to resolve the entry point - cwd = (Path(repo_info.script['repo_root']) / self.cwd).as_posix() + cwd = self.cwd + if not Path(cwd).is_absolute(): + # cwd should be relative to the repo_root, but we need the full 
path + # (repo_root + cwd) in order to resolve the entry point + cwd = os.path.normpath((Path(repo_info.script['repo_root']) / self.cwd).as_posix()) + if not Path(cwd).is_dir(): + # we need to leave it as is, we have no idea, and this is a repo + cwd = self.cwd - if not Path(cwd).is_dir(): + elif not Path(cwd).is_dir(): + # we were passed an absolute dir and it does not exist raise ValueError("Working directory \'{}\' could not be found".format(cwd)) - entry_point = \ - Path(repo_info.script['repo_root']) / repo_info.script['working_dir'] / repo_info.script[ - 'entry_point'] - # resolve entry_point relative to the current working directory - entry_point = entry_point.relative_to(cwd).as_posix() + + if self.module: + entry_point = "-m {}".format(self.module) + elif stand_alone_script_outside_repo: + # this should be relative and the temp file we generated + entry_point = repo_info.script['entry_point'] + else: + entry_point = os.path.normpath( + Path(repo_info.script['repo_root']) / + repo_info.script['working_dir'] / repo_info.script['entry_point'] + ) + # resolve entry_point relative to the current working directory + if Path(cwd).is_absolute(): + entry_point = Path(entry_point).relative_to(cwd).as_posix() + else: + entry_point = repo_info.script['entry_point'] + # restore cwd - make it relative to the repo_root again - cwd = Path(cwd).relative_to(repo_info.script['repo_root']).as_posix() + if Path(cwd).is_absolute(): + # now cwd is relative again + cwd = Path(cwd).relative_to(repo_info.script['repo_root']).as_posix() + + # make sure we always have / (never \\) + if platform == "win32": + entry_point = entry_point.replace('\\', '/') if entry_point else "" + cwd = cwd.replace('\\', '/') if cwd else "" + task_state['script']['entry_point'] = entry_point or "" task_state['script']['working_dir'] = cwd or "." 
elif self.repo: - # normalize backslashes and remove first one - entry_point = '/'.join([p for p in self.script.split('/') if p and p != '.']) cwd = '/'.join([p for p in (self.cwd or '.').split('/') if p and p != '.']) - if cwd and entry_point.startswith(cwd + '/'): - entry_point = entry_point[len(cwd) + 1:] + # normalize backslashes and remove first one + if self.module: + entry_point = "-m {}".format(self.module) + else: + entry_point = '/'.join([p for p in self.script.split('/') if p and p != '.']) + if cwd and entry_point.startswith(cwd + '/'): + entry_point = entry_point[len(cwd) + 1:] + task_state['script']['repository'] = self.repo task_state['script']['version_num'] = self.commit or None task_state['script']['branch'] = self.branch or None @@ -241,7 +330,9 @@ class CreateAndPopulate(object): task_state['script']['working_dir'] = cwd or '.' task_state['script']['entry_point'] = entry_point or "" - if self.force_single_script_file and Path(self.script).is_file(): + if self.script and Path(self.script).is_file() and ( + self.force_single_script_file or Path(self.script).is_absolute()): + self.force_single_script_file = True create_requirements = self.packages is True repo_info, requirements = ScriptInfo.get( filepaths=[Path(self.script).as_posix()], @@ -251,15 +342,20 @@ class CreateAndPopulate(object): detect_jupyter_notebook=False, add_missing_installed_packages=True, detailed_req_report=False, - force_single_script=self.force_single_script_file, + force_single_script=True, ) + task_state['script']['binary'] = '/bin/bash' if ( + (repo_info.script['entry_point'] or '').lower().strip().endswith('.sh') and + not (repo_info.script['entry_point'] or '').lower().strip().startswith('-m ')) \ + else repo_info.script['binary'] task_state['script']['diff'] = repo_info.script['diff'] or '' task_state['script']['entry_point'] = repo_info.script['entry_point'] if create_requirements: task_state['script']['requirements'] = repo_info.script.get('requirements') or {} else: 
# standalone task - task_state['script']['entry_point'] = self.script or "" + task_state['script']['entry_point'] = self.script if self.script else \ + ("-m {}".format(self.module) if self.module else "") task_state['script']['working_dir'] = '.' # update requirements reqs = [] @@ -300,7 +396,8 @@ class CreateAndPopulate(object): idx_a = 0 lines = None # find the right entry for the patch if we have a local file (basically after __future__ - if local_entry_file: + if (local_entry_file and not stand_alone_script_outside_repo and not self.module and + str(local_entry_file).lower().endswith(".py")): with open(local_entry_file, 'rt') as f: lines = f.readlines() future_found = self._locate_future_import(lines) @@ -308,7 +405,8 @@ class CreateAndPopulate(object): idx_a = future_found + 1 task_init_patch = '' - if self.repo or task_state.get('script', {}).get('repository'): + if ((self.repo or task_state.get('script', {}).get('repository')) and + not self.force_single_script_file and not stand_alone_script_outside_repo): # if we do not have requirements, add clearml to the requirements.txt if not reqs: task_init_patch += \ @@ -319,26 +417,33 @@ class CreateAndPopulate(object): "+clearml\n" # Add Task.init call - task_init_patch += \ - "diff --git a{script_entry} b{script_entry}\n" \ - "--- a{script_entry}\n" \ - "+++ b{script_entry}\n" \ - "@@ -{idx_a},0 +{idx_b},3 @@\n" \ - "+from clearml import Task\n" \ - "+(__name__ != \"__main__\") or Task.init()\n" \ - "+\n".format( - script_entry=script_entry, idx_a=idx_a, idx_b=idx_a + 1) + if not self.module and script_entry and str(script_entry).lower().endswith(".py"): + task_init_patch += \ + "diff --git a{script_entry} b{script_entry}\n" \ + "--- a{script_entry}\n" \ + "+++ b{script_entry}\n" \ + "@@ -{idx_a},0 +{idx_b},3 @@\n" \ + "+from clearml import Task\n" \ + "+(__name__ != \"__main__\") or Task.init()\n" \ + "+\n".format( + script_entry=script_entry, idx_a=idx_a, idx_b=idx_a + 1) + elif self.module: + # if we are here, 
do nothing + pass elif local_entry_file and lines: # if we are here it means we do not have a git diff, but a single script file init_lines = ["from clearml import Task\n", "(__name__ != \"__main__\") or Task.init()\n\n"] task_state['script']['diff'] = ''.join(lines[:idx_a] + init_lines + lines[idx_a:]) # no need to add anything, we patched it. task_init_patch = "" - else: + elif str(script_entry or "").lower().endswith(".py"): # Add Task.init call + # if we are here it means we do not have a git diff, but a single script file task_init_patch += \ "from clearml import Task\n" \ "(__name__ != \"__main__\") or Task.init()\n\n" + task_state['script']['diff'] = task_init_patch + task_state['script'].get('diff', '') + task_init_patch = "" # make sure we add the diff at the end of the current diff task_state['script']['diff'] = task_state['script'].get('diff', '') diff --git a/clearml/cli/task/__main__.py b/clearml/cli/task/__main__.py index cb5007e6..05a98b42 100644 --- a/clearml/cli/task/__main__.py +++ b/clearml/cli/task/__main__.py @@ -37,10 +37,15 @@ def setup_parser(parser): 'and will be replicated on the remote machine') parser.add_argument('--script', type=str, default=None, help='Specify the entry point script for the remote execution. ' + 'Currently support .py .ipynb and .sh scripts (python, jupyter notebook, bash) ' 'When used in tandem with --repo the script should be a relative path inside ' - 'the repository, for example: --script source/train.py .' + 'the repository, for example: --script source/train.py ' 'When used with --folder it supports a direct path to a file inside the local ' 'repository itself, for example: --script ~/project/source/train.py') + parser.add_argument('--module', type=str, default=None, + help='Instead of a script entry point, specify a python module to be remotely executed. ' + 'Notice: It cannot be used with --script at the same time. 
' + 'for example: --module "torch.distributed.launch train_script.py"') parser.add_argument('--cwd', type=str, default=None, help='Working directory to launch the script from. Default: repository root folder. ' 'Relative to repo root or local folder') @@ -125,6 +130,7 @@ def cli(): branch=args.branch, commit=args.commit, script=args.script, + module=args.module, working_directory=args.cwd, packages=args.packages, requirements_file=args.requirements,