clearml-data: `close` now automatically uploads pending files

This commit is contained in:
allegroai 2021-01-10 13:06:30 +02:00
parent cc40b04a3c
commit 4beb21eb2b
2 changed files with 28 additions and 9 deletions

View File

@ -116,7 +116,7 @@ def cli():
upload.add_argument('--id', type=str, required=False,
help='Previously created dataset id. Default: previously created/accessed dataset')
upload.add_argument('--storage', type=str, default=None,
help='Remote storage to use for the dataset (default: files server). '
help='Remote storage to use for the dataset files (default: files_server). '
'Examples: \'s3://bucket/data\', \'gs://bucket/data\', \'azure://bucket/data\', '
'\'/mnt/shared/folder/data\'')
upload.add_argument('--verbose', default=False, action='store_true', help='Verbose reporting')
@ -125,6 +125,12 @@ def cli():
finalize = subparsers.add_parser('close', help='Finalize and close the dataset (implies auto upload)')
finalize.add_argument('--id', type=str, required=False,
help='Previously created dataset id. Default: previously created/accessed dataset')
finalize.add_argument('--storage', type=str, default=None,
help='Remote storage to use for the dataset files (default: files_server). '
'Examples: \'s3://bucket/data\', \'gs://bucket/data\', \'azure://bucket/data\', '
'\'/mnt/shared/folder/data\'')
finalize.add_argument('--disable-upload', action='store_true', default=False,
help='Disable automatic upload when closing the dataset')
finalize.add_argument('--verbose', action='store_true', default=False, help='Verbose reporting')
finalize.set_defaults(func=ds_close)
@ -197,11 +203,7 @@ def cli():
args = restore_state(args)
if args.command:
try:
args.func(args)
except Exception as ex:
print('Error: {}'.format(ex))
return 1
args.func(args)
else:
parser.print_help()
return 0
@ -344,7 +346,12 @@ def ds_close(args):
print_args(args)
ds = Dataset.get(dataset_id=args.id)
if ds.is_dirty():
raise ValueError("Pending uploads, cannot finalize dataset. run `clearml-data upload`")
if args.disable_upload:
raise ValueError("Pending uploads, cannot finalize dataset. run `clearml-data upload`")
# upload the files
print("Pending uploads, starting dataset upload to {}".format(args.storage or ds.get_default_storage()))
ds.upload(show_progress=True, verbose=args.verbose, output_url=args.storage or None)
ds.finalize()
print('Dataset closed and finalized')
clear_state()
@ -397,7 +404,7 @@ def ds_add(args):
num_files += ds.add_files(
path=file, recursive=not args.non_recursive,
verbose=args.verbose, dataset_path=args.dataset_folder or None)
print('{} files added'.format(num_files))
print('{} file{} added'.format(num_files, 's' if num_files > 1 else ''))
return 0

View File

@ -358,7 +358,8 @@ class Dataset(object):
# start upload
zip_file_size = humanfriendly.format_size(Path(zip_file).stat().st_size)
self._task.get_logger().report_text(
'Uploading compressed dataset changes ({} files, total {})'.format(count, zip_file_size))
'Uploading compressed dataset changes ({} files, total {}) to {}'.format(
count, zip_file_size, self.get_default_storage()))
self._task.upload_artifact(
name=self.__data_entry_name, artifact_object=Path(zip_file), preview=archive_preview,
delete_after_upload=True, wait_on_upload=True)
@ -601,6 +602,17 @@ class Dataset(object):
pool.close()
return [f.relative_path for f in matching_errors if f is not None]
def get_default_storage(self):
    # type: () -> Optional[str]
    """
    Report where this dataset's files are stored by default.

    The backing task's ``output_uri`` takes precedence; when it is empty, the
    default upload destination reported by the task's logger is returned instead.

    :return: The default storage URL, or None when the dataset has no backing task.
    """
    task = self._task
    if task:
        explicit_uri = task.output_uri
        if explicit_uri:
            # an explicitly configured output destination wins
            return explicit_uri
        # otherwise fall back to whatever the task's logger uploads to by default
        return task.get_logger().get_default_upload_destination()
    # no backing task, so there is nothing to query
    return None
@classmethod
def create(cls, dataset_name, dataset_project=None, parent_datasets=None):
# type: (str, Optional[str], Optional[Sequence[Union[str, Dataset]]]) -> Dataset