mirror of
https://github.com/clearml/clearml
synced 2025-02-07 21:33:25 +00:00
clearml-data close now automatically uploads pending files
This commit is contained in:
parent
cc40b04a3c
commit
4beb21eb2b
@ -116,7 +116,7 @@ def cli():
|
||||
upload.add_argument('--id', type=str, required=False,
|
||||
help='Previously created dataset id. Default: previously created/accessed dataset')
|
||||
upload.add_argument('--storage', type=str, default=None,
|
||||
help='Remote storage to use for the dataset (default: files server). '
|
||||
help='Remote storage to use for the dataset files (default: files_server). '
|
||||
'Examples: \'s3://bucket/data\', \'gs://bucket/data\', \'azure://bucket/data\', '
|
||||
'\'/mnt/shared/folder/data\'')
|
||||
upload.add_argument('--verbose', default=False, action='store_true', help='Verbose reporting')
|
||||
@ -125,6 +125,12 @@ def cli():
|
||||
finalize = subparsers.add_parser('close', help='Finalize and close the dataset (implies auto upload)')
|
||||
finalize.add_argument('--id', type=str, required=False,
|
||||
help='Previously created dataset id. Default: previously created/accessed dataset')
|
||||
finalize.add_argument('--storage', type=str, default=None,
|
||||
help='Remote storage to use for the dataset files (default: files_server). '
|
||||
'Examples: \'s3://bucket/data\', \'gs://bucket/data\', \'azure://bucket/data\', '
|
||||
'\'/mnt/shared/folder/data\'')
|
||||
finalize.add_argument('--disable-upload', action='store_true', default=False,
|
||||
help='Disable automatic upload when closing the dataset')
|
||||
finalize.add_argument('--verbose', action='store_true', default=False, help='Verbose reporting')
|
||||
finalize.set_defaults(func=ds_close)
|
||||
|
||||
@ -197,11 +203,7 @@ def cli():
|
||||
args = restore_state(args)
|
||||
|
||||
if args.command:
|
||||
try:
|
||||
args.func(args)
|
||||
except Exception as ex:
|
||||
print('Error: {}'.format(ex))
|
||||
return 1
|
||||
args.func(args)
|
||||
else:
|
||||
parser.print_help()
|
||||
return 0
|
||||
@ -344,7 +346,12 @@ def ds_close(args):
|
||||
print_args(args)
|
||||
ds = Dataset.get(dataset_id=args.id)
|
||||
if ds.is_dirty():
|
||||
raise ValueError("Pending uploads, cannot finalize dataset. run `clearml-data upload`")
|
||||
if args.disable_upload:
|
||||
raise ValueError("Pending uploads, cannot finalize dataset. run `clearml-data upload`")
|
||||
# upload the files
|
||||
print("Pending uploads, starting dataset upload to {}".format(args.storage or ds.get_default_storage()))
|
||||
ds.upload(show_progress=True, verbose=args.verbose, output_url=args.storage or None)
|
||||
|
||||
ds.finalize()
|
||||
print('Dataset closed and finalized')
|
||||
clear_state()
|
||||
@ -397,7 +404,7 @@ def ds_add(args):
|
||||
num_files += ds.add_files(
|
||||
path=file, recursive=not args.non_recursive,
|
||||
verbose=args.verbose, dataset_path=args.dataset_folder or None)
|
||||
print('{} files added'.format(num_files))
|
||||
print('{} file{} added'.format(num_files, 's' if num_files > 1 else ''))
|
||||
return 0
|
||||
|
||||
|
||||
|
@ -358,7 +358,8 @@ class Dataset(object):
|
||||
# start upload
|
||||
zip_file_size = humanfriendly.format_size(Path(zip_file).stat().st_size)
|
||||
self._task.get_logger().report_text(
|
||||
'Uploading compressed dataset changes ({} files, total {})'.format(count, zip_file_size))
|
||||
'Uploading compressed dataset changes ({} files, total {}) to {}'.format(
|
||||
count, zip_file_size, self.get_default_storage()))
|
||||
self._task.upload_artifact(
|
||||
name=self.__data_entry_name, artifact_object=Path(zip_file), preview=archive_preview,
|
||||
delete_after_upload=True, wait_on_upload=True)
|
||||
@ -601,6 +602,17 @@ class Dataset(object):
|
||||
pool.close()
|
||||
return [f.relative_path for f in matching_errors if f is not None]
|
||||
|
||||
def get_default_storage(self):
    # type: () -> Optional[str]
    """
    Return the default storage location of the dataset

    The task's ``output_uri`` takes precedence when it is set; otherwise the
    default upload destination reported by the task's logger is returned.

    :return: URL for the default storage location, or None if the dataset
        has no task attached
    """
    # Without a backing task there is nothing to query for a destination
    if not self._task:
        return None
    # An explicitly set output_uri wins over the logger's default destination
    return self._task.output_uri or self._task.get_logger().get_default_upload_destination()
|
||||
|
||||
@classmethod
|
||||
def create(cls, dataset_name, dataset_project=None, parent_datasets=None):
|
||||
# type: (str, Optional[str], Optional[Sequence[Union[str, Dataset]]]) -> Dataset
|
||||
|
Loading…
Reference in New Issue
Block a user