diff --git a/clearml/storage/callbacks.py b/clearml/storage/callbacks.py index 29359e56..20d94f2f 100644 --- a/clearml/storage/callbacks.py +++ b/clearml/storage/callbacks.py @@ -6,6 +6,9 @@ from ..config import config class ProgressReport(object): + report_upload_chunk_size_mb = None + report_download_chunk_size_mb = None + def __init__(self, verbose, total_size, log, report_chunk_size_mb): self.current_status_mb = 0. self.last_reported = 0. @@ -34,9 +37,10 @@ class ProgressReport(object): class UploadProgressReport(ProgressReport): - def __init__(self, filename, verbose, total_size, log, report_chunk_size_mb=0): - if not report_chunk_size_mb: - report_chunk_size_mb = int(config.get('storage.log.report_upload_chunk_size_mb', 0) or 5) + def __init__(self, filename, verbose, total_size, log, report_chunk_size_mb=None): + report_chunk_size_mb = report_chunk_size_mb if report_chunk_size_mb is not None \ + else ProgressReport.report_upload_chunk_size_mb or \ + int(config.get("storage.log.report_upload_chunk_size_mb", 5)) super(UploadProgressReport, self).__init__(verbose, total_size, log, report_chunk_size_mb) self._filename = filename @@ -74,10 +78,10 @@ class UploadProgressReport(ProgressReport): class DownloadProgressReport(ProgressReport): - def __init__(self, total_size, verbose, remote_path, log, report_chunk_size_mb=0): - if not report_chunk_size_mb: - report_chunk_size_mb = int(config.get('storage.log.report_download_chunk_size_mb', 0) or 5) - + def __init__(self, total_size, verbose, remote_path, log, report_chunk_size_mb=None): + report_chunk_size_mb = report_chunk_size_mb if report_chunk_size_mb is not None \ + else ProgressReport.report_download_chunk_size_mb or \ + int(config.get("storage.log.report_download_chunk_size_mb", 5)) super(DownloadProgressReport, self).__init__(verbose, total_size, log, report_chunk_size_mb) self._remote_path = remote_path diff --git a/clearml/storage/helper.py b/clearml/storage/helper.py index 9d1a1e75..c2a802d3 100644 --- a/clearml/storage/helper.py +++ b/clearml/storage/helper.py @@ -2051,7 +2051,7 @@ class _AzureBlobServiceStorageDriver(_Driver): self, container_name, object_name, blob_name, data, max_connections=None, progress_callback=None, content_settings=None ): - max_connections = max_connections or _AzureBlobServiceStorageDriver._max_connections + max_connections = max_connections or int(_AzureBlobServiceStorageDriver._max_connections) if self.__legacy: self.__blob_service.create_blob_from_bytes( container_name, @@ -2071,7 +2071,7 @@ class _AzureBlobServiceStorageDriver(_Driver): def create_blob_from_path( self, container_name, blob_name, path, max_connections=None, content_settings=None, progress_callback=None ): - max_connections = max_connections or _AzureBlobServiceStorageDriver._max_connections + max_connections = max_connections or int(_AzureBlobServiceStorageDriver._max_connections) if self.__legacy: self.__blob_service.create_blob_from_path( container_name, @@ -2131,7 +2131,7 @@ class _AzureBlobServiceStorageDriver(_Driver): return client.download_blob().content_as_bytes() def get_blob_to_path(self, container_name, blob_name, path, max_connections=None, progress_callback=None): - max_connections = max_connections or _AzureBlobServiceStorageDriver._max_connections + max_connections = max_connections or int(_AzureBlobServiceStorageDriver._max_connections) if self.__legacy: return self.__blob_service.get_blob_to_path( container_name, diff --git a/clearml/storage/manager.py b/clearml/storage/manager.py index 5fb4f2da..e3d1418e 100644 --- a/clearml/storage/manager.py +++ b/clearml/storage/manager.py @@ -12,6 +12,7 @@ from six.moves.urllib.parse import urlparse from pathlib2 import Path from .cache import CacheManager +from .callbacks import ProgressReport from .helper import StorageHelper from .util import encode_string_to_filename, safe_extract from ..debugging.log import LoggerRoot @@ -463,7 +464,7 @@ class StorageManager(object): @classmethod def get_metadata(cls, remote_url, return_full_path=False): - # type: (str) -> Optional[dict] + # type: (str, bool) -> Optional[dict] """ Get the metadata of the a remote object. The metadata is a dict containing the following keys: `name`, `size`. @@ -471,6 +472,7 @@ class StorageManager(object): :param str remote_url: Source remote storage location, tree structure of `remote_url` will be created under the target local_folder. Supports S3/GS/Azure, shared filesystem and http(s). Example: 's3://bucket/data/' + :param return_full_path: True for returning a full path (with the base url) :return: A dict containing the metadata of the remote object. In case of an error, `None` is returned """ @@ -482,3 +484,33 @@ class StorageManager(object): if return_full_path and not metadata["name"].startswith(helper.base_url): metadata["name"] = helper.base_url + ("/" if not helper.base_url.endswith("/") else "") + metadata["name"] return metadata + + @classmethod + def set_report_upload_chunk_size(cls, chunk_size_mb): + # type: (int) -> () + """ + Set the upload progress report chunk size (in MB). The chunk size + determines how often the progress reports are logged: + every time a chunk of data with a size greater than `chunk_size_mb` + is uploaded, log the report. + This function overwrites the `sdk.storage.log.report_upload_chunk_size_mb` + config entry + + :param chunk_size_mb: The chunk size, in megabytes + """ + ProgressReport.report_upload_chunk_size_mb = int(chunk_size_mb) + + @classmethod + def set_report_download_chunk_size(cls, chunk_size_mb): + # type: (int) -> () + """ + Set the download progress report chunk size (in MB). The chunk size + determines how often the progress reports are logged: + every time a chunk of data with a size greater than `chunk_size_mb` + is downloaded, log the report. + This function overwrites the `sdk.storage.log.report_download_chunk_size_mb` + config entry + + :param chunk_size_mb: The chunk size, in megabytes + """ + ProgressReport.report_download_chunk_size_mb = int(chunk_size_mb)