diff --git a/clearml/backend_config/bucket_config.py b/clearml/backend_config/bucket_config.py index 0d6c94ac..62fc5b74 100644 --- a/clearml/backend_config/bucket_config.py +++ b/clearml/backend_config/bucket_config.py @@ -32,14 +32,18 @@ class S3BucketConfig(object): region = attrib(type=str, converter=_none_to_empty_string, default="") verify = attrib(type=bool, default=True) use_credentials_chain = attrib(type=bool, default=False) + extra_args = attrib(type=dict, default=None) - def update(self, key, secret, multipart=True, region=None, use_credentials_chain=False, token=""): + def update( + self, key, secret, multipart=True, region=None, use_credentials_chain=False, token="", extra_args=None + ): self.key = key self.secret = secret self.token = token self.multipart = multipart self.region = region self.use_credentials_chain = use_credentials_chain + self.extra_args = extra_args def is_valid(self): return (self.key and self.secret) or self.use_credentials_chain @@ -100,6 +104,7 @@ class S3BucketConfigurations(BaseBucketConfigurations): default_region="", default_use_credentials_chain=False, default_token="", + default_extra_args=None, ): super(S3BucketConfigurations, self).__init__() self._buckets = buckets if buckets else list() @@ -109,6 +114,7 @@ class S3BucketConfigurations(BaseBucketConfigurations): self._default_region = default_region self._default_multipart = True self._default_use_credentials_chain = default_use_credentials_chain + self._default_extra_args = default_extra_args @classmethod def from_config(cls, s3_configuration): @@ -121,13 +127,22 @@ class S3BucketConfigurations(BaseBucketConfigurations): default_token = s3_configuration.get("token", "") or getenv("AWS_SESSION_TOKEN", "") default_region = s3_configuration.get("region", "") or getenv("AWS_DEFAULT_REGION", "") default_use_credentials_chain = s3_configuration.get("use_credentials_chain") or False + default_extra_args = s3_configuration.get("extra_args") default_key = _none_to_empty_string(default_key) default_secret = _none_to_empty_string(default_secret) default_token = _none_to_empty_string(default_token) default_region = _none_to_empty_string(default_region) - return cls(config_list, default_key, default_secret, default_region, default_use_credentials_chain, default_token) + return cls( + config_list, + default_key, + default_secret, + default_region, + default_use_credentials_chain, + default_token, + default_extra_args + ) def add_config(self, bucket_config): self._buckets.insert(0, bucket_config) @@ -157,6 +172,7 @@ class S3BucketConfigurations(BaseBucketConfigurations): multipart=bucket_config.multipart or self._default_multipart, use_credentials_chain=self._default_use_credentials_chain, token=self._default_token, + extra_args=self._default_extra_args, ) def _get_prefix_from_bucket_config(self, config): @@ -221,7 +237,8 @@ class S3BucketConfigurations(BaseBucketConfigurations): use_credentials_chain=self._default_use_credentials_chain, bucket=bucket, host=host, - token=self._default_token + token=self._default_token, + extra_args=self._default_extra_args, ) diff --git a/clearml/backend_config/config.py b/clearml/backend_config/config.py index c2650639..674cd7b4 100644 --- a/clearml/backend_config/config.py +++ b/clearml/backend_config/config.py @@ -410,6 +410,7 @@ class Config(object): multipart=True, bucket=bucket, host=host, + extra_args=self.get("sdk.aws.s3.extra_args", None), ) def set_overrides(self, *dicts): diff --git a/clearml/config/default/sdk.conf b/clearml/config/default/sdk.conf index 1caefe4e..41df772f 100644 --- a/clearml/config/default/sdk.conf +++ b/clearml/config/default/sdk.conf @@ -75,6 +75,9 @@ # Refer to the latest Boto3 docs use_credentials_chain: false + # Additional ExtraArgs passed to boto3 when uploading files. Can also be set per-bucket under "credentials". + extra_args: {} + credentials: [ # specifies key/secret credentials to use when handling s3 urls (read or write) # { diff --git a/clearml/storage/helper.py b/clearml/storage/helper.py index 8b2ffce8..a3789eed 100644 --- a/clearml/storage/helper.py +++ b/clearml/storage/helper.py @@ -344,7 +344,8 @@ class StorageHelper(object): multipart=self._conf.multipart, region=final_region, use_credentials_chain=self._conf.use_credentials_chain, - token=token or self._conf.token + token=token or self._conf.token, + extra_args=self._conf.extra_args, ) if not self._conf.use_credentials_chain: @@ -1441,12 +1442,16 @@ class _Boto3Driver(_Driver): import boto3.s3.transfer stream = _Stream(iterator) try: + extra_args = { + 'ContentType': get_file_mimetype(object_name), + **(container.config.extra_args or {}) + } container.bucket.upload_fileobj(stream, object_name, Config=boto3.s3.transfer.TransferConfig( use_threads=container.config.multipart, max_concurrency=self._max_multipart_concurrency if container.config.multipart else 1, num_download_attempts=container.config.retries), Callback=callback, - ExtraArgs={'ContentType': get_file_mimetype(object_name)} + ExtraArgs=extra_args, ) except Exception as ex: self.get_logger().error('Failed uploading: %s' % ex) @@ -1456,12 +1461,16 @@ class _Boto3Driver(_Driver): def upload_object(self, file_path, container, object_name, callback=None, extra=None, **kwargs): import boto3.s3.transfer try: + extra_args = { + 'ContentType': get_file_mimetype(object_name or file_path), + **(container.config.extra_args or {}) + } container.bucket.upload_file(file_path, object_name, Config=boto3.s3.transfer.TransferConfig( use_threads=container.config.multipart, max_concurrency=self._max_multipart_concurrency if container.config.multipart else 1, num_download_attempts=container.config.retries), Callback=callback, - ExtraArgs={'ContentType': get_file_mimetype(object_name or file_path)} + ExtraArgs=extra_args, ) except Exception as ex: self.get_logger().error('Failed uploading: %s' % ex) diff --git a/docs/clearml.conf b/docs/clearml.conf index 92c009ce..8a47fc3d 100644 --- a/docs/clearml.conf +++ b/docs/clearml.conf @@ -85,6 +85,8 @@ sdk { # Refer to the latest Boto3 docs use_credentials_chain: false + # Additional ExtraArgs passed to boto3 when uploading files. Can also be set per-bucket under "credentials". + extra_args: {} credentials: [ # specifies key/secret credentials to use when handling s3 urls (read or write)