Add support for providing ExtraArgs to boto3 when uploading files using the sdk.aws.s3.extra_args configuration option

This commit is contained in:
allegroai 2022-08-09 09:43:30 +03:00
parent 7ed5264ca6
commit e5846f6ba2
5 changed files with 38 additions and 6 deletions

View File

@ -32,14 +32,18 @@ class S3BucketConfig(object):
region = attrib(type=str, converter=_none_to_empty_string, default="")
verify = attrib(type=bool, default=True)
use_credentials_chain = attrib(type=bool, default=False)
extra_args = attrib(type=dict, default=None)
# Overwrite this bucket configuration's credentials and transfer settings in place.
# Every argument is stored as given; nothing is validated or copied here.
# NOTE(review): the single-line signature below looks like the pre-change version
# retained by the diff view; the multi-line signature after it adds `extra_args`.
def update(self, key, secret, multipart=True, region=None, use_credentials_chain=False, token=""):
def update(
self, key, secret, multipart=True, region=None, use_credentials_chain=False, token="", extra_args=None
):
self.key = key
self.secret = secret
self.token = token
self.multipart = multipart
self.region = region
self.use_credentials_chain = use_credentials_chain
# Stored by reference (or None); the upload code treats None as "no extra arguments".
self.extra_args = extra_args
# Tell whether this configuration can authenticate: either both key and secret
# are set, or the credentials chain is enabled.
def is_valid(self):
# The result is the truthy/falsy operand itself (e.g. the secret string), not a strict bool.
return (self.key and self.secret) or self.use_credentials_chain
@ -100,6 +104,7 @@ class S3BucketConfigurations(BaseBucketConfigurations):
default_region="",
default_use_credentials_chain=False,
default_token="",
default_extra_args=None,
):
super(S3BucketConfigurations, self).__init__()
self._buckets = buckets if buckets else list()
@ -109,6 +114,7 @@ class S3BucketConfigurations(BaseBucketConfigurations):
self._default_region = default_region
self._default_multipart = True
self._default_use_credentials_chain = default_use_credentials_chain
self._default_extra_args = default_extra_args
@classmethod
def from_config(cls, s3_configuration):
@ -121,13 +127,22 @@ class S3BucketConfigurations(BaseBucketConfigurations):
default_token = s3_configuration.get("token", "") or getenv("AWS_SESSION_TOKEN", "")
default_region = s3_configuration.get("region", "") or getenv("AWS_DEFAULT_REGION", "")
default_use_credentials_chain = s3_configuration.get("use_credentials_chain") or False
default_extra_args = s3_configuration.get("extra_args")
default_key = _none_to_empty_string(default_key)
default_secret = _none_to_empty_string(default_secret)
default_token = _none_to_empty_string(default_token)
default_region = _none_to_empty_string(default_region)
return cls(config_list, default_key, default_secret, default_region, default_use_credentials_chain, default_token)
return cls(
config_list,
default_key,
default_secret,
default_region,
default_use_credentials_chain,
default_token,
default_extra_args
)
def add_config(self, bucket_config):
self._buckets.insert(0, bucket_config)
@ -157,6 +172,7 @@ class S3BucketConfigurations(BaseBucketConfigurations):
multipart=bucket_config.multipart or self._default_multipart,
use_credentials_chain=self._default_use_credentials_chain,
token=self._default_token,
extra_args=self._default_extra_args,
)
def _get_prefix_from_bucket_config(self, config):
@ -221,7 +237,8 @@ class S3BucketConfigurations(BaseBucketConfigurations):
use_credentials_chain=self._default_use_credentials_chain,
bucket=bucket,
host=host,
token=self._default_token
token=self._default_token,
extra_args=self._default_extra_args,
)

View File

@ -410,6 +410,7 @@ class Config(object):
multipart=True,
bucket=bucket,
host=host,
extra_args=self.get("sdk.aws.s3.extra_args", None),
)
def set_overrides(self, *dicts):

View File

@ -75,6 +75,9 @@
# Refer to the latest Boto3 docs
use_credentials_chain: false
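# Additional ExtraArgs passed to boto3 when uploading files. These are merged over the automatically detected ContentType, so a ContentType given here takes precedence. Can also be set per-bucket under "credentials".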
extra_args: {}
credentials: [
# specifies key/secret credentials to use when handling s3 urls (read or write)
# {

View File

@ -344,7 +344,8 @@ class StorageHelper(object):
multipart=self._conf.multipart,
region=final_region,
use_credentials_chain=self._conf.use_credentials_chain,
token=token or self._conf.token
token=token or self._conf.token,
extra_args=self._conf.extra_args,
)
if not self._conf.use_credentials_chain:
@ -1441,12 +1442,16 @@ class _Boto3Driver(_Driver):
import boto3.s3.transfer
stream = _Stream(iterator)
try:
extra_args = {
'ContentType': get_file_mimetype(object_name),
**(container.config.extra_args or {})
}
container.bucket.upload_fileobj(stream, object_name, Config=boto3.s3.transfer.TransferConfig(
use_threads=container.config.multipart,
max_concurrency=self._max_multipart_concurrency if container.config.multipart else 1,
num_download_attempts=container.config.retries),
Callback=callback,
ExtraArgs={'ContentType': get_file_mimetype(object_name)}
ExtraArgs=extra_args,
)
except Exception as ex:
self.get_logger().error('Failed uploading: %s' % ex)
@ -1456,12 +1461,16 @@ class _Boto3Driver(_Driver):
def upload_object(self, file_path, container, object_name, callback=None, extra=None, **kwargs):
import boto3.s3.transfer
try:
extra_args = {
'ContentType': get_file_mimetype(object_name or file_path),
**(container.config.extra_args or {})
}
container.bucket.upload_file(file_path, object_name, Config=boto3.s3.transfer.TransferConfig(
use_threads=container.config.multipart,
max_concurrency=self._max_multipart_concurrency if container.config.multipart else 1,
num_download_attempts=container.config.retries),
Callback=callback,
ExtraArgs={'ContentType': get_file_mimetype(object_name or file_path)}
ExtraArgs=extra_args,
)
except Exception as ex:
self.get_logger().error('Failed uploading: %s' % ex)

View File

@ -85,6 +85,8 @@ sdk {
# Refer to the latest Boto3 docs
use_credentials_chain: false
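# Additional ExtraArgs passed to boto3 when uploading files. These are merged over the automatically detected ContentType, so a ContentType given here takes precedence. Can also be set per-bucket under "credentials".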
extra_args: {}
credentials: [
# specifies key/secret credentials to use when handling s3 urls (read or write)