From 233f94f7411a36e89d4d101ce6ae541ec7ef9b4b Mon Sep 17 00:00:00 2001
From: allegroai <>
Date: Sun, 4 Feb 2024 19:31:30 +0200
Subject: [PATCH] Support AWS profile when specifying bucket storage
 credentials

---
 clearml/backend_config/bucket_config.py | 38 +++++++++++++++++++------
 clearml/storage/helper.py               | 23 ++++++++++-----
 docs/clearml.conf                       |  3 ++
 3 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/clearml/backend_config/bucket_config.py b/clearml/backend_config/bucket_config.py
index 0bc5ac8e..bbb9eb1d 100644
--- a/clearml/backend_config/bucket_config.py
+++ b/clearml/backend_config/bucket_config.py
@@ -34,9 +34,18 @@ class S3BucketConfig(object):
     verify = attrib(type=bool, default=None)
     use_credentials_chain = attrib(type=bool, default=False)
     extra_args = attrib(type=dict, default=None)
+    profile = attrib(type=str, default="")
 
     def update(
-        self, key, secret, multipart=True, region=None, use_credentials_chain=False, token="", extra_args=None
+        self,
+        key="",
+        secret="",
+        multipart=True,
+        region=None,
+        use_credentials_chain=False,
+        token="",
+        extra_args=None,
+        profile=""
     ):
         self.key = key
         self.secret = secret
@@ -45,6 +54,7 @@ class S3BucketConfig(object):
         self.region = region
         self.use_credentials_chain = use_credentials_chain
         self.extra_args = extra_args
+        self.profile = profile
 
     def is_valid(self):
         return (self.key and self.secret) or self.use_credentials_chain
@@ -107,6 +117,8 @@ class S3BucketConfigurations(BaseBucketConfigurations):
         default_token="",
         default_extra_args=None,
         default_verify=None,
+        default_profile="",
+        default_secure=True
     ):
         super(S3BucketConfigurations, self).__init__()
         self._buckets = buckets if buckets else list()
@@ -118,12 +130,12 @@ class S3BucketConfigurations(BaseBucketConfigurations):
         self._default_use_credentials_chain = default_use_credentials_chain
         self._default_extra_args = default_extra_args
         self._default_verify = default_verify
+        self._default_profile = default_profile
+        self._default_secure = default_secure
 
     @classmethod
     def from_config(cls, s3_configuration):
-        config_list = S3BucketConfig.from_list(
-            s3_configuration.get("credentials", [])
-        )
+        config_list = S3BucketConfig.from_list(s3_configuration.get("credentials", []))
 
         default_key = s3_configuration.get("key", "") or getenv("AWS_ACCESS_KEY_ID", "")
         default_secret = s3_configuration.get("secret", "") or getenv("AWS_SECRET_ACCESS_KEY", "")
@@ -132,11 +144,14 @@ class S3BucketConfigurations(BaseBucketConfigurations):
         default_use_credentials_chain = s3_configuration.get("use_credentials_chain") or False
         default_extra_args = s3_configuration.get("extra_args")
         default_verify = s3_configuration.get("verify", None)
+        default_profile = s3_configuration.get("profile", "") or getenv("AWS_PROFILE", "")
+        default_secure = s3_configuration.get("secure", True)
 
-        default_key = _none_to_empty_string(default_key)
-        default_secret = _none_to_empty_string(default_secret)
-        default_token = _none_to_empty_string(default_token)
-        default_region = _none_to_empty_string(default_region)
+        default_key = _none_to_empty_string(default_key).strip()
+        default_secret = _none_to_empty_string(default_secret).strip()
+        default_token = _none_to_empty_string(default_token).strip()
+        default_region = _none_to_empty_string(default_region).strip()
+        default_profile = _none_to_empty_string(default_profile).strip()
 
         return cls(
             config_list,
@@ -147,6 +162,8 @@ class S3BucketConfigurations(BaseBucketConfigurations):
             default_token,
             default_extra_args,
             default_verify,
+            default_profile,
+            default_secure
         )
 
     def add_config(self, bucket_config):
@@ -178,6 +195,8 @@ class S3BucketConfigurations(BaseBucketConfigurations):
             use_credentials_chain=self._default_use_credentials_chain,
             token=self._default_token,
             extra_args=self._default_extra_args,
+            profile=self._default_profile,
+            secure=self._default_secure
         )
 
     def _get_prefix_from_bucket_config(self, config):
@@ -204,7 +223,6 @@ class S3BucketConfigurations(BaseBucketConfigurations):
         :param uri: URI of bucket, directory or file
         :return: S3BucketConfig: bucket config
         """
-
         def find_match(uri):
             self._update_prefixes(refresh=False)
             uri = uri.lower()
@@ -244,6 +262,8 @@ class S3BucketConfigurations(BaseBucketConfigurations):
             host=host,
             token=self._default_token,
             extra_args=self._default_extra_args,
+            profile=self._default_profile,
+            secure=self._default_secure
         )
 
 
diff --git a/clearml/storage/helper.py b/clearml/storage/helper.py
index 77c8c020..c8e78f9f 100644
--- a/clearml/storage/helper.py
+++ b/clearml/storage/helper.py
@@ -461,15 +461,15 @@ class _Boto3Driver(_Driver):
                     )
                 }
                 if not cfg.use_credentials_chain:
-                    boto_kwargs["aws_access_key_id"] = cfg.key
-                    boto_kwargs["aws_secret_access_key"] = cfg.secret
+                    boto_kwargs["aws_access_key_id"] = cfg.key or None
+                    boto_kwargs["aws_secret_access_key"] = cfg.secret or None
                     if cfg.token:
                         boto_kwargs["aws_session_token"] = cfg.token
 
-                self.resource = boto3.resource(
-                    "s3",
-                    **boto_kwargs
+                boto_session = boto3.Session(
+                    profile_name=cfg.profile or None,
                 )
+                self.resource = boto_session.resource("s3", **boto_kwargs)
 
                 self.config = cfg
                 bucket_name = self.name[len(cfg.host) + 1:] if cfg.host else self.name
@@ -683,7 +683,12 @@ class _Boto3Driver(_Driver):
                 'time': datetime.utcnow().isoformat()
             }
 
-            boto_session = boto3.Session(conf.key, conf.secret, aws_session_token=conf.token)
+            boto_session = boto3.Session(
+                aws_access_key_id=conf.key or None,
+                aws_secret_access_key=conf.secret or None,
+                aws_session_token=conf.token or None,
+                profile_name=conf.profile or None
+            )
             endpoint = (('https://' if conf.secure else 'http://') + conf.host) if conf.host else None
             boto_resource = boto_session.resource('s3', region_name=conf.region or None, endpoint_url=endpoint)
             bucket = boto_resource.Bucket(bucket_name)
@@ -738,7 +743,9 @@ class _Boto3Driver(_Driver):
                 cls._bucket_location_failure_reported.add(conf.get_bucket_host())
 
         try:
-            boto_session = boto3.Session(conf.key, conf.secret, aws_session_token=conf.token)
+            boto_session = boto3.Session(
+                conf.key, conf.secret, aws_session_token=conf.token, profile_name=conf.profile or None
+            )
             boto_resource = boto_session.resource('s3')
             return boto_resource.meta.client.get_bucket_location(Bucket=conf.bucket)["LocationConstraint"]
 
@@ -2018,6 +2025,7 @@ class StorageHelper(object):
         logger=None,
         retries=5,
         token=None,
+        profile=None,
         **kwargs
     ):
         level = config.get("storage.log.level", None)
@@ -2072,6 +2080,7 @@ class StorageHelper(object):
                 region=final_region,
                 use_credentials_chain=self._conf.use_credentials_chain,
                 token=token or self._conf.token,
+                profile=profile or self._conf.profile,
                 extra_args=self._conf.extra_args,
             )
 
diff --git a/docs/clearml.conf b/docs/clearml.conf
index e7b045c5..09e3ecc3 100644
--- a/docs/clearml.conf
+++ b/docs/clearml.conf
@@ -92,6 +92,8 @@ sdk {
             # Specify explicit keys
             key: ""
             secret: ""
+            # Specify an AWS profile name (falls back to the AWS_PROFILE environment variable when empty)
+            profile: ""
             # Or enable credentials chain to let Boto3 pick the right credentials. 
             # This includes picking credentials from environment variables, 
             # credential file and IAM role using metadata service. 
@@ -103,6 +105,7 @@ sdk {
 
             credentials: [
                 # specifies key/secret credentials to use when handling s3 urls (read or write)
+                # Note that all fields in the global s3 config section are supported here
                 # {
                 #     bucket: "my-bucket-name"
                 #     key: "my-access-key"