def sanitize_urls(s: str) -> Tuple[str, bool]:
    """
    Replaces passwords in URLs with asterisks.

    The input is split on single whitespace characters and every token that contains "@"
    is parsed as a URL. When the URL's network location has the form ``user:password@host``,
    the password is replaced with ``********``. Tokens without a non-empty password
    (including plain ``host:port`` locations and ``user@host``) are left untouched.

    :param s: Text that may contain one or more whitespace-separated URLs.
    :return: Tuple of (sanitized string, flag indicating whether sanitation was performed).
        If nothing was sanitized the original string object is returned as-is; otherwise the
        tokens are joined back with single spaces.
    """
    tokens = re.split(r"\s", s)
    changed = False
    for k, token in enumerate(tokens):
        if "@" not in token:
            continue
        try:
            res = urlparse(token)
        except ValueError:
            # urlparse rejects some malformed locations (e.g. unbalanced IPv6 brackets);
            # such a token cannot be parsed reliably, so keep it as is instead of crashing.
            continue
        # The user-info part ends at the LAST "@" of the network location. Splitting there
        # (instead of matching up to the first "@") masks passwords that contain "@" in full
        # and guarantees a bare "host:port" is never mistaken for "user:password".
        userinfo, at, host = res.netloc.rpartition("@")
        user, colon, password = userinfo.partition(":")
        if not (at and colon and password):
            continue
        changed = True
        tokens[k] = urlunparse((
            res.scheme,
            "{}:********@{}".format(user, host),
            res.path,
            res.params,
            res.query,
            res.fragment,
        ))
    return (" ".join(tokens) if changed else s), changed
def recursive_remove_secrets(dictionary):
    """
    Remove / mask sensitive entries of a (nested) configuration dictionary, in place.

    This is a closure: it reads ``remove_secret_keys``, ``skip_value_keys``,
    ``sanitize_urls_keys``, ``docker_args_sanitize_keys`` and ``self`` from the enclosing scope.
    For every key of ``dictionary``:

    - a key containing any of ``remove_secret_keys`` as a substring is removed;
    - a key equal to one of ``skip_value_keys`` has its value replaced with ``'****'``
      (for a dict value, every inner value is replaced with ``'****'``);
    - a key equal to one of ``sanitize_urls_keys`` has URL passwords masked in its value
      (string, list/tuple of strings, or dict with string values);
    - dict values are processed recursively; list/tuple values listed in
      ``docker_args_sanitize_keys`` go through ``DockerArgsSanitizer.sanitize_docker_command``
      and any dict items of a list/tuple are processed recursively.

    :param dictionary: The dictionary to sanitize; it is modified in place.
    """
    # Any of the key collections may be None/empty (the caller only requires one of them
    # to be set), so normalize them the same way docker_args_sanitize_keys is handled.
    secret_keys = remove_secret_keys or ()
    empty_keys = skip_value_keys or ()
    url_keys = sanitize_urls_keys or ()
    docker_keys = docker_args_sanitize_keys or ()

    def mask_urls(value):
        # only strings can carry URLs; leave any other value type untouched
        return sanitize_urls(value)[0] if isinstance(value, str) else value

    for k in list(dictionary):
        if isinstance(k, str) and any(s in k for s in secret_keys):
            dictionary.pop(k)
            # the key is gone - nothing left to mask or recurse into
            # (also avoids a KeyError if the same key is listed in skip_value_keys)
            continue

        if k in empty_keys:
            current = dictionary[k]
            dictionary[k] = {key: '****' for key in current} if isinstance(current, dict) else '****'

        if k in url_keys:
            value = dictionary[k]
            if isinstance(value, str):
                dictionary[k] = mask_urls(value)
            elif isinstance(value, (list, tuple)):
                dictionary[k] = [mask_urls(v) for v in value]
            elif isinstance(value, dict):
                dictionary[k] = {k_: mask_urls(v) for k_, v in value.items()}

        value = dictionary[k]
        if isinstance(value, dict):
            recursive_remove_secrets(value)
        elif isinstance(value, (list, tuple)):
            if k in docker_keys:
                dictionary[k] = DockerArgsSanitizer.sanitize_docker_command(self, value)
            for item in dictionary[k]:
                if isinstance(item, dict):
                    recursive_remove_secrets(item)