Fix docker extra args showing up in configuration printout

This commit is contained in:
allegroai 2022-10-23 12:37:13 +03:00
parent e8493d3807
commit f0fd62a28f
3 changed files with 101 additions and 77 deletions
clearml_agent

View File

@ -32,7 +32,6 @@ import six
from pathlib2 import Path
from pyhocon import ConfigTree, ConfigFactory
from six.moves.urllib.parse import quote
from six.moves.urllib.parse import urlparse, urlunparse
from clearml_agent.backend_api.services import auth as auth_api
from clearml_agent.backend_api.services import queues as queues_api
@ -140,6 +139,7 @@ from clearml_agent.helper.repo import clone_repository_cached, RepoInfo, VCS, fi
from clearml_agent.helper.resource_monitor import ResourceMonitor
from clearml_agent.helper.runtime_verification import check_runtime, print_uptime_properties
from clearml_agent.helper.singleton import Singleton
from clearml_agent.helper.docker_agrs import DockerArgsSanitizer
from clearml_agent.session import Session
from .events import Events
@ -799,7 +799,7 @@ class Worker(ServiceCommandSection):
lines=
['Running Task {} inside {}docker: {} arguments: {}\n'.format(
task_id, "default " if default_docker else '',
docker_image, self._sanitize_docker_command(docker_arguments or []))]
docker_image, DockerArgsSanitizer.sanitize_docker_command(self._session, docker_arguments or []))]
+ (['custom_setup_bash_script:\n{}'.format(docker_setup_script)] if docker_setup_script else []),
level="INFO",
session=task_session,
@ -862,7 +862,7 @@ class Worker(ServiceCommandSection):
'--standalone-mode' if self._standalone_mode else '',
task_id)
display_docker_command = self._sanitize_docker_command(full_docker_cmd)
display_docker_command = DockerArgsSanitizer.sanitize_docker_command(self._session, full_docker_cmd)
# send the actual used command line to the backend
self.send_logs(
@ -2603,7 +2603,9 @@ class Worker(ServiceCommandSection):
print("Executing task id [%s]:" % current_task.id)
sanitized_execution = attr.evolve(
execution,
docker_cmd=" ".join(self._sanitize_docker_command(shlex.split(execution.docker_cmd or ""))),
docker_cmd=" ".join(DockerArgsSanitizer.sanitize_docker_command(
self._session, shlex.split(execution.docker_cmd or ""))
),
)
for pair in attr.asdict(sanitized_execution).items():
print("{} = {}".format(*pair))
@ -3418,7 +3420,7 @@ class Worker(ServiceCommandSection):
print("Running in Docker{} mode (v19.03 and above) - using default docker image: {} {}\n".format(
' *standalone*' if self._standalone_mode else '', self._docker_image,
self._sanitize_docker_command(self._docker_arguments) or ''))
DockerArgsSanitizer.sanitize_docker_command(self._session, self._docker_arguments) or ''))
temp_config = deepcopy(self._session.config)
mounted_cache_dir = temp_config.get(
@ -4078,76 +4080,6 @@ class Worker(ServiceCommandSection):
queue_ids.append(q_id)
return queue_ids
@staticmethod
def _sanitize_urls(s: str) -> Tuple[str, bool]:
""" Replaces passwords in URLs with asterisks """
regex = re.compile("^([^:]*:)[^@]+(.*)$")
tokens = re.split(r"\s", s)
changed = False
for k in range(len(tokens)):
if "@" in tokens[k]:
res = urlparse(tokens[k])
if regex.match(res.netloc):
changed = True
tokens[k] = urlunparse((
res.scheme,
regex.sub("\\1********\\2", res.netloc),
res.path,
res.params,
res.query,
res.fragment
))
return " ".join(tokens) if changed else s, changed
def _sanitize_docker_command(self, docker_command):
# type: (List[str]) -> List[str]
if not docker_command:
return docker_command
if not self._session.config.get('agent.hide_docker_command_env_vars.enabled', False):
return docker_command
keys = set(self._session.config.get('agent.hide_docker_command_env_vars.extra_keys', []))
keys.update(
ENV_AGENT_GIT_PASS.vars,
ENV_AGENT_SECRET_KEY.vars,
ENV_AWS_SECRET_KEY.vars,
ENV_AZURE_ACCOUNT_KEY.vars,
ENV_AGENT_AUTH_TOKEN.vars,
)
parse_embedded_urls = bool(self._session.config.get(
'agent.hide_docker_command_env_vars.parse_embedded_urls', True
))
skip_next = False
result = docker_command[:]
for i, item in enumerate(docker_command):
if skip_next:
skip_next = False
continue
try:
if item in ("-e", "--env"):
key, sep, val = result[i + 1].partition("=")
if not sep:
continue
if key in ENV_DOCKER_IMAGE.vars:
# special case - this contains a complete docker command
val = " ".join(self._sanitize_docker_command(re.split(r"\s", val)))
elif key in keys:
val = "********"
elif parse_embedded_urls:
val = self._sanitize_urls(val)[0]
result[i + 1] = "{}={}".format(key, val)
skip_next = True
elif parse_embedded_urls and not item.startswith("-"):
item, changed = self._sanitize_urls(item)
if changed:
result[i] = item
except (KeyError, TypeError):
pass
return result
@staticmethod
def _valid_docker_container_name(name):
# type: (str) -> bool

View File

@ -0,0 +1,88 @@
import re
from typing import Tuple, List, TYPE_CHECKING
from urllib.parse import urlunparse, urlparse
from clearml_agent.definitions import (
ENV_AGENT_GIT_PASS,
ENV_AGENT_SECRET_KEY,
ENV_AWS_SECRET_KEY,
ENV_AZURE_ACCOUNT_KEY,
ENV_AGENT_AUTH_TOKEN,
ENV_DOCKER_IMAGE,
)
if TYPE_CHECKING:
from clearml_agent.session import Session
class DockerArgsSanitizer:
@classmethod
def sanitize_docker_command(cls, session, docker_command):
# type: (Session, List[str]) -> List[str]
if not docker_command:
return docker_command
if not session.config.get('agent.hide_docker_command_env_vars.enabled', False):
return docker_command
keys = set(session.config.get('agent.hide_docker_command_env_vars.extra_keys', []))
keys.update(
ENV_AGENT_GIT_PASS.vars,
ENV_AGENT_SECRET_KEY.vars,
ENV_AWS_SECRET_KEY.vars,
ENV_AZURE_ACCOUNT_KEY.vars,
ENV_AGENT_AUTH_TOKEN.vars,
)
parse_embedded_urls = bool(session.config.get(
'agent.hide_docker_command_env_vars.parse_embedded_urls', True
))
skip_next = False
result = docker_command[:]
for i, item in enumerate(docker_command):
if skip_next:
skip_next = False
continue
try:
if item in ("-e", "--env"):
key, sep, val = result[i + 1].partition("=")
if not sep:
continue
if key in ENV_DOCKER_IMAGE.vars:
# special case - this contains a complete docker command
val = " ".join(cls.sanitize_docker_command(session, re.split(r"\s", val)))
elif key in keys:
val = "********"
elif parse_embedded_urls:
val = cls._sanitize_urls(val)[0]
result[i + 1] = "{}={}".format(key, val)
skip_next = True
elif parse_embedded_urls and not item.startswith("-"):
item, changed = cls._sanitize_urls(item)
if changed:
result[i] = item
except (KeyError, TypeError):
pass
return result
@staticmethod
def _sanitize_urls(s: str) -> Tuple[str, bool]:
""" Replaces passwords in URLs with asterisks """
regex = re.compile("^([^:]*:)[^@]+(.*)$")
tokens = re.split(r"\s", s)
changed = False
for k in range(len(tokens)):
if "@" in tokens[k]:
res = urlparse(tokens[k])
if regex.match(res.netloc):
changed = True
tokens[k] = urlunparse((
res.scheme,
regex.sub("\\1********\\2", res.netloc),
res.path,
res.params,
res.query,
res.fragment
))
return " ".join(tokens) if changed else s, changed

View File

@ -19,6 +19,7 @@ from clearml_agent.definitions import ENVIRONMENT_CONFIG, ENV_TASK_EXECUTE_AS_US
from clearml_agent.errors import APIError
from clearml_agent.helper.base import HOCONEncoder
from clearml_agent.helper.process import Argv
from clearml_agent.helper.docker_agrs import DockerArgsSanitizer
from .version import __version__
POETRY = "poetry"
@ -232,7 +233,8 @@ class Session(_Session):
def print_configuration(
self,
remove_secret_keys=("secret", "pass", "token", "account_key", "contents"),
skip_value_keys=("environment", )
skip_value_keys=("environment", ),
docker_args_sanitize_keys=("extra_docker_arguments", ),
):
# remove all the secrets from the print
def recursive_remove_secrets(dictionary, secret_keys=(), empty_keys=()):
@ -249,6 +251,8 @@ class Session(_Session):
if isinstance(dictionary.get(k, None), dict):
recursive_remove_secrets(dictionary[k], secret_keys=secret_keys, empty_keys=empty_keys)
elif isinstance(dictionary.get(k, None), (list, tuple)):
if k in (docker_args_sanitize_keys or []):
dictionary[k] = DockerArgsSanitizer.sanitize_docker_command(self, dictionary[k])
for item in dictionary[k]:
if isinstance(item, dict):
recursive_remove_secrets(item, secret_keys=secret_keys, empty_keys=empty_keys)
@ -256,7 +260,7 @@ class Session(_Session):
config = deepcopy(self.config.to_dict())
# remove the env variable, it's not important
config.pop('env', None)
if remove_secret_keys or skip_value_keys:
if remove_secret_keys or skip_value_keys or docker_args_sanitize_keys:
recursive_remove_secrets(config, secret_keys=remove_secret_keys, empty_keys=skip_value_keys)
# remove logging.loggers.urllib3.level from the print
try: