Fix docker extra args showing up in configuration printout

This commit is contained in:
allegroai 2022-10-23 12:37:13 +03:00
parent e8493d3807
commit f0fd62a28f
3 changed files with 101 additions and 77 deletions

View File

@ -32,7 +32,6 @@ import six
from pathlib2 import Path from pathlib2 import Path
from pyhocon import ConfigTree, ConfigFactory from pyhocon import ConfigTree, ConfigFactory
from six.moves.urllib.parse import quote from six.moves.urllib.parse import quote
from six.moves.urllib.parse import urlparse, urlunparse
from clearml_agent.backend_api.services import auth as auth_api from clearml_agent.backend_api.services import auth as auth_api
from clearml_agent.backend_api.services import queues as queues_api from clearml_agent.backend_api.services import queues as queues_api
@ -140,6 +139,7 @@ from clearml_agent.helper.repo import clone_repository_cached, RepoInfo, VCS, fi
from clearml_agent.helper.resource_monitor import ResourceMonitor from clearml_agent.helper.resource_monitor import ResourceMonitor
from clearml_agent.helper.runtime_verification import check_runtime, print_uptime_properties from clearml_agent.helper.runtime_verification import check_runtime, print_uptime_properties
from clearml_agent.helper.singleton import Singleton from clearml_agent.helper.singleton import Singleton
from clearml_agent.helper.docker_agrs import DockerArgsSanitizer
from clearml_agent.session import Session from clearml_agent.session import Session
from .events import Events from .events import Events
@ -799,7 +799,7 @@ class Worker(ServiceCommandSection):
lines= lines=
['Running Task {} inside {}docker: {} arguments: {}\n'.format( ['Running Task {} inside {}docker: {} arguments: {}\n'.format(
task_id, "default " if default_docker else '', task_id, "default " if default_docker else '',
docker_image, self._sanitize_docker_command(docker_arguments or []))] docker_image, DockerArgsSanitizer.sanitize_docker_command(self._session, docker_arguments or []))]
+ (['custom_setup_bash_script:\n{}'.format(docker_setup_script)] if docker_setup_script else []), + (['custom_setup_bash_script:\n{}'.format(docker_setup_script)] if docker_setup_script else []),
level="INFO", level="INFO",
session=task_session, session=task_session,
@ -862,7 +862,7 @@ class Worker(ServiceCommandSection):
'--standalone-mode' if self._standalone_mode else '', '--standalone-mode' if self._standalone_mode else '',
task_id) task_id)
display_docker_command = self._sanitize_docker_command(full_docker_cmd) display_docker_command = DockerArgsSanitizer.sanitize_docker_command(self._session, full_docker_cmd)
# send the actual used command line to the backend # send the actual used command line to the backend
self.send_logs( self.send_logs(
@ -2603,7 +2603,9 @@ class Worker(ServiceCommandSection):
print("Executing task id [%s]:" % current_task.id) print("Executing task id [%s]:" % current_task.id)
sanitized_execution = attr.evolve( sanitized_execution = attr.evolve(
execution, execution,
docker_cmd=" ".join(self._sanitize_docker_command(shlex.split(execution.docker_cmd or ""))), docker_cmd=" ".join(DockerArgsSanitizer.sanitize_docker_command(
self._session, shlex.split(execution.docker_cmd or ""))
),
) )
for pair in attr.asdict(sanitized_execution).items(): for pair in attr.asdict(sanitized_execution).items():
print("{} = {}".format(*pair)) print("{} = {}".format(*pair))
@ -3418,7 +3420,7 @@ class Worker(ServiceCommandSection):
print("Running in Docker{} mode (v19.03 and above) - using default docker image: {} {}\n".format( print("Running in Docker{} mode (v19.03 and above) - using default docker image: {} {}\n".format(
' *standalone*' if self._standalone_mode else '', self._docker_image, ' *standalone*' if self._standalone_mode else '', self._docker_image,
self._sanitize_docker_command(self._docker_arguments) or '')) DockerArgsSanitizer.sanitize_docker_command(self._session, self._docker_arguments) or ''))
temp_config = deepcopy(self._session.config) temp_config = deepcopy(self._session.config)
mounted_cache_dir = temp_config.get( mounted_cache_dir = temp_config.get(
@ -4078,76 +4080,6 @@ class Worker(ServiceCommandSection):
queue_ids.append(q_id) queue_ids.append(q_id)
return queue_ids return queue_ids
@staticmethod
def _sanitize_urls(s: str) -> Tuple[str, bool]:
""" Replaces passwords in URLs with asterisks """
regex = re.compile("^([^:]*:)[^@]+(.*)$")
tokens = re.split(r"\s", s)
changed = False
for k in range(len(tokens)):
if "@" in tokens[k]:
res = urlparse(tokens[k])
if regex.match(res.netloc):
changed = True
tokens[k] = urlunparse((
res.scheme,
regex.sub("\\1********\\2", res.netloc),
res.path,
res.params,
res.query,
res.fragment
))
return " ".join(tokens) if changed else s, changed
def _sanitize_docker_command(self, docker_command):
# type: (List[str]) -> List[str]
if not docker_command:
return docker_command
if not self._session.config.get('agent.hide_docker_command_env_vars.enabled', False):
return docker_command
keys = set(self._session.config.get('agent.hide_docker_command_env_vars.extra_keys', []))
keys.update(
ENV_AGENT_GIT_PASS.vars,
ENV_AGENT_SECRET_KEY.vars,
ENV_AWS_SECRET_KEY.vars,
ENV_AZURE_ACCOUNT_KEY.vars,
ENV_AGENT_AUTH_TOKEN.vars,
)
parse_embedded_urls = bool(self._session.config.get(
'agent.hide_docker_command_env_vars.parse_embedded_urls', True
))
skip_next = False
result = docker_command[:]
for i, item in enumerate(docker_command):
if skip_next:
skip_next = False
continue
try:
if item in ("-e", "--env"):
key, sep, val = result[i + 1].partition("=")
if not sep:
continue
if key in ENV_DOCKER_IMAGE.vars:
# special case - this contains a complete docker command
val = " ".join(self._sanitize_docker_command(re.split(r"\s", val)))
elif key in keys:
val = "********"
elif parse_embedded_urls:
val = self._sanitize_urls(val)[0]
result[i + 1] = "{}={}".format(key, val)
skip_next = True
elif parse_embedded_urls and not item.startswith("-"):
item, changed = self._sanitize_urls(item)
if changed:
result[i] = item
except (KeyError, TypeError):
pass
return result
@staticmethod @staticmethod
def _valid_docker_container_name(name): def _valid_docker_container_name(name):
# type: (str) -> bool # type: (str) -> bool

View File

@ -0,0 +1,88 @@
import re
from typing import Tuple, List, TYPE_CHECKING
from urllib.parse import urlunparse, urlparse
from clearml_agent.definitions import (
ENV_AGENT_GIT_PASS,
ENV_AGENT_SECRET_KEY,
ENV_AWS_SECRET_KEY,
ENV_AZURE_ACCOUNT_KEY,
ENV_AGENT_AUTH_TOKEN,
ENV_DOCKER_IMAGE,
)
if TYPE_CHECKING:
from clearml_agent.session import Session
class DockerArgsSanitizer:
@classmethod
def sanitize_docker_command(cls, session, docker_command):
# type: (Session, List[str]) -> List[str]
if not docker_command:
return docker_command
if not session.config.get('agent.hide_docker_command_env_vars.enabled', False):
return docker_command
keys = set(session.config.get('agent.hide_docker_command_env_vars.extra_keys', []))
keys.update(
ENV_AGENT_GIT_PASS.vars,
ENV_AGENT_SECRET_KEY.vars,
ENV_AWS_SECRET_KEY.vars,
ENV_AZURE_ACCOUNT_KEY.vars,
ENV_AGENT_AUTH_TOKEN.vars,
)
parse_embedded_urls = bool(session.config.get(
'agent.hide_docker_command_env_vars.parse_embedded_urls', True
))
skip_next = False
result = docker_command[:]
for i, item in enumerate(docker_command):
if skip_next:
skip_next = False
continue
try:
if item in ("-e", "--env"):
key, sep, val = result[i + 1].partition("=")
if not sep:
continue
if key in ENV_DOCKER_IMAGE.vars:
# special case - this contains a complete docker command
val = " ".join(cls.sanitize_docker_command(session, re.split(r"\s", val)))
elif key in keys:
val = "********"
elif parse_embedded_urls:
val = cls._sanitize_urls(val)[0]
result[i + 1] = "{}={}".format(key, val)
skip_next = True
elif parse_embedded_urls and not item.startswith("-"):
item, changed = cls._sanitize_urls(item)
if changed:
result[i] = item
except (KeyError, TypeError):
pass
return result
@staticmethod
def _sanitize_urls(s: str) -> Tuple[str, bool]:
""" Replaces passwords in URLs with asterisks """
regex = re.compile("^([^:]*:)[^@]+(.*)$")
tokens = re.split(r"\s", s)
changed = False
for k in range(len(tokens)):
if "@" in tokens[k]:
res = urlparse(tokens[k])
if regex.match(res.netloc):
changed = True
tokens[k] = urlunparse((
res.scheme,
regex.sub("\\1********\\2", res.netloc),
res.path,
res.params,
res.query,
res.fragment
))
return " ".join(tokens) if changed else s, changed

View File

@ -19,6 +19,7 @@ from clearml_agent.definitions import ENVIRONMENT_CONFIG, ENV_TASK_EXECUTE_AS_US
from clearml_agent.errors import APIError from clearml_agent.errors import APIError
from clearml_agent.helper.base import HOCONEncoder from clearml_agent.helper.base import HOCONEncoder
from clearml_agent.helper.process import Argv from clearml_agent.helper.process import Argv
from clearml_agent.helper.docker_agrs import DockerArgsSanitizer
from .version import __version__ from .version import __version__
POETRY = "poetry" POETRY = "poetry"
@ -232,7 +233,8 @@ class Session(_Session):
def print_configuration( def print_configuration(
self, self,
remove_secret_keys=("secret", "pass", "token", "account_key", "contents"), remove_secret_keys=("secret", "pass", "token", "account_key", "contents"),
skip_value_keys=("environment", ) skip_value_keys=("environment", ),
docker_args_sanitize_keys=("extra_docker_arguments", ),
): ):
# remove all the secrets from the print # remove all the secrets from the print
def recursive_remove_secrets(dictionary, secret_keys=(), empty_keys=()): def recursive_remove_secrets(dictionary, secret_keys=(), empty_keys=()):
@ -249,6 +251,8 @@ class Session(_Session):
if isinstance(dictionary.get(k, None), dict): if isinstance(dictionary.get(k, None), dict):
recursive_remove_secrets(dictionary[k], secret_keys=secret_keys, empty_keys=empty_keys) recursive_remove_secrets(dictionary[k], secret_keys=secret_keys, empty_keys=empty_keys)
elif isinstance(dictionary.get(k, None), (list, tuple)): elif isinstance(dictionary.get(k, None), (list, tuple)):
if k in (docker_args_sanitize_keys or []):
dictionary[k] = DockerArgsSanitizer.sanitize_docker_command(self, dictionary[k])
for item in dictionary[k]: for item in dictionary[k]:
if isinstance(item, dict): if isinstance(item, dict):
recursive_remove_secrets(item, secret_keys=secret_keys, empty_keys=empty_keys) recursive_remove_secrets(item, secret_keys=secret_keys, empty_keys=empty_keys)
@ -256,7 +260,7 @@ class Session(_Session):
config = deepcopy(self.config.to_dict()) config = deepcopy(self.config.to_dict())
# remove the env variable, it's not important # remove the env variable, it's not important
config.pop('env', None) config.pop('env', None)
if remove_secret_keys or skip_value_keys: if remove_secret_keys or skip_value_keys or docker_args_sanitize_keys:
recursive_remove_secrets(config, secret_keys=remove_secret_keys, empty_keys=skip_value_keys) recursive_remove_secrets(config, secret_keys=remove_secret_keys, empty_keys=skip_value_keys)
# remove logging.loggers.urllib3.level from the print # remove logging.loggers.urllib3.level from the print
try: try: