From bed94ee431f84479182d704290da7884a7076701 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Tue, 19 Oct 2021 10:46:43 +0300 Subject: [PATCH] Add support for configuration env and files section --- .../backend_api/config/default/agent.conf | 34 ++++++ clearml_agent/backend_api/session/defs.py | 2 + clearml_agent/backend_api/session/session.py | 4 +- clearml_agent/backend_config/utils.py | 103 ++++++++++++++++++ clearml_agent/commands/worker.py | 28 +++++ docs/clearml.conf | 34 ++++++ 6 files changed, 202 insertions(+), 3 deletions(-) diff --git a/clearml_agent/backend_api/config/default/agent.conf b/clearml_agent/backend_api/config/default/agent.conf index 0de3472..6b02566 100644 --- a/clearml_agent/backend_api/config/default/agent.conf +++ b/clearml_agent/backend_api/config/default/agent.conf @@ -221,4 +221,38 @@ # Note: resulting name must start with an alphanumeric character and continue with alphanumeric characters, # underscores (_), dots (.) and/or dashes (-) #docker_container_name_format: "clearml-id-{task_id}-{rand_string:.8}" + + # Apply top-level environment section from configuration into os.environ + apply_environment: true + # Top-level environment section is in the form of: + # environment { + # key: value + # ... + # } + # and is applied to the OS environment as `key=value` for each key/value pair + + # Apply top-level files section from configuration into local file system + apply_files: true + # Top-level files section allows auto-generating files at designated paths with a predefined contents + # and target format. Options include: + # contents: the target file's content, typically a string (or any base type int/float/list/dict etc.) + # format: a custom format for the contents. 
Currently supported value is `base64` to automatically decode a + # base64-encoded contents string, otherwise ignored + # path: the target file's path, may include ~ and inplace env vars + # target_format: format used to encode contents before writing into the target file. Supported values are json, + # yaml, yml and bytes (in which case the file will be written in binary mode). Default is text mode. + # overwrite: overwrite the target file in case it exists. Default is true. + # + # Example: + # files { + # myfile1 { + # contents: "The quick brown fox jumped over the lazy dog" + # path: "/tmp/fox.txt" + # } + # myjsonfile { + # contents: {some: nested: value: [1, 2, 3, 4]} + # path: "/tmp/test.json" + # target_format: json + # } + # } } diff --git a/clearml_agent/backend_api/session/defs.py b/clearml_agent/backend_api/session/defs.py index d311660..d49f920 100644 --- a/clearml_agent/backend_api/session/defs.py +++ b/clearml_agent/backend_api/session/defs.py @@ -13,6 +13,8 @@ ENV_HOST_VERIFY_CERT = EnvEntry("CLEARML_API_HOST_VERIFY_CERT", "TRAINS_API_HOST ENV_CONDA_ENV_PACKAGE = EnvEntry("CLEARML_CONDA_ENV_PACKAGE", "TRAINS_CONDA_ENV_PACKAGE") ENV_NO_DEFAULT_SERVER = EnvEntry("CLEARML_NO_DEFAULT_SERVER", "TRAINS_NO_DEFAULT_SERVER", type=bool, default=True) ENV_DISABLE_VAULT_SUPPORT = EnvEntry('CLEARML_AGENT_DISABLE_VAULT_SUPPORT', type=bool) +ENV_ENABLE_ENV_CONFIG_SECTION = EnvEntry('CLEARML_AGENT_ENABLE_ENV_CONFIG_SECTION', type=bool) +ENV_ENABLE_FILES_CONFIG_SECTION = EnvEntry('CLEARML_AGENT_ENABLE_FILES_CONFIG_SECTION', type=bool) ENV_INITIAL_CONNECT_RETRY_OVERRIDE = EnvEntry( 'CLEARML_AGENT_INITIAL_CONNECT_RETRY_OVERRIDE', default=True, converter=safe_text_to_bool ) diff --git a/clearml_agent/backend_api/session/session.py b/clearml_agent/backend_api/session/session.py index cbe9074..71a9ea1 100644 --- a/clearml_agent/backend_api/session/session.py +++ b/clearml_agent/backend_api/session/session.py @@ -183,8 +183,6 @@ class Session(TokenManager): # notice: this 
def apply_environment(config):
    # type: (Config) -> List[str]
    """
    Export the top-level ``environment`` configuration section into ``os.environ``.

    :param config: configuration object; only ``config.get("environment", None)`` is used.
        The section may be a mapping, or a list/tuple of ``(key, value)`` pairs.
    :return: the keys that were exported (empty keys are ignored). An empty list is returned
        when the section is missing or empty.
    """
    env_vars = config.get("environment", None)
    if not env_vars:
        return []
    if isinstance(env_vars, (list, tuple)):
        env_vars = dict(env_vars)

    # drop empty/None keys, they cannot name an environment variable
    keys = [key for key in env_vars.keys() if key]

    for key in keys:
        value = env_vars[key]
        # Only a missing (None) value maps to an empty string. Legitimate falsy values such as
        # 0 or False must keep their text form instead of being silently blanked.
        os.environ[str(key)] = "" if value is None else str(value)

    return keys


def apply_files(config):
    # type: (Config) -> None
    """
    Create files on the local file system from the top-level ``files`` configuration section.

    Each entry of the section is handled independently; a failing entry is reported with a
    ``Skipped [...]`` message and does not prevent the remaining entries from being processed.
    Options read from every entry:

    - ``path``: target file path, ``~`` and environment variables are expanded (required)
    - ``contents``: the data to write
    - ``format``: ``base64`` decodes ``contents`` first (and, unless the target format is
      ``bytes``, decodes the result as UTF-8 text); any other value leaves ``contents`` as is
    - ``target_format``: ``json``, ``yaml``/``yml`` (encoded through ``HOCONConverter``) or
      ``bytes`` (written in binary mode); anything else is written as ``str(contents)``
    - ``overwrite``: when false, an existing file is left untouched (default: true)

    :param config: configuration object; only ``config.get("files", None)`` is used.
        The section may be a mapping, or a list/tuple of ``(key, entry)`` pairs.
    """
    files = config.get("files", None)
    if not files:
        return

    if isinstance(files, (list, tuple)):
        files = dict(files)

    print("Creating files from configuration")
    for key, data in files.items():
        path = data.get("path")
        fmt = data.get("format", "string")
        target_fmt = data.get("target_format", "string")
        overwrite = bool(data.get("overwrite", True))
        contents = data.get("contents")

        # An entry without a path cannot be written. Skip it here, otherwise expandvars(None)
        # raises outside of any try block and aborts all the remaining entries.
        if not path:
            print("Skipped [{}]: missing path".format(key))
            continue

        target = Path(expanduser(expandvars(str(path))))

        # noinspection PyBroadException
        try:
            if target.is_dir():
                print("Skipped [{}]: is a directory {}".format(key, target))
                continue

            if not overwrite and target.is_file():
                print("Skipped [{}]: file exists {}".format(key, target))
                continue
        except Exception as ex:
            print("Skipped [{}]: can't access {} ({})".format(key, target, ex))
            continue

        if contents:
            try:
                if fmt == "base64":
                    contents = base64.b64decode(contents)
                    # keep raw bytes only when the file is going to be written in binary mode
                    if target_fmt != "bytes":
                        contents = contents.decode("utf-8")
            except Exception as ex:
                print("Skipped [{}]: failed decoding {} ({})".format(key, fmt, ex))
                continue

        # noinspection PyBroadException
        try:
            target.parent.mkdir(parents=True, exist_ok=True)
        except Exception as ex:
            print("Skipped [{}]: failed creating path {} ({})".format(key, target.parent, ex))
            continue

        try:
            if target_fmt == "bytes":
                try:
                    target.write_bytes(contents)
                except TypeError:
                    # simpler error so the user won't get confused
                    raise TypeError("a bytes-like object is required")
            else:
                try:
                    if target_fmt == "json":
                        text = HOCONConverter.to_json(contents)
                    elif target_fmt in ("yaml", "yml"):
                        text = HOCONConverter.to_yaml(contents)
                    else:
                        # plain text: flatten a nested config section before stringifying it
                        if isinstance(contents, ConfigTree):
                            contents = contents.as_plain_ordered_dict()
                        text = str(contents)
                except Exception as ex:
                    print("Skipped [{}]: failed encoding to {} ({})".format(key, target_fmt, ex))
                    continue
                target.write_text(text)
            print("Saved [{}]: {}".format(key, target))
        except Exception as ex:
            print("Skipped [{}]: failed saving file {} ({})".format(key, target, ex))
a/clearml_agent/commands/worker.py +++ b/clearml_agent/commands/worker.py @@ -37,7 +37,9 @@ from clearml_agent.backend_api.services import queues as queues_api from clearml_agent.backend_api.services import tasks as tasks_api from clearml_agent.backend_api.services import workers as workers_api from clearml_agent.backend_api.session import CallResult +from clearml_agent.backend_api.session.defs import ENV_ENABLE_ENV_CONFIG_SECTION, ENV_ENABLE_FILES_CONFIG_SECTION from clearml_agent.backend_config.defs import UptimeConf +from clearml_agent.backend_config.utils import apply_environment, apply_files from clearml_agent.commands.base import resolve_names, ServiceCommandSection from clearml_agent.definitions import ( ENVIRONMENT_SDK_PARAMS, @@ -60,6 +62,7 @@ from clearml_agent.definitions import ( ENV_SSH_AUTH_SOCK, ENV_AGENT_SKIP_PIP_VENV_INSTALL, ENV_EXTRA_DOCKER_ARGS, + ) from clearml_agent.definitions import WORKING_REPOSITORY_DIR, PIP_EXTRA_INDICES from clearml_agent.errors import APIError, CommandFailedError, Sigterm @@ -1737,6 +1740,29 @@ class Worker(ServiceCommandSection): raise ValueError("Failed applying git diff:\n{}\n\n" "ERROR! 
def _apply_extra_configuration(self):
    """
    Apply the optional configuration extras: vaults, then the `environment` section,
    then the `files` section.

    Every step is best-effort: a failure is printed and the next step still runs.
    The `agent.apply_environment` / `agent.apply_files` configuration values are passed as
    the default to the matching ENV_ENABLE_*_CONFIG_SECTION entry.
    NOTE(review): presumably the environment variable, when set, takes precedence over the
    configuration value - confirm against EnvEntry.get()
    """
    try:
        self._session.load_vaults()
    except Exception as err:
        print("Error: failed applying extra configuration: {}".format(err))

    # read the configuration only after the vaults step above has run
    cfg = self._session.config

    env_section_enabled = ENV_ENABLE_ENV_CONFIG_SECTION.get(
        default=cfg.get("agent.apply_environment", False)
    )
    if env_section_enabled:
        try:
            exported = apply_environment(cfg)
            if exported:
                print("Environment variables set from configuration: {}".format(exported))
        except Exception as err:
            print("Error: failed applying environment from configuration: {}".format(err))

    files_section_enabled = ENV_ENABLE_FILES_CONFIG_SECTION.get(
        default=cfg.get("agent.apply_files", default=False)
    )
    if files_section_enabled:
        try:
            apply_files(cfg)
        except Exception as err:
            print("Error: failed applying files from configuration: {}".format(err))
Currently supported value is `base64` to automatically decode a + # base64-encoded contents string, otherwise ignored + # path: the target file's path, may include ~ and inplace env vars + # target_format: format used to encode contents before writing into the target file. Supported values are json, + # yaml, yml and bytes (in which case the file will be written in binary mode). Default is text mode. + # overwrite: overwrite the target file in case it exists. Default is true. + # + # Example: + # files { + # myfile1 { + # contents: "The quick brown fox jumped over the lazy dog" + # path: "/tmp/fox.txt" + # } + # myjsonfile { + # contents: {some: nested: value: [1, 2, 3, 4]} + # path: "/tmp/test.json" + # target_format: json + # } + # } }