diff --git a/trains/backend_api/schema/service.py b/trains/backend_api/schema/service.py index 14592207..32c417bf 100644 --- a/trains/backend_api/schema/service.py +++ b/trains/backend_api/schema/service.py @@ -4,8 +4,7 @@ import re import attr import six -import pyhocon - +from ...utilities.pyhocon import ConfigTree from .action import Action @@ -56,7 +55,7 @@ class Service(object): def parse(self, service_config): self._default = service_config.get( - "_default", pyhocon.ConfigTree() + "_default", ConfigTree() ).as_plain_ordered_dict() self._doc = '{} service'.format(self.name) @@ -64,7 +63,7 @@ class Service(object): if description: self._doc += '\n\n{}'.format(description) self._definitions = service_config.get( - "_definitions", pyhocon.ConfigTree() + "_definitions", ConfigTree() ).as_plain_ordered_dict() self._definitions_refs = { k: self._get_schema_references(v) for k, v in self._definitions.items() diff --git a/trains/backend_api/session/session.py b/trains/backend_api/session/session.py index 5c8d5a2f..c171d8bf 100644 --- a/trains/backend_api/session/session.py +++ b/trains/backend_api/session/session.py @@ -7,7 +7,7 @@ from six.moves.urllib.parse import urlparse, urlunparse import jwt import requests import six -from pyhocon import ConfigTree +from ...utilities.pyhocon import ConfigTree from requests.auth import HTTPBasicAuth from .callresult import CallResult diff --git a/trains/backend_config/config.py b/trains/backend_config/config.py index d898bef8..f7324aef 100644 --- a/trains/backend_config/config.py +++ b/trains/backend_config/config.py @@ -9,10 +9,9 @@ from fnmatch import fnmatch from os.path import expanduser from typing import Any -import pyhocon import six from pathlib2 import Path -from pyhocon import ConfigTree +from ..utilities.pyhocon import ConfigTree, ConfigFactory from pyparsing import ( ParseFatalException, ParseException, @@ -310,7 +309,7 @@ class Config(object): print("Loading config from file %s" % file_path) try: - return 
pyhocon.ConfigFactory.parse_file(file_path) + return ConfigFactory.parse_file(file_path) except ParseSyntaxException as ex: msg = "Failed parsing {0} ({1.__class__.__name__}): (at char {1.loc}, line:{1.lineno}, col:{1.column})".format( file_path, ex diff --git a/trains/config/default/__main__.py b/trains/config/default/__main__.py index 9affe73a..92f567de 100644 --- a/trains/config/default/__main__.py +++ b/trains/config/default/__main__.py @@ -1,7 +1,7 @@ from __future__ import print_function from six.moves import input -from pyhocon import ConfigFactory +from ...utilities.pyhocon import ConfigFactory from pathlib2 import Path from six.moves.urllib.parse import urlparse diff --git a/trains/model.py b/trains/model.py index 0c54f708..a675d4e2 100644 --- a/trains/model.py +++ b/trains/model.py @@ -10,7 +10,7 @@ import six from .backend_api import Session from .backend_api.services import models from pathlib2 import Path -from pyhocon import ConfigFactory, HOCONConverter +from .utilities.pyhocon import ConfigFactory, HOCONConverter from .backend_interface.util import validate_dict, get_single_result, mutually_exclusive from .debugging.log import get_logger @@ -288,14 +288,15 @@ class BaseModel(object): if not isinstance(config, dict): raise ValueError("Model configuration only supports dictionary objects") try: - # hack, pyhocon is not very good with dict conversion so we pass through json try: + text = HOCONConverter.to_hocon(ConfigFactory.from_dict(config)) + except Exception: + # fallback json+pyhocon + # hack, pyhocon is not very good with dict conversion so we pass through json import json text = json.dumps(config) - text = HOCONConverter.convert(ConfigFactory.parse_string(text), 'hocon') - except Exception: - # fallback pyhocon - text = HOCONConverter.convert(ConfigFactory.from_dict(config), 'hocon') + text = HOCONConverter.to_hocon(ConfigFactory.parse_string(text)) + except Exception: raise ValueError("Could not serialize configuration dictionary:\n", config) 
return text diff --git a/trains/utilities/pyhocon/__init__.py b/trains/utilities/pyhocon/__init__.py new file mode 100755 index 00000000..4aa44f0c --- /dev/null +++ b/trains/utilities/pyhocon/__init__.py @@ -0,0 +1,3 @@ +from .config_parser import ConfigParser, ConfigFactory +from .config_tree import ConfigTree +from .converter import HOCONConverter diff --git a/trains/utilities/pyhocon/config_parser.py b/trains/utilities/pyhocon/config_parser.py new file mode 100755 index 00000000..2efe33d5 --- /dev/null +++ b/trains/utilities/pyhocon/config_parser.py @@ -0,0 +1,743 @@ +import itertools +import re +import os +import socket +import contextlib +import codecs +from datetime import timedelta + +from pyparsing import Forward, Keyword, QuotedString, Word, Literal, Suppress, Regex, Optional, SkipTo, ZeroOrMore, \ + Group, lineno, col, TokenConverter, replaceWith, alphanums, alphas8bit, ParseSyntaxException, StringEnd +from pyparsing import ParserElement +from .config_tree import ConfigTree, ConfigSubstitution, ConfigList, ConfigValues, ConfigUnquotedString, \ + ConfigInclude, NoneValue, ConfigQuotedString +from .exceptions import ConfigSubstitutionException, ConfigMissingException, ConfigException +import logging +import copy + +use_urllib2 = False +try: + # For Python 3.0 and later + from urllib.request import urlopen + from urllib.error import HTTPError, URLError +except ImportError: # pragma: no cover + # Fall back to Python 2's urllib2 + from urllib2 import urlopen, HTTPError, URLError + + use_urllib2 = True +try: + basestring +except NameError: # pragma: no cover + basestring = str + unicode = str + +logger = logging.getLogger(__name__) + +# +# Substitution Defaults +# + + +class DEFAULT_SUBSTITUTION(object): + pass + + +class MANDATORY_SUBSTITUTION(object): + pass + + +class NO_SUBSTITUTION(object): + pass + + +class STR_SUBSTITUTION(object): + pass + + +def period(period_value, period_unit): + try: + from dateutil.relativedelta import relativedelta as period_impl 
+ except Exception: + from datetime import timedelta as period_impl + + if period_unit == 'nanoseconds': + period_unit = 'microseconds' + period_value = int(period_value / 1000) + + arguments = dict(zip((period_unit,), (period_value,))) + + if period_unit == 'milliseconds': + return timedelta(**arguments) + + return period_impl(**arguments) + + +class ConfigFactory(object): + + @classmethod + def parse_file(cls, filename, encoding='utf-8', required=True, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION): + """Parse file + + :param filename: filename + :type filename: basestring + :param encoding: file encoding + :type encoding: basestring + :param required: If true, raises an exception if can't load file + :type required: boolean + :param resolve: if true, resolve substitutions + :type resolve: boolean + :param unresolved_value: assigned value value to unresolved substitution. + If overriden with a default value, it will replace all unresolved value to the default value. + If it is set to to pyhocon.STR_SUBSTITUTION then it will replace the value by its substitution expression (e.g., ${x}) + :type unresolved_value: boolean + :return: Config object + :type return: Config + """ + try: + with codecs.open(filename, 'r', encoding=encoding) as fd: + content = fd.read() + return cls.parse_string(content, os.path.dirname(filename), resolve, unresolved_value) + except IOError as e: + if required: + raise e + logger.warn('Cannot include file %s. File does not exist or cannot be read.', filename) + return [] + + @classmethod + def parse_URL(cls, url, timeout=None, resolve=True, required=False, unresolved_value=DEFAULT_SUBSTITUTION): + """Parse URL + + :param url: url to parse + :type url: basestring + :param resolve: if true, resolve substitutions + :type resolve: boolean + :param unresolved_value: assigned value value to unresolved substitution. + If overriden with a default value, it will replace all unresolved value to the default value. 
+ If it is set to to pyhocon.STR_SUBSTITUTION then it will replace the value by its substitution expression (e.g., ${x}) + :type unresolved_value: boolean + :return: Config object or [] + :type return: Config or list + """ + socket_timeout = socket._GLOBAL_DEFAULT_TIMEOUT if timeout is None else timeout + + try: + with contextlib.closing(urlopen(url, timeout=socket_timeout)) as fd: + content = fd.read() if use_urllib2 else fd.read().decode('utf-8') + return cls.parse_string(content, os.path.dirname(url), resolve, unresolved_value) + except (HTTPError, URLError) as e: + logger.warn('Cannot include url %s. Resource is inaccessible.', url) + if required: + raise e + else: + return [] + + @classmethod + def parse_string(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION): + """Parse URL + + :param content: content to parse + :type content: basestring + :param resolve: If true, resolve substitutions + :param resolve: if true, resolve substitutions + :type resolve: boolean + :param unresolved_value: assigned value value to unresolved substitution. + If overriden with a default value, it will replace all unresolved value to the default value. 
+ If it is set to to pyhocon.STR_SUBSTITUTION then it will replace the value by its substitution expression (e.g., ${x}) + :type unresolved_value: boolean + :return: Config object + :type return: Config + """ + return ConfigParser().parse(content, basedir, resolve, unresolved_value) + + @classmethod + def from_dict(cls, dictionary, root=False): + """Convert dictionary (and ordered dictionary) into a ConfigTree + :param dictionary: dictionary to convert + :type dictionary: dict + :return: Config object + :type return: Config + """ + + def create_tree(value): + if isinstance(value, dict): + res = ConfigTree(root=root) + for key, child_value in value.items(): + res.put(key, create_tree(child_value)) + return res + if isinstance(value, list): + return [create_tree(v) for v in value] + else: + return value + + return create_tree(dictionary) + + +class ConfigParser(object): + """ + Parse HOCON files: https://github.com/typesafehub/config/blob/master/HOCON.md + """ + + REPLACEMENTS = { + '\\\\': '\\', + '\\\n': '\n', + '\\n': '\n', + '\\r': '\r', + '\\t': '\t', + '\\=': '=', + '\\#': '#', + '\\!': '!', + '\\"': '"', + } + + period_type_map = { + 'nanoseconds': ['ns', 'nano', 'nanos', 'nanosecond', 'nanoseconds'], + + 'microseconds': ['us', 'micro', 'micros', 'microsecond', 'microseconds'], + 'milliseconds': ['ms', 'milli', 'millis', 'millisecond', 'milliseconds'], + 'seconds': ['s', 'second', 'seconds'], + 'minutes': ['m', 'minute', 'minutes'], + 'hours': ['h', 'hour', 'hours'], + 'weeks': ['w', 'week', 'weeks'], + 'days': ['d', 'day', 'days'], + + } + + optional_period_type_map = { + 'months': ['mo', 'month', 'months'], # 'm' from hocon spec removed. conflicts with minutes syntax. 
+ 'years': ['y', 'year', 'years'] + } + + supported_period_map = None + + @classmethod + def get_supported_period_type_map(cls): + if cls.supported_period_map is None: + cls.supported_period_map = {} + cls.supported_period_map.update(cls.period_type_map) + + try: + from dateutil import relativedelta + + if relativedelta is not None: + cls.supported_period_map.update(cls.optional_period_type_map) + except Exception: + pass + + return cls.supported_period_map + + @classmethod + def parse(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION): + """parse a HOCON content + + :param content: HOCON content to parse + :type content: basestring + :param resolve: if true, resolve substitutions + :type resolve: boolean + :param unresolved_value: assigned value value to unresolved substitution. + If overriden with a default value, it will replace all unresolved value to the default value. + If it is set to to pyhocon.STR_SUBSTITUTION then it will replace the value by its substitution expression (e.g., ${x}) + :type unresolved_value: boolean + :return: a ConfigTree or a list + """ + + unescape_pattern = re.compile(r'\\.') + + def replace_escape_sequence(match): + value = match.group(0) + return cls.REPLACEMENTS.get(value, value) + + def norm_string(value): + return unescape_pattern.sub(replace_escape_sequence, value) + + def unescape_string(tokens): + return ConfigUnquotedString(norm_string(tokens[0])) + + def parse_multi_string(tokens): + # remove the first and last 3 " + return tokens[0][3: -3] + + def convert_number(tokens): + n = tokens[0] + try: + return int(n, 10) + except ValueError: + return float(n) + + def safe_convert_number(tokens): + n = tokens[0] + try: + return int(n, 10) + except ValueError: + try: + return float(n) + except ValueError: + return n + + def convert_period(tokens): + + period_value = int(tokens.value) + period_identifier = tokens.unit + + period_unit = next((single_unit for single_unit, values + in 
cls.get_supported_period_type_map().items() + if period_identifier in values)) + + return period(period_value, period_unit) + + # ${path} or ${?path} for optional substitution + SUBSTITUTION_PATTERN = r"\$\{(?P\?)?(?P[^}]+)\}(?P[ \t]*)" + + def create_substitution(instring, loc, token): + # remove the ${ and } + match = re.match(SUBSTITUTION_PATTERN, token[0]) + variable = match.group('variable') + ws = match.group('ws') + optional = match.group('optional') == '?' + substitution = ConfigSubstitution(variable, optional, ws, instring, loc) + return substitution + + # ${path} or ${?path} for optional substitution + STRING_PATTERN = '"(?P(?:[^"\\\\]|\\\\.)*)"(?P[ \t]*)' + + def create_quoted_string(instring, loc, token): + # remove the ${ and } + match = re.match(STRING_PATTERN, token[0]) + value = norm_string(match.group('value')) + ws = match.group('ws') + return ConfigQuotedString(value, ws, instring, loc) + + def include_config(instring, loc, token): + url = None + file = None + required = False + + if token[0] == 'required': + required = True + final_tokens = token[1:] + else: + final_tokens = token + + if len(final_tokens) == 1: # include "test" + value = final_tokens[0].value if isinstance(final_tokens[0], ConfigQuotedString) else final_tokens[0] + if value.startswith("http://") or value.startswith("https://") or value.startswith("file://"): + url = value + else: + file = value + elif len(final_tokens) == 2: # include url("test") or file("test") + value = final_tokens[1].value if isinstance(token[1], ConfigQuotedString) else final_tokens[1] + if final_tokens[0] == 'url': + url = value + else: + file = value + + if url is not None: + logger.debug('Loading config from url %s', url) + obj = ConfigFactory.parse_URL( + url, + resolve=False, + required=required, + unresolved_value=NO_SUBSTITUTION + ) + elif file is not None: + path = file if basedir is None else os.path.join(basedir, file) + logger.debug('Loading config from file %s', path) + obj = 
ConfigFactory.parse_file( + path, + resolve=False, + required=required, + unresolved_value=NO_SUBSTITUTION + ) + else: + raise ConfigException('No file or URL specified at: {loc}: {instring}', loc=loc, instring=instring) + + return ConfigInclude(obj if isinstance(obj, list) else obj.items()) + + @contextlib.contextmanager + def set_default_white_spaces(): + default = ParserElement.DEFAULT_WHITE_CHARS + ParserElement.setDefaultWhitespaceChars(' \t') + yield + ParserElement.setDefaultWhitespaceChars(default) + + with set_default_white_spaces(): + assign_expr = Forward() + true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True)) + false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False)) + null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(NoneValue())) + # key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /') + key = QuotedString('"', escChar='\\', unquoteResults=False) | \ + Word("0123456789.").setParseAction(safe_convert_number) | Word(alphanums + alphas8bit + '._- /') + + eol = Word('\n\r').suppress() + eol_comma = Word('\n\r,').suppress() + comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd()) + comment_eol = Suppress(Optional(eol_comma) + comment) + comment_no_comma_eol = (comment | eol).suppress() + number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))', + re.DOTALL).setParseAction(convert_number) + + period_types = itertools.chain.from_iterable(cls.get_supported_period_type_map().values()) + period_expr = Regex(r'(?P\d+)\s*(?P' + '|'.join(period_types) + ')$' + ).setParseAction(convert_period) + + # multi line string using """ + # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969 + multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string) + # single quoted line string + quoted_string = Regex(r'"(?:[^"\\\n]|\\.)*"[ \t]*', 
re.UNICODE).setParseAction(create_quoted_string) + # unquoted string that takes the rest of the line until an optional comment + # we support .properties multiline support which is like this: + # line1 \ + # line2 \ + # so a backslash precedes the \n + unquoted_string = Regex(r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*', re.UNICODE).setParseAction(unescape_string) + substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution) + string_expr = multiline_string | quoted_string | unquoted_string + + value_expr = period_expr | number_expr | true_expr | false_expr | null_expr | string_expr + + include_content = (quoted_string | ((Keyword('url') | Keyword('file')) - Literal('(').suppress() - quoted_string - Literal(')').suppress())) + include_expr = ( + Keyword("include", caseless=True).suppress() + ( + include_content | ( + Keyword("required") - Literal('(').suppress() - include_content - Literal(')').suppress() + ) + ) + ).setParseAction(include_config) + + root_dict_expr = Forward() + dict_expr = Forward() + list_expr = Forward() + multi_value_expr = ZeroOrMore(comment_eol | include_expr | substitution_expr | dict_expr | list_expr | value_expr | (Literal( + '\\') - eol).suppress()) + # for a dictionary : or = is optional + # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation + inside_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma)) + inside_root_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma), root=True) + dict_expr << Suppress('{') - inside_dict_expr - Suppress('}') + root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress('}') + list_entry = ConcatenatedValueParser(multi_value_expr) + list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']') + + # special case when we have a value assignment where the string can potentially be the remainder 
of the line + assign_expr << Group( + key - ZeroOrMore(comment_no_comma_eol) - (dict_expr | (Literal('=') | Literal(':') | Literal('+=')) - ZeroOrMore( + comment_no_comma_eol) - ConcatenatedValueParser(multi_value_expr)) + ) + + # the file can be { ... } where {} can be omitted or [] + config_expr = ZeroOrMore(comment_eol | eol) + (list_expr | root_dict_expr | inside_root_dict_expr) + ZeroOrMore( + comment_eol | eol_comma) + config = config_expr.parseString(content, parseAll=True)[0] + + if resolve: + allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION and unresolved_value is not MANDATORY_SUBSTITUTION + has_unresolved = cls.resolve_substitutions(config, allow_unresolved) + if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION: + raise ConfigSubstitutionException('resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION') + + if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION: + cls.unresolve_substitutions_to_value(config, unresolved_value) + return config + + @classmethod + def _resolve_variable(cls, config, substitution): + """ + :param config: + :param substitution: + :return: (is_resolved, resolved_variable) + """ + variable = substitution.variable + try: + return True, config.get(variable) + except ConfigMissingException: + # default to environment variable + value = os.environ.get(variable) + + if value is None: + if substitution.optional: + return False, None + else: + raise ConfigSubstitutionException( + "Cannot resolve variable ${{{variable}}} (line: {line}, col: {col})".format( + variable=variable, + line=lineno(substitution.loc, substitution.instring), + col=col(substitution.loc, substitution.instring))) + elif isinstance(value, ConfigList) or isinstance(value, ConfigTree): + raise ConfigSubstitutionException( + "Cannot substitute variable ${{{variable}}} because it does not point to a " + "string, int, float, boolean or null {type} (line:{line}, col: 
{col})".format( + variable=variable, + type=value.__class__.__name__, + line=lineno(substitution.loc, substitution.instring), + col=col(substitution.loc, substitution.instring))) + return True, value + + @classmethod + def _fixup_self_references(cls, config, accept_unresolved=False): + if isinstance(config, ConfigTree) and config.root: + for key in config: # Traverse history of element + history = config.history[key] + previous_item = history[0] + for current_item in history[1:]: + for substitution in cls._find_substitutions(current_item): + prop_path = ConfigTree.parse_key(substitution.variable) + if len(prop_path) > 1 and config.get(substitution.variable, None) is not None: + continue # If value is present in latest version, don't do anything + if prop_path[0] == key: + if isinstance(previous_item, ConfigValues) and not accept_unresolved: # We hit a dead end, we cannot evaluate + raise ConfigSubstitutionException( + "Property {variable} cannot be substituted. Check for cycles.".format( + variable=substitution.variable + ) + ) + else: + value = previous_item if len(prop_path) == 1 else previous_item.get(".".join(prop_path[1:])) + _, _, current_item = cls._do_substitute(substitution, value) + previous_item = current_item + + if len(history) == 1: # special case, when self optional referencing without existing + for substitution in cls._find_substitutions(previous_item): + prop_path = ConfigTree.parse_key(substitution.variable) + if len(prop_path) > 1 and config.get(substitution.variable, None) is not None: + continue # If value is present in latest version, don't do anything + if prop_path[0] == key and substitution.optional: + cls._do_substitute(substitution, None) + + # traverse config to find all the substitutions + @classmethod + def _find_substitutions(cls, item): + """Convert HOCON input into a JSON output + + :return: JSON string representation + :type return: basestring + """ + if isinstance(item, ConfigValues): + return item.get_substitutions() + + 
substitutions = [] + elements = [] + if isinstance(item, ConfigTree): + elements = item.values() + elif isinstance(item, list): + elements = item + + for child in elements: + substitutions += cls._find_substitutions(child) + return substitutions + + @classmethod + def _do_substitute(cls, substitution, resolved_value, is_optional_resolved=True): + unresolved = False + new_substitutions = [] + if isinstance(resolved_value, ConfigValues): + resolved_value = resolved_value.transform() + if isinstance(resolved_value, ConfigValues): + unresolved = True + result = resolved_value + else: + # replace token by substitution + config_values = substitution.parent + # if it is a string, then add the extra ws that was present in the original string after the substitution + formatted_resolved_value = resolved_value \ + if resolved_value is None \ + or isinstance(resolved_value, (dict, list)) \ + or substitution.index == len(config_values.tokens) - 1 \ + else (str(resolved_value) + substitution.ws) + # use a deepcopy of resolved_value to avoid mutation + config_values.put(substitution.index, copy.deepcopy(formatted_resolved_value)) + transformation = config_values.transform() + result = config_values.overriden_value \ + if transformation is None and not is_optional_resolved \ + else transformation + + if result is None and config_values.key in config_values.parent: + del config_values.parent[config_values.key] + else: + config_values.parent[config_values.key] = result + s = cls._find_substitutions(result) + if s: + new_substitutions = s + unresolved = True + + return (unresolved, new_substitutions, result) + + @classmethod + def _final_fixup(cls, item): + if isinstance(item, ConfigValues): + return item.transform() + elif isinstance(item, list): + return list([cls._final_fixup(child) for child in item]) + elif isinstance(item, ConfigTree): + items = list(item.items()) + for key, child in items: + item[key] = cls._final_fixup(child) + return item + + @classmethod + def 
unresolve_substitutions_to_value(cls, config, unresolved_value=STR_SUBSTITUTION): + for substitution in cls._find_substitutions(config): + if unresolved_value is STR_SUBSTITUTION: + value = substitution.raw_str() + elif unresolved_value is None: + value = NoneValue() + else: + value = unresolved_value + cls._do_substitute(substitution, value, False) + cls._final_fixup(config) + + @classmethod + def resolve_substitutions(cls, config, accept_unresolved=False): + has_unresolved = False + cls._fixup_self_references(config, accept_unresolved) + substitutions = cls._find_substitutions(config) + if len(substitutions) > 0: + unresolved = True + any_unresolved = True + _substitutions = [] + cache = {} + while any_unresolved and len(substitutions) > 0 and set(substitutions) != set(_substitutions): + unresolved = False + any_unresolved = True + _substitutions = substitutions[:] + + for substitution in _substitutions: + is_optional_resolved, resolved_value = cls._resolve_variable(config, substitution) + + # if the substitution is optional + if not is_optional_resolved and substitution.optional: + resolved_value = None + if isinstance(resolved_value, ConfigValues): + parents = cache.get(resolved_value) + if parents is None: + parents = [] + link = resolved_value + while isinstance(link, ConfigValues): + parents.append(link) + link = link.overriden_value + cache[resolved_value] = parents + + if isinstance(resolved_value, ConfigValues) \ + and substitution.parent in parents \ + and hasattr(substitution.parent, 'overriden_value') \ + and substitution.parent.overriden_value: + + # self resolution, backtrack + resolved_value = substitution.parent.overriden_value + + unresolved, new_substitutions, result = cls._do_substitute(substitution, resolved_value, is_optional_resolved) + any_unresolved = unresolved or any_unresolved + substitutions.extend(new_substitutions) + if not isinstance(result, ConfigValues): + substitutions.remove(substitution) + + cls._final_fixup(config) + if 
unresolved: + has_unresolved = True + if not accept_unresolved: + raise ConfigSubstitutionException("Cannot resolve {variables}. Check for cycles.".format( + variables=', '.join('${{{variable}}}: (line: {line}, col: {col})'.format( + variable=substitution.variable, + line=lineno(substitution.loc, substitution.instring), + col=col(substitution.loc, substitution.instring)) for substitution in substitutions))) + + cls._final_fixup(config) + return has_unresolved + + +class ListParser(TokenConverter): + """Parse a list [elt1, etl2, ...] + """ + + def __init__(self, expr=None): + super(ListParser, self).__init__(expr) + self.saveAsList = True + + def postParse(self, instring, loc, token_list): + """Create a list from the tokens + + :param instring: + :param loc: + :param token_list: + :return: + """ + cleaned_token_list = [token for tokens in (token.tokens if isinstance(token, ConfigInclude) else [token] + for token in token_list if token != '') + for token in tokens] + config_list = ConfigList(cleaned_token_list) + return [config_list] + + +class ConcatenatedValueParser(TokenConverter): + def __init__(self, expr=None): + super(ConcatenatedValueParser, self).__init__(expr) + self.parent = None + self.key = None + + def postParse(self, instring, loc, token_list): + config_values = ConfigValues(token_list, instring, loc) + return [config_values.transform()] + + +class ConfigTreeParser(TokenConverter): + """ + Parse a config tree from tokens + """ + + def __init__(self, expr=None, root=False): + super(ConfigTreeParser, self).__init__(expr) + self.root = root + self.saveAsList = True + + def postParse(self, instring, loc, token_list): + """Create ConfigTree from tokens + + :param instring: + :param loc: + :param token_list: + :return: + """ + config_tree = ConfigTree(root=self.root) + for element in token_list: + expanded_tokens = element.tokens if isinstance(element, ConfigInclude) else [element] + + for tokens in expanded_tokens: + # key, value1 (optional), ... 
+ key = tokens[0].strip() if isinstance(tokens[0], (unicode, basestring)) else tokens[0] + operator = '=' + if len(tokens) == 3 and tokens[1].strip() in [':', '=', '+=']: + operator = tokens[1].strip() + values = tokens[2:] + elif len(tokens) == 2: + values = tokens[1:] + else: + raise ParseSyntaxException("Unknown tokens {tokens} received".format(tokens=tokens)) + # empty string + if len(values) == 0: + config_tree.put(key, '') + else: + value = values[0] + if isinstance(value, list) and operator == "+=": + value = ConfigValues([ConfigSubstitution(key, True, '', False, loc), value], False, loc) + config_tree.put(key, value, False) + elif isinstance(value, unicode) and operator == "+=": + value = ConfigValues([ConfigSubstitution(key, True, '', True, loc), ' ' + value], True, loc) + config_tree.put(key, value, False) + elif isinstance(value, list): + config_tree.put(key, value, False) + else: + existing_value = config_tree.get(key, None) + if isinstance(value, ConfigTree) and not isinstance(existing_value, list): + # Only Tree has to be merged with tree + config_tree.put(key, value, True) + elif isinstance(value, ConfigValues): + conf_value = value + value.parent = config_tree + value.key = key + if isinstance(existing_value, list) or isinstance(existing_value, ConfigTree): + config_tree.put(key, conf_value, True) + else: + config_tree.put(key, conf_value, False) + else: + config_tree.put(key, value, False) + return config_tree diff --git a/trains/utilities/pyhocon/config_tree.py b/trains/utilities/pyhocon/config_tree.py new file mode 100755 index 00000000..373c6e16 --- /dev/null +++ b/trains/utilities/pyhocon/config_tree.py @@ -0,0 +1,604 @@ +from collections import OrderedDict +from pyparsing import lineno +from pyparsing import col +try: + basestring +except NameError: # pragma: no cover + basestring = str + unicode = str + +import re +import copy +from .exceptions import ConfigException, ConfigWrongTypeException, ConfigMissingException + + +class 
class UndefinedKey(object):
    """Sentinel used as the "no default supplied" marker by ConfigTree lookups."""
    pass


class NonExistentKey(object):
    """Sentinel used by ``ConfigTree.__getattr__`` to detect a missing key."""
    pass


class NoneValue(object):
    """Placeholder for a null value; instances are returned as ``None`` by lookups."""
    pass


class ConfigTree(OrderedDict):
    """Ordered mapping addressed with dot-separated key paths (e.g. ``a.b.c``).

    When constructed with ``root=True`` the tree also records, per top-level
    key, every value that was pushed for it in ``self.history``.
    """
    KEY_SEP = '.'

    def __init__(self, *args, **kwds):
        """Build the tree and attach any ConfigValues it contains to it.

        :param root: (keyword only) when true, keep a per-key history of values
        """
        self.root = kwds.pop('root', False)
        if self.root:
            self.history = {}
        super(ConfigTree, self).__init__(*args, **kwds)
        for key, value in self.items():
            if isinstance(value, ConfigValues):
                value.parent = self
                # ConfigValues tracks its position through `key` (see ConfigValues.__init__
                # and every other assignment in this module), not `index`.
                value.key = key

    @staticmethod
    def merge_configs(a, b, copy_trees=False):
        """Merge config b into a

        :param a: target config
        :type a: ConfigTree
        :param b: source config
        :type b: ConfigTree
        :param copy_trees: copy sub-trees of ``a`` before merging into them
        :return: merged config a
        """
        for key, value in b.items():
            # if key is in both a and b and both values are dictionary then merge it otherwise override it
            if key in a and isinstance(a[key], ConfigTree) and isinstance(b[key], ConfigTree):
                if copy_trees:
                    a[key] = a[key].copy()
                ConfigTree.merge_configs(a[key], b[key], copy_trees=copy_trees)
            else:
                if isinstance(value, ConfigValues):
                    value.parent = a
                    value.key = key
                    if key in a:
                        value.overriden_value = a[key]
                a[key] = value
                if a.root:
                    if b.root:
                        a.history[key] = a.history.get(key, []) + b.history.get(key, [value])
                    else:
                        a.history[key] = a.history.get(key, []) + [value]

        return a

    def _put(self, key_path, value, append=False):
        """Store ``value`` under the already-parsed ``key_path``.

        :param key_path: list of path elements, as produced by ``parse_key``
        :param value: value to store
        :param append: when true, concatenate with an existing value instead of overriding it
        :raises ConfigWrongTypeException: when appending to an incompatible existing value
        """
        key_elt = key_path[0]
        if len(key_path) == 1:
            # if value to set does not exist, override
            # if they are both configs then merge
            # if not then override
            if key_elt in self and isinstance(self[key_elt], ConfigTree) and isinstance(value, ConfigTree):
                if self.root:
                    new_value = ConfigTree.merge_configs(ConfigTree(), self[key_elt], copy_trees=True)
                    new_value = ConfigTree.merge_configs(new_value, value, copy_trees=True)
                    self._push_history(key_elt, new_value)
                    self[key_elt] = new_value
                else:
                    ConfigTree.merge_configs(self[key_elt], value)
            elif append:
                # If we have t=1
                # and we try to put t.a=5 then t is replaced by {a: 5}
                l_value = self.get(key_elt, None)
                if isinstance(l_value, ConfigValues):
                    l_value.tokens.append(value)
                    l_value.recompute()
                elif isinstance(l_value, ConfigTree) and isinstance(value, ConfigValues):
                    value.overriden_value = l_value
                    value.tokens.insert(0, l_value)
                    value.recompute()
                    value.parent = self
                    value.key = key_elt
                    self._push_history(key_elt, value)
                    self[key_elt] = value
                elif isinstance(l_value, list) and isinstance(value, ConfigValues):
                    self._push_history(key_elt, value)
                    value.overriden_value = l_value
                    value.parent = self
                    value.key = key_elt
                    self[key_elt] = value
                elif isinstance(l_value, list):
                    self[key_elt] = l_value + value
                    # NOTE(review): the previous list (not the concatenated one) is what
                    # gets recorded here - confirm this is intended.
                    self._push_history(key_elt, l_value)
                elif l_value is None:
                    self._push_history(key_elt, value)
                    self[key_elt] = value

                else:
                    raise ConfigWrongTypeException(
                        u"Cannot concatenate the list {key}: {value} to {prev_value} of {type}".format(
                            key='.'.join(key_path),
                            value=value,
                            prev_value=l_value,
                            type=l_value.__class__.__name__)
                    )
            else:
                # if there was an override keep the overridden value
                if isinstance(value, ConfigValues):
                    value.parent = self
                    value.key = key_elt
                    value.overriden_value = self.get(key_elt, None)
                self._push_history(key_elt, value)
                self[key_elt] = value
        else:
            next_config_tree = super(ConfigTree, self).get(key_elt)
            if not isinstance(next_config_tree, ConfigTree):
                # create a new dictionary or overwrite a previous value
                next_config_tree = ConfigTree()
                self._push_history(key_elt, next_config_tree)
                self[key_elt] = next_config_tree
            next_config_tree._put(key_path[1:], value, append)

    def _push_history(self, key, value):
        """Record ``value`` in the history of ``key``; no-op unless this is a root tree."""
        if self.root:
            hist = self.history.get(key)
            if hist is None:
                hist = self.history[key] = []
            hist.append(value)

    def _get(self, key_path, key_index=0, default=UndefinedKey):
        """Resolve the already-parsed ``key_path`` starting at ``key_index``.

        :param key_path: list of path elements, as produced by ``parse_key``
        :param key_index: position in ``key_path`` handled by this tree
        :param default: value returned when the path cannot be resolved
        :raises ConfigMissingException: key missing and no default given
        :raises ConfigWrongTypeException: an intermediate element is not a tree and no default given
        """
        key_elt = key_path[key_index]
        elt = super(ConfigTree, self).get(key_elt, UndefinedKey)

        if elt is UndefinedKey:
            if default is UndefinedKey:
                raise ConfigMissingException(u"No configuration setting found for key {key}".format(key='.'.join(key_path[:key_index + 1])))
            else:
                return default

        if key_index == len(key_path) - 1:
            if isinstance(elt, NoneValue):
                return None
            elif isinstance(elt, list):
                return [None if isinstance(x, NoneValue) else x for x in elt]
            else:
                return elt
        elif isinstance(elt, ConfigTree):
            return elt._get(key_path, key_index + 1, default)
        else:
            if default is UndefinedKey:
                raise ConfigWrongTypeException(
                    u"{key} has type {type} rather than dict".format(key='.'.join(key_path[:key_index + 1]),
                                                                     type=type(elt).__name__))
            else:
                return default

    @staticmethod
    def parse_key(string):
        """
        Split a key into path elements:
        - a.b.c => a, b, c
        - a."b.c" => a, QuotedKey("b.c") if . is any of the special characters: $}[]:=+#`^?!@*&.
        - "a" => a
        - a.b."c" => a, b, c (special case)
        :param string: either string key (parse '.' as sub-key) or int / float as regular keys
        :return: list of path elements
        """
        if isinstance(string, (int, float)):
            return [string]

        special_characters = '$}[]:=+#`^?!@*&.'
        tokens = re.findall(r'"[^"]+"|[^{special_characters}]+'.format(special_characters=re.escape(special_characters)), string)

        def contains_special_character(token):
            return any((c in special_characters) for c in token)

        # quotes are kept only when they protect a special character inside the element
        return [token if contains_special_character(token) else token.strip('"') for token in tokens]

    def put(self, key, value, append=False):
        """Put a value in the tree (dot separated)

        :param key: key to use (dot separated). E.g., a.b.c
        :type key: basestring
        :param value: value to put
        :param append: concatenate with an existing value instead of overriding it
        """
        self._put(ConfigTree.parse_key(key), value, append)

    def get(self, key, default=UndefinedKey):
        """Get a value from the tree

        :param key: key to use (dot separated). E.g., a.b.c
        :type key: basestring
        :param default: default value if key not found
        :type default: object
        :return: value in the tree located at key
        """
        return self._get(ConfigTree.parse_key(key), 0, default)

    def get_string(self, key, default=UndefinedKey):
        """Return string representation of value found at key

        :param key: key to use (dot separated). E.g., a.b.c
        :type key: basestring
        :param default: default value if key not found
        :type default: basestring
        :return: string value
        :type return: basestring
        """
        value = self.get(key, default)
        if value is None:
            return None

        string_value = unicode(value)
        if isinstance(value, bool):
            # booleans are rendered lower-case ("true"/"false")
            string_value = string_value.lower()
        return string_value

    def pop(self, key, default=UndefinedKey):
        """Remove specified key and return the corresponding value.
        If key is not found, default is returned if given, otherwise ConfigMissingException is raised

        This method assumes the user wants to remove the last value in the chain so it parses via parse_key
        and pops the last value out of the dict.

        :param key: key to use (dot separated). E.g., a.b.c
        :type key: basestring
        :param default: default value if key not found
        :type default: object
        :return: value in the tree located at key
        """
        # identity test: the sentinel must not be compared through a user-defined __ne__
        if default is not UndefinedKey and key not in self:
            return default

        value = self.get(key, UndefinedKey)
        lst = ConfigTree.parse_key(key)
        parent = self.KEY_SEP.join(lst[0:-1])
        child = lst[-1]

        if parent:
            self.get(parent).__delitem__(child)
        else:
            self.__delitem__(child)
        return value

    def get_int(self, key, default=UndefinedKey):
        """Return int representation of value found at key

        :param key: key to use (dot separated). E.g., a.b.c
        :type key: basestring
        :param default: default value if key not found
        :type default: int
        :return: int value
        :type return: int
        :raises ConfigException: value cannot be converted to int
        """
        value = self.get(key, default)
        try:
            return int(value) if value is not None else None
        except (TypeError, ValueError):
            raise ConfigException(
                u"{key} has type '{type}' rather than 'int'".format(key=key, type=type(value).__name__))

    def get_float(self, key, default=UndefinedKey):
        """Return float representation of value found at key

        :param key: key to use (dot separated). E.g., a.b.c
        :type key: basestring
        :param default: default value if key not found
        :type default: float
        :return: float value
        :type return: float
        :raises ConfigException: value cannot be converted to float
        """
        value = self.get(key, default)
        try:
            return float(value) if value is not None else None
        except (TypeError, ValueError):
            raise ConfigException(
                u"{key} has type '{type}' rather than 'float'".format(key=key, type=type(value).__name__))

    def get_bool(self, key, default=UndefinedKey):
        """Return boolean representation of value found at key

        :param key: key to use (dot separated). E.g., a.b.c
        :type key: basestring
        :param default: default value if key not found
        :type default: bool
        :return: boolean value
        :type return: bool
        :raises ConfigException: value is not one of the recognised boolean strings
        """

        # String conversions as per API-recommendations:
        # https://github.com/typesafehub/config/blob/master/HOCON.md#automatic-type-conversions
        bool_conversions = {
            None: None,
            'true': True, 'yes': True, 'on': True,
            'false': False, 'no': False, 'off': False
        }
        string_value = self.get_string(key, default)
        if string_value is not None:
            string_value = string_value.lower()
        try:
            return bool_conversions[string_value]
        except KeyError:
            raise ConfigException(
                u"{key} does not translate to a Boolean value".format(key=key))

    def get_list(self, key, default=UndefinedKey):
        """Return list representation of value found at key

        A tree whose keys are all non-negative integer strings ("0", "1", ...) is
        converted to a list ordered by the numeric value of its keys.

        :param key: key to use (dot separated). E.g., a.b.c
        :type key: basestring
        :param default: default value if key not found
        :type default: list
        :return: list value
        :type return: list
        :raises ConfigException: value is neither a list nor a tree with numeric keys
        """
        value = self.get(key, default)
        if isinstance(value, list):
            return value
        elif isinstance(value, ConfigTree):
            indexed = []
            for k, v in value.items():
                # the whole key must be a canonical non-negative integer
                if not (isinstance(k, basestring) and re.match(r'^(?:0|[1-9][0-9]*)$', k)):
                    raise ConfigException(u"{key} does not translate to a list".format(key=key))
                indexed.append((int(k), v))
            # numeric ordering, so that "10" comes after "2"
            indexed.sort(key=lambda kv: kv[0])
            return [v for _, v in indexed]
        elif value is None:
            return None
        else:
            raise ConfigException(
                u"{key} has type '{type}' rather than 'list'".format(key=key, type=type(value).__name__))

    def get_config(self, key, default=UndefinedKey):
        """Return tree config representation of value found at key

        :param key: key to use (dot separated). E.g., a.b.c
        :type key: basestring
        :param default: default value if key not found
        :type default: config
        :return: config value
        :type return: ConfigTree
        :raises ConfigException: value is not a dict
        """
        value = self.get(key, default)
        if isinstance(value, dict):
            return value
        elif value is None:
            return None
        else:
            raise ConfigException(
                u"{key} has type '{type}' rather than 'config'".format(key=key, type=type(value).__name__))

    def __getitem__(self, item):
        """Dictionary access goes through ``get`` so dotted paths are honoured."""
        val = self.get(item)
        if val is UndefinedKey:
            raise KeyError(item)
        return val

    # When the private items-view helper is importable, use it for items()
    try:
        from collections import _OrderedDictItemsView
    except ImportError:  # pragma: nocover
        pass
    else:
        def items(self):  # pragma: nocover
            return self._OrderedDictItemsView(self)

    def __getattr__(self, item):
        """Expose config keys as attributes; unknown names raise AttributeError."""
        val = self.get(item, NonExistentKey)
        if val is NonExistentKey:
            # OrderedDict defines no __getattr__ to delegate to, so raise directly
            raise AttributeError(
                "'{cls}' object has no attribute '{item}'".format(cls=type(self).__name__, item=item))
        return val

    def __contains__(self, item):
        """Return True when the (dot separated) key resolves to a value."""
        return self._get(self.parse_key(item), default=NoneValue) is not NoneValue

    def with_fallback(self, config, resolve=True):
        """
        return a new config with fallback on config
        :param config: config or filename of the config to fallback on
        :param resolve: resolve substitutions
        :return: new config with fallback on config
        """
        if isinstance(config, ConfigTree):
            result = ConfigTree.merge_configs(copy.deepcopy(config), copy.deepcopy(self))
        else:
            from . import ConfigFactory
            result = ConfigTree.merge_configs(ConfigFactory.parse_file(config, resolve=False), copy.deepcopy(self))

        if resolve:
            from . import ConfigParser
            ConfigParser.resolve_substitutions(result)
        return result

    def as_plain_ordered_dict(self):
        """return a deep copy of this config as a plain OrderedDict

        The config tree should be fully resolved.

        This is useful to get an object with no special semantics such as path expansion for the keys.
        In particular this means that keys that contain dots are not surrounded with '"' in the plain OrderedDict.

        :return: this config as an OrderedDict
        :type return: OrderedDict
        :raises ConfigException: the tree still contains unresolved ConfigValues
        """
        def plain_value(v):
            if isinstance(v, list):
                return [plain_value(e) for e in v]
            elif isinstance(v, ConfigTree):
                return v.as_plain_ordered_dict()
            else:
                if isinstance(v, ConfigValues):
                    raise ConfigException("The config tree contains unresolved elements")
                return v

        return OrderedDict((key.strip('"') if isinstance(key, (unicode, basestring)) else key, plain_value(value))
                           for key, value in self.items())


class ConfigList(list):
    """List that registers itself as parent of the ConfigValues it is built from."""

    def __init__(self, iterable=()):
        """
        :param iterable: initial elements (immutable default avoids a shared mutable argument)
        """
        new_list = list(iterable)
        super(ConfigList, self).__init__(new_list)
        for index, value in enumerate(new_list):
            if isinstance(value, ConfigValues):
                value.parent = self
                value.key = index


class ConfigInclude(object):
    """Holder for the tokens of an include statement."""

    def __init__(self, tokens):
        self.tokens = tokens


class ConfigValues(object):
    """Sequence of tokens forming one value, possibly containing substitutions."""

    def __init__(self, tokens, instring, loc):
        """
        :param tokens: tokens making up the value
        :param instring: source text, used with ``loc`` for error positions
        :param loc: location of the value in ``instring``
        """
        self.tokens = tokens
        self.parent = None
        self.key = None
        self._instring = instring
        self._loc = loc
        self.overriden_value = None
        self.recompute()

    def recompute(self):
        """Re-link substitutions to this value and normalise the token list."""
        for index, token in enumerate(self.tokens):
            if isinstance(token, ConfigSubstitution):
                token.parent = self
                token.index = index

        # no value return empty string
        if len(self.tokens) == 0:
            self.tokens = ['']

        # if the last token is an unquoted string then right strip it
        if isinstance(self.tokens[-1], ConfigUnquotedString):
            # rstrip only whitespaces, not \n\r because they would have been used escaped
            self.tokens[-1] = self.tokens[-1].rstrip(' \t')

    def has_substitution(self):
        """Return True when this value or one it overrides holds a substitution."""
        return len(self.get_substitutions()) > 0

    def get_substitutions(self):
        """Collect substitutions of this value and of the ConfigValues chain it overrides."""
        lst = []
        node = self
        while node:
            lst = [token for token in node.tokens if isinstance(token, ConfigSubstitution)] + lst
            if hasattr(node, 'overriden_value'):
                node = node.overriden_value
                if not isinstance(node, ConfigValues):
                    break
            else:
                break
        return lst

    def transform(self):
        """Collapse the tokens into a concrete value.

        :return: ``self`` while substitutions remain, ``None`` when no token is left,
                 otherwise a ConfigTree, a list or a scalar/string
        :raises ConfigWrongTypeException: tokens of different kinds are mixed
        """
        def determine_type(token):
            return ConfigTree if isinstance(token, ConfigTree) else ConfigList if isinstance(token, list) else str

        def format_str(v, last=False):
            if isinstance(v, ConfigQuotedString):
                return v.value + ('' if last else v.ws)
            else:
                return '' if v is None else unicode(v)

        if self.has_substitution():
            return self

        # remove None tokens
        tokens = [token for token in self.tokens if token is not None]

        if not tokens:
            return None

        # check if all tokens are compatible
        first_tok_type = determine_type(tokens[0])
        for index, token in enumerate(tokens[1:]):
            tok_type = determine_type(token)
            if first_tok_type is not tok_type:
                raise ConfigWrongTypeException(
                    "Token '{token}' of type {tok_type} (index {index}) must be of type {req_tok_type} (line: {line}, col: {col})".format(
                        token=token,
                        index=index + 1,
                        tok_type=tok_type.__name__,
                        req_tok_type=first_tok_type.__name__,
                        line=lineno(self._loc, self._instring),
                        col=col(self._loc, self._instring)))

        if first_tok_type is ConfigTree:
            # gather the trees this value overrides so they can be merged underneath
            child = []
            if hasattr(self, 'overriden_value'):
                node = self.overriden_value
                while node:
                    if isinstance(node, ConfigValues):
                        value = node.transform()
                        if isinstance(value, ConfigTree):
                            child.append(value)
                        else:
                            break
                    elif isinstance(node, ConfigTree):
                        child.append(node)
                    else:
                        break
                    if hasattr(node, 'overriden_value'):
                        node = node.overriden_value
                    else:
                        break

            result = ConfigTree()
            for conf in reversed(child):
                ConfigTree.merge_configs(result, conf, copy_trees=True)
            for token in tokens:
                ConfigTree.merge_configs(result, token, copy_trees=True)
            return result
        elif first_tok_type is ConfigList:
            result = []
            main_index = 0
            for sublist in tokens:
                sublist_result = ConfigList()
                for token in sublist:
                    if isinstance(token, ConfigValues):
                        token.parent = result
                        token.key = main_index
                    main_index += 1
                    sublist_result.append(token)
                result.extend(sublist_result)
            return result
        else:
            if len(tokens) == 1:
                if isinstance(tokens[0], ConfigQuotedString):
                    return tokens[0].value
                return tokens[0]
            else:
                return ''.join(format_str(token) for token in tokens[:-1]) + format_str(tokens[-1], True)

    def put(self, index, value):
        """Replace the token at ``index``."""
        self.tokens[index] = value

    def __repr__(self):  # pragma: no cover
        return '[ConfigValues: ' + ','.join(str(o) for o in self.tokens) + ']'


class ConfigSubstitution(object):
    """A ``${variable}`` / ``${?variable}`` reference inside a value."""

    def __init__(self, variable, optional, ws, instring, loc):
        self.variable = variable
        self.optional = optional
        self.ws = ws
        self.index = None
        self.parent = None
        self.instring = instring
        self.loc = loc

    def __repr__(self):  # pragma: no cover
        return '[ConfigSubstitution: ' + self.variable + ']'


class ConfigUnquotedString(unicode):
    """Marker string type; ConfigValues right-strips it when it is the last token."""

    def __new__(cls, value):
        return super(ConfigUnquotedString, cls).__new__(cls, value)


class ConfigQuotedString(object):
    """Quoted string token together with the whitespace (``ws``) kept alongside it."""

    def __init__(self, value, ws, instring, loc):
        self.value = value
        self.ws = ws
        self.instring = instring
        self.loc = loc

    def __repr__(self):  # pragma: no cover
        return '[ConfigQuotedString: ' + self.value + ']'
# ---- trains/utilities/pyhocon/converter.py ----
import json
import re
import sys

from . import ConfigFactory
from .config_tree import ConfigQuotedString
from .config_tree import ConfigSubstitution
from .config_tree import ConfigTree
from .config_tree import ConfigValues
from .config_tree import NoneValue


try:
    basestring
except NameError:
    basestring = str
    unicode = str


class HOCONConverter(object):
    """Serialise a ConfigTree (and the values it holds) to JSON, HOCON, YAML or .properties text."""

    _number_re = r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))'
    _number_re_matcher = re.compile(_number_re)

    @classmethod
    def to_json(cls, config, compact=False, indent=2, level=0):
        """Convert HOCON input into a JSON output

        :param config: value to convert (tree, list, string, scalar or None)
        :param compact: passed through to nested calls
        :param indent: number of spaces per nesting level
        :param level: current nesting level
        :return: JSON string representation
        :type return: basestring
        """
        lines = ""
        if isinstance(config, ConfigTree):
            if len(config) == 0:
                lines += '{}'
            else:
                lines += '{\n'
                bet_lines = []
                for key, item in config.items():
                    bet_lines.append('{indent}"{key}": {value}'.format(
                        indent=''.rjust((level + 1) * indent, ' '),
                        key=key.strip('"'),  # for dotted keys enclosed with "" to not be interpreted as nested key
                        value=cls.to_json(item, compact, indent, level + 1))
                    )
                lines += ',\n'.join(bet_lines)
                lines += '\n{indent}}}'.format(indent=''.rjust(level * indent, ' '))
        elif isinstance(config, list):
            if len(config) == 0:
                lines += '[]'
            else:
                lines += '[\n'
                bet_lines = []
                for item in config:
                    bet_lines.append('{indent}{value}'.format(
                        indent=''.rjust((level + 1) * indent, ' '),
                        value=cls.to_json(item, compact, indent, level + 1))
                    )
                lines += ',\n'.join(bet_lines)
                lines += '\n{indent}]'.format(indent=''.rjust(level * indent, ' '))
        elif isinstance(config, basestring):
            lines = json.dumps(config)
        elif config is None or isinstance(config, NoneValue):
            lines = 'null'
        elif config is True:
            lines = 'true'
        elif config is False:
            lines = 'false'
        else:
            lines = str(config)
        return lines

    @staticmethod
    def _auto_indent(lines, section):
        """Append ``section`` to ``lines``, aligning its continuation lines.

        Continuation lines of ``section`` are stripped and re-indented according to
        the length of the last line of ``lines``. Sections whose first line is
        shorter than 3 characters are appended unchanged.
        """
        try:
            indent = len(lines) - lines.rindex('\n')
        except ValueError:  # no newline in `lines`
            indent = len(lines)
        try:
            section_indent = section.index('\n')
        except ValueError:  # single-line section
            section_indent = len(section)
        if section_indent < 3:
            return lines + section

        indent = '\n' + ''.rjust(indent, ' ')
        return lines + indent.join([l.strip() for l in section.split('\n')])

    @classmethod
    def to_hocon(cls, config, compact=False, indent=2, level=0):
        """Convert HOCON input into a HOCON output

        :param config: value to convert
        :param compact: collapse chains of single-key trees into one dotted key
        :param indent: number of spaces per nesting level
        :param level: current nesting level (braces are omitted at level 0)
        :return: HOCON string representation
        :type return: basestring
        """
        lines = ""
        if isinstance(config, ConfigTree):
            if len(config) == 0:
                lines += '{}'
            else:
                if level > 0:  # don't display { at root level
                    lines += '{\n'
                bet_lines = []

                for key, item in config.items():
                    if compact:
                        full_key = key
                        while isinstance(item, ConfigTree) and len(item) == 1:
                            key, item = next(iter(item.items()))
                            full_key += '.' + key
                    else:
                        full_key = key

                    if isinstance(full_key, (basestring, unicode)) and cls._number_re_matcher.match(full_key):
                        # if key can be casted to float, and it is a string, make sure we quote it
                        full_key = '\"' + full_key + '\"'

                    bet_line = ('{indent}{key}{assign_sign} '.format(
                        indent=''.rjust(level * indent, ' '),
                        key=full_key,
                        assign_sign='' if isinstance(item, dict) else ' =',)
                    )
                    value_line = cls.to_hocon(item, compact, indent, level + 1)
                    if isinstance(item, (list, tuple)):
                        bet_lines.append(cls._auto_indent(bet_line, value_line))
                    else:
                        bet_lines.append(bet_line + value_line)
                lines += '\n'.join(bet_lines)

                if level > 0:  # don't display { at root level
                    lines += '\n{indent}}}'.format(indent=''.rjust((level - 1) * indent, ' '))
        elif isinstance(config, (list, tuple)):
            if len(config) == 0:
                lines += '[]'
            else:
                lines += '['
                base_len = len(lines)
                skip_comma = False
                for i, item in enumerate(config):
                    if 0 < i and not skip_comma:
                        lines += ', '

                    skip_comma = False
                    new_line = cls.to_hocon(item, compact, indent, level + 1)
                    lines += new_line
                    # wrap after a multi-line item or once the current line exceeds 80 chars
                    if '\n' in new_line or len(lines) - base_len > 80:
                        if i < len(config) - 1:
                            lines += ',\n{indent}'.format(indent=''.rjust(level * indent, ' '))
                        base_len = len(lines)
                        skip_comma = True
                lines += ']'
        elif isinstance(config, basestring):
            if '\n' in config and len(config) > 1:
                lines = '"""{value}"""'.format(value=config)  # multilines
            else:
                lines = '"{value}"'.format(value=cls.__escape_string(config))
        elif isinstance(config, ConfigValues):
            lines = ''.join(cls.to_hocon(o, compact, indent, level) for o in config.tokens)
        elif isinstance(config, ConfigSubstitution):
            lines = '${'
            if config.optional:
                lines += '?'
            lines += config.variable + '}' + config.ws
        elif isinstance(config, ConfigQuotedString):
            if '\n' in config.value and len(config.value) > 1:
                lines = '"""{value}"""'.format(value=config.value)  # multilines
            else:
                lines = '"{value}"'.format(value=cls.__escape_string(config.value))
        elif config is None or isinstance(config, NoneValue):
            lines = 'null'
        elif config is True:
            lines = 'true'
        elif config is False:
            lines = 'false'
        else:
            lines = str(config)
        return lines

    @classmethod
    def to_yaml(cls, config, compact=False, indent=2, level=0):
        """Convert HOCON input into a YAML output

        :param config: value to convert
        :param compact: passed through to nested calls
        :param indent: number of spaces per nesting level
        :param level: current nesting level
        :return: YAML string representation
        :type return: basestring
        """
        lines = ""
        if isinstance(config, ConfigTree):
            if len(config) > 0:
                if level > 0:
                    lines += '\n'
                bet_lines = []
                for key, item in config.items():
                    bet_lines.append('{indent}{key}: {value}'.format(
                        indent=''.rjust(level * indent, ' '),
                        key=key.strip('"'),  # for dotted keys enclosed with "" to not be interpreted as nested key,
                        value=cls.to_yaml(item, compact, indent, level + 1))
                    )
                lines += '\n'.join(bet_lines)
        elif isinstance(config, list):
            config_list = [line for line in config if line is not None]
            if len(config_list) == 0:
                lines += '[]'
            else:
                lines += '\n'
                bet_lines = []
                for item in config_list:
                    bet_lines.append('{indent}- {value}'.format(indent=''.rjust(level * indent, ' '),
                                                                value=cls.to_yaml(item, compact, indent, level + 1)))
                lines += '\n'.join(bet_lines)
        elif isinstance(config, basestring):
            # if it contains a \n then it's multiline
            text_lines = config.split('\n')
            if len(text_lines) == 1:
                lines = config
            else:
                # block scalar: every line is prefixed with the indentation (padding with
                # rjust would leave lines longer than the indent width unindented)
                prefix = ' ' * (level * indent)
                lines = '|\n' + '\n'.join([prefix + line for line in text_lines])
        elif config is None or isinstance(config, NoneValue):
            lines = 'null'
        elif config is True:
            lines = 'true'
        elif config is False:
            lines = 'false'
        else:
            lines = str(config)
        return lines

    @classmethod
    def to_properties(cls, config, compact=False, indent=2, key_stack=None):
        """Convert HOCON input into a .properties output

        :param config: value to convert
        :param compact: passed through to nested calls
        :param indent: passed through to nested calls
        :param key_stack: keys leading to ``config`` (None means top level)
        :return: .properties string representation
        :type return: basestring
        """

        def escape_value(value):
            return value.replace('=', '\\=').replace('!', '\\!').replace('#', '\\#').replace('\n', '\\\n')

        stripped_key_stack = [key.strip('"') for key in (key_stack or [])]
        lines = []
        if isinstance(config, ConfigTree):
            for key, item in config.items():
                if item is not None:
                    lines.append(cls.to_properties(item, compact, indent, stripped_key_stack + [key]))
        elif isinstance(config, list):
            for index, item in enumerate(config):
                if item is not None:
                    lines.append(cls.to_properties(item, compact, indent, stripped_key_stack + [str(index)]))
        elif isinstance(config, basestring):
            lines.append('.'.join(stripped_key_stack) + ' = ' + escape_value(config))
        elif config is True:
            lines.append('.'.join(stripped_key_stack) + ' = true')
        elif config is False:
            lines.append('.'.join(stripped_key_stack) + ' = false')
        elif config is None or isinstance(config, NoneValue):
            pass
        else:
            lines.append('.'.join(stripped_key_stack) + ' = ' + str(config))
        return '\n'.join([line for line in lines if len(line) > 0])

    @classmethod
    def convert(cls, config, output_format='json', indent=2, compact=False):
        """Convert ``config`` to the requested text format.

        :param config: config to convert
        :param output_format: one of 'json', 'properties', 'yaml' or 'hocon'
        :param indent: number of spaces per nesting level
        :param compact: passed to the selected converter
        :return: string representation in the requested format
        :raises Exception: ``output_format`` is not supported
        """
        converters = {
            'json': cls.to_json,
            'properties': cls.to_properties,
            'yaml': cls.to_yaml,
            'hocon': cls.to_hocon,
        }

        if output_format in converters:
            return converters[output_format](config, compact, indent)
        else:
            raise Exception("Invalid format '{format}'. Format must be 'json', 'properties', 'yaml' or 'hocon'".format(
                format=output_format))

    @classmethod
    def convert_from_file(cls, input_file=None, output_file=None, output_format='json', indent=2, compact=False):
        """Convert to json, properties, yaml or hocon

        :param input_file: input file, if not specified stdin
        :param output_file: output file, if not specified stdout
        :param output_format: json, properties, yaml or hocon
        :param indent: number of spaces per nesting level
        :param compact: passed to the selected converter
        """

        if input_file is None:
            content = sys.stdin.read()
            config = ConfigFactory.parse_string(content)
        else:
            config = ConfigFactory.parse_file(input_file)

        res = cls.convert(config, output_format, indent, compact)
        if output_file is None:
            print(res)
        else:
            with open(output_file, "w") as fd:
                fd.write(res)

    @classmethod
    def __escape_match(cls, match):
        """Return the escape sequence for the single character matched by ``match``."""
        char = match.group(0)
        return {
            '\b': r'\b',
            '\t': r'\t',
            '\n': r'\n',
            '\f': r'\f',
            '\r': r'\r',
            '"': r'\"',
            '\\': r'\\',
        }.get(char) or (r'\u%04x' % ord(char))

    @classmethod
    def __escape_string(cls, string):
        """Escape control characters, double quotes and backslashes in ``string``."""
        return re.sub(r'[\x00-\x1F"\\]', cls.__escape_match, string)


# ---- trains/utilities/pyhocon/exceptions.py ----
class ConfigException(Exception):
    """Base class of the configuration errors; optionally keeps the causing exception."""

    def __init__(self, message, ex=None):
        super(ConfigException, self).__init__(message)
        self._exception = ex


class ConfigMissingException(ConfigException, KeyError):
    """Configuration error that is also a KeyError."""
    pass


class ConfigSubstitutionException(ConfigException):
    """Configuration error related to substitutions."""
    pass


class ConfigWrongTypeException(ConfigException):
    """Configuration error for a value of an unexpected type."""
    pass