Compare commits

20 Commits

Author SHA1 Message Date
allegroai
9c95c63ce0 Version bump to v1.15.0 2024-03-24 11:25:05 +02:00
allegroai
73179f53c2 Use latest patch versions for ES and Mongo 2024-03-24 11:24:51 +02:00
allegroai
ddc8a76279 Set API version to v2.29 2024-03-18 16:02:45 +02:00
allegroai
ac7ea0d477 Allow filtering task models.input.model field by array of ids 2024-03-18 16:01:45 +02:00
allegroai
3544ed19f8 Use latest patch versions for mongodb and ES 2024-03-18 15:59:15 +02:00
allegroai
5e68f053a0 Add widgets link in nginx configuration 2024-03-18 15:58:19 +02:00
allegroai
7bd5fdad59 Update webserver build: allow using external configuration from a file or from environment variables 2024-03-18 15:57:19 +02:00
allegroai
484c72aa0c Upgrade to Debian bookworm 2024-03-18 15:56:18 +02:00
allegroai
2027afbed5 Added missing ES index template for scalar events 2024-03-18 15:54:38 +02:00
allegroai
7d649f1964 Support controlling config value inheritance from the base folder 2024-03-18 15:53:58 +02:00
allegroai
8d237b3cae Upgrade Redis to v6.2 2024-03-18 15:53:07 +02:00
allegroai
e8ee6ce72e Code cleanup 2024-03-18 15:52:22 +02:00
allegroai
5749ff0454 Add security headers to webserver 2024-03-18 15:50:40 +02:00
allegroai
5189adf4f1 Improve handling of fixed users 2024-03-18 15:49:42 +02:00
allegroai
92a4e56c1f Add support for cookies extensions 2024-03-18 15:46:07 +02:00
allegroai
33528870ae Request cookies processing enhanced for more flexibility 2024-03-18 15:45:09 +02:00
allegroai
85f5b8b6f6 Fix last metrics for task are updated for events reported without variants 2024-03-18 15:44:28 +02:00
allegroai
6112910768 Make sure that legacy templates are deleted and empty db check is done on the new templates 2024-03-18 15:40:13 +02:00
allegroai
d3013ac285 Invalidate token on user logoff 2024-03-18 15:38:44 +02:00
allegroai
88abf28287 Add ElasticSearch 8.x support 2024-03-18 15:37:44 +02:00
46 changed files with 670 additions and 168 deletions

View File

@@ -44,7 +44,6 @@ from apiserver.database.errors import translate_errors_context
from apiserver.database.model.task.task import Task, TaskStatus
from apiserver.redis_manager import redman
from apiserver.service_repo.auth import Identity
from apiserver.tools import safe_get
from apiserver.utilities.dicts import nested_get
from apiserver.utilities.json import loads
@@ -375,7 +374,7 @@ class EventBLL(object):
if invalid_iterations_count:
raise BulkIndexError(
f"{invalid_iterations_count} document(s) failed to index.",
[invalid_iteration_error],
[{"_index": invalid_iteration_error}],
)
if not added:
@@ -439,10 +438,8 @@ class EventBLL(object):
last_events contains [hashed_metric_name -> hashed_variant_name -> event]. Keys are hashed to avoid mongodb
key conflicts due to invalid characters and/or long field names.
"""
metric = event.get("metric")
variant = event.get("variant")
if not (metric and variant):
return
metric = event.get("metric") or ""
variant = event.get("variant") or ""
metric_hash = dbutils.hash_field_name(metric)
variant_hash = dbutils.hash_field_name(variant)
@@ -487,9 +484,9 @@ class EventBLL(object):
recent than the currently stored event for its metric/event_type combination.
last_events contains [metric_name -> event_type -> event]
"""
metric = event.get("metric")
metric = event.get("metric") or ""
event_type = event.get("type")
if not (metric and event_type):
if not event_type:
return
timestamp = last_events[metric][event_type].get("timestamp", None)
@@ -661,8 +658,8 @@ class EventBLL(object):
Return events and next scroll id from the scrolled query
Release the scroll once it is exhausted
"""
total_events = safe_get(es_res, "hits/total/value", default=0)
events = [doc["_source"] for doc in safe_get(es_res, "hits/hits", default=[])]
total_events = nested_get(es_res, ("hits", "total", "value"), default=0)
events = [doc["_source"] for doc in nested_get(es_res, ("hits", "hits"), default=[])]
next_scroll_id = es_res.get("_scroll_id")
if next_scroll_id and not events:
self.clear_scroll(next_scroll_id)

View File

@@ -9,7 +9,7 @@ from elasticsearch import Elasticsearch
from apiserver.config_repo import config
from apiserver.database.errors import translate_errors_context
from apiserver.database.model.task.task import Task
from apiserver.tools import safe_get
from apiserver.utilities.dicts import nested_get
class EventType(Enum):
@@ -123,8 +123,8 @@ def get_max_metric_and_variant_counts(
es, company_id=company_id, event_type=event_type, body=es_req, **kwargs,
)
metrics_count = safe_get(
es_res, "aggregations/metrics_count/value", max_metrics_count
metrics_count = nested_get(
es_res, ("aggregations", "metrics_count", "value"), max_metrics_count
)
if not metrics_count:
return max_metrics_count, max_variants_count

View File

@@ -24,7 +24,7 @@ from apiserver.bll.event.scalar_key import ScalarKey, ScalarKeyEnum
from apiserver.bll.query import Builder as QueryBuilder
from apiserver.config_repo import config
from apiserver.database.errors import translate_errors_context
from apiserver.tools import safe_get
from apiserver.utilities.dicts import nested_get
log = config.logger(__file__)
@@ -342,12 +342,12 @@ class EventMetrics:
total amount of intervals does not exceeds the samples
Return the interval and resulting amount of intervals
"""
count = safe_get(data, "count/value", default=0)
count = nested_get(data, ("count", "value"), default=0)
if count < samples:
return metric, variant, 1, count
min_index = safe_get(data, "min_index/value", default=0)
max_index = safe_get(data, "max_index/value", default=min_index)
min_index = nested_get(data, ("min_index", "value"), default=0)
max_index = nested_get(data, ("max_index", "value"), default=min_index)
index_range = max_index - min_index + 1
interval = max(1, math.ceil(float(index_range) / samples))
max_samples = math.ceil(float(index_range) / interval)
@@ -592,5 +592,5 @@ class EventMetrics:
return [
metric["key"]
for metric in safe_get(es_res, "aggregations/metrics/buckets", default=[])
for metric in nested_get(es_res, ("aggregations", "metrics", "buckets"), default=[])
]

View File

@@ -6,7 +6,6 @@ from operator import itemgetter
from typing import Sequence, Tuple, Optional, Mapping, Callable
import attr
import dpath
from boltons.iterutils import first
from elasticsearch import Elasticsearch
from jsonmodels.fields import StringField, ListField, IntField
@@ -27,6 +26,7 @@ from apiserver.config_repo import config
from apiserver.database.errors import translate_errors_context
from apiserver.database.model.task.metrics import MetricEventStats
from apiserver.database.model.task.task import Task
from apiserver.utilities.dicts import nested_get
class VariantState(Base):
@@ -305,13 +305,13 @@ class MetricEventsIterator:
return [
MetricState(
metric=metric["key"],
timestamp=dpath.get(metric, "last_event_timestamp/value"),
timestamp=nested_get(metric, ("last_event_timestamp", "value")),
variants=[
init_variant_state(variant)
for variant in dpath.get(metric, "variants/buckets")
for variant in nested_get(metric, ("variants", "buckets"))
],
)
for metric in dpath.get(es_res, "aggregations/metrics/buckets")
for metric in nested_get(es_res, ("aggregations", "metrics", "buckets"))
]
@abc.abstractmethod
@@ -430,14 +430,14 @@ class MetricEventsIterator:
def get_iteration_events(it_: dict) -> Sequence:
return [
self._process_event(ev["_source"])
for m in dpath.get(it_, "metrics/buckets")
for v in dpath.get(m, "variants/buckets")
for ev in dpath.get(v, "events/hits/hits")
for m in nested_get(it_, ("metrics", "buckets"))
for v in nested_get(m, ("variants", "buckets"))
for ev in nested_get(v, ("events", "hits", "hits"))
if is_valid_event(ev["_source"])
]
iterations = []
for it in dpath.get(es_res, "aggregations/iters/buckets"):
for it in nested_get(es_res, ("aggregations", "iters", "buckets")):
events = get_iteration_events(it)
if events:
iterations.append({"iter": it["key"], "events": events})

View File

@@ -869,7 +869,7 @@ class ProjectBLL:
company,
project_ids: Sequence[str],
user_ids: Optional[Sequence[str]] = None,
) -> Set[str]:
) -> Set[Union[str, type(None)]]:
"""
Get the set of user ids that created tasks/models in the given projects
If project_ids is empty then all projects are examined

View File

@@ -18,7 +18,7 @@ from apiserver.config.info import get_deployment_type
from apiserver.database.model import Company, User
from apiserver.database.model.queue import Queue
from apiserver.database.model.task.task import Task
from apiserver.tools import safe_get
from apiserver.utilities.dicts import nested_get
from apiserver.utilities.json import dumps
from apiserver.version import __version__ as current_version
from .resource_monitor import ResourceMonitor, stat_threads
@@ -162,7 +162,7 @@ class StatisticsReporter:
def _get_cardinality_fields(categories: Sequence[dict]) -> dict:
names = {"cpu": "num_cores"}
return {
names[c["key"]]: safe_get(c, "count/value")
names[c["key"]]: nested_get(c, ("count", "value"))
for c in categories
if c["key"] in names
}
@@ -175,21 +175,21 @@ class StatisticsReporter:
}
return {
names[m["key"]]: {
"min": safe_get(m, "min/value"),
"max": safe_get(m, "max/value"),
"avg": safe_get(m, "avg/value"),
"min": nested_get(m, ("min", "value")),
"max": nested_get(m, ("max", "value")),
"avg": nested_get(m, ("avg", "value")),
}
for m in metrics
if m["key"] in names
}
buckets = safe_get(res, "aggregations/workers/buckets", default=[])
buckets = nested_get(res, ("aggregations", "workers", "buckets"), default=[])
return {
b["key"]: {
key: {
"interval_sec": agent_resource_threshold_sec,
**_get_cardinality_fields(safe_get(b, "categories/buckets", [])),
**_get_metric_fields(safe_get(b, "metrics/buckets", [])),
**_get_cardinality_fields(nested_get(b, ("categories", "buckets"), [])),
**_get_metric_fields(nested_get(b, ("metrics", "buckets"), [])),
}
}
for b in buckets
@@ -227,7 +227,7 @@ class StatisticsReporter:
},
}
res = cls._run_worker_stats_query(company_id, es_req)
buckets = safe_get(res, "aggregations/workers/buckets", default=[])
buckets = nested_get(res, ("aggregations", "workers", "buckets"), default=[])
return {
b["key"]: {"last_activity_time": b["last_activity_time"]["value"]}
for b in buckets

View File

@@ -27,10 +27,9 @@ from apiserver.database.model.project import Project
from apiserver.database.model.queue import Queue
from apiserver.database.model.task.task import Task
from apiserver.redis_manager import redman
from apiserver.tools import safe_get
from apiserver.utilities.dicts import nested_get
from .stats import WorkerStats
log = config.logger(__file__)
@@ -287,7 +286,7 @@ class WorkerBLL:
filter(
None,
(
safe_get(info, "next_entry/task")
nested_get(info, ("next_entry", "task"))
for info in queues_info.values()
),
)
@@ -311,7 +310,7 @@ class WorkerBLL:
continue
entry.name = info.get("name", None)
entry.num_tasks = info.get("num_entries", 0)
task_id = safe_get(info, "next_entry/task")
task_id = nested_get(info, ("next_entry", "task"))
if task_id:
task = tasks_info.get(task_id, None)
entry.next_task = IdNameEntry(

View File

@@ -6,7 +6,7 @@ from functools import reduce
from os import getenv
from os.path import expandvars
from pathlib import Path
from typing import List, Any, TypeVar, Sequence
from typing import List, Any, TypeVar, Sequence, Set
from boltons.iterutils import first
from pyhocon import ConfigTree, ConfigFactory, ConfigValues
@@ -35,6 +35,7 @@ class BasicConfig:
folder: str = None,
verbose: bool = True,
prefix: Sequence[str] = DEFAULT_PREFIXES,
exclude_files_from_base_folder: Sequence[str] = None,
):
folder = (
Path(folder)
@@ -44,6 +45,11 @@ class BasicConfig:
if not folder.is_dir():
raise ValueError("Invalid configuration folder")
self.exclude_files_from_base_folder = (
set(exclude_files_from_base_folder)
if exclude_files_from_base_folder
else set()
)
self.verbose = verbose
self.extra_config_path_override_var = [
@@ -85,7 +91,7 @@ class BasicConfig:
return logging.getLogger(path)
def _read_extra_env_config_values(self) -> ConfigTree:
""" Loads extra configuration from environment-injected values """
"""Loads extra configuration from environment-injected values"""
result = ConfigTree()
for prefix in self.extra_config_values_env_key_prefix:
@@ -125,12 +131,18 @@ class BasicConfig:
def _reload(self) -> ConfigTree:
extra_config_values = self._read_extra_env_config_values()
configs = [self._read_recursive(path) for path in self._paths]
configs = [
self._read_recursive(
path,
exclude_files=(
self.exclude_files_from_base_folder if idx == 0 else None
),
)
for idx, path in enumerate(self._paths)
]
return reduce(
lambda last, config: self._merge_configs(
last, config, copy_trees=True
),
lambda last, config: self._merge_configs(last, config, copy_trees=True),
configs + [extra_config_values],
ConfigTree(),
)
@@ -141,9 +153,14 @@ class BasicConfig:
for key, value in b.items():
override = key.startswith(override_prefix)
if override:
key = key[len(override_prefix):]
key = key[len(override_prefix) :]
# if key is in both a and b and both values are dictionary then merge it otherwise override it
if not override and key in a and isinstance(a[key], ConfigTree) and isinstance(b[key], ConfigTree):
if (
not override
and key in a
and isinstance(a[key], ConfigTree)
and isinstance(b[key], ConfigTree)
):
if copy_trees:
a[key] = a[key].copy()
cls._merge_configs(a[key], b[key], copy_trees=copy_trees)
@@ -156,13 +173,15 @@ class BasicConfig:
a[key] = value
if a.root:
if b.root:
a.history[key] = a.history.get(key, []) + b.history.get(key, [value])
a.history[key] = a.history.get(key, []) + b.history.get(
key, [value]
)
else:
a.history[key] = a.history.get(key, []) + [value]
return a
def _read_recursive(self, conf_root) -> ConfigTree:
def _read_recursive(self, conf_root, exclude_files: Set[str]) -> ConfigTree:
conf = ConfigTree()
if not conf_root:
@@ -180,6 +199,8 @@ class BasicConfig:
print(f"Loading config from {conf_root}")
for file in conf_root.rglob("*.conf"):
if exclude_files and file.name in exclude_files:
continue
key = ".".join(file.relative_to(conf_root).with_suffix("").parts)
conf.put(key, self._read_single_file(file))

View File

@@ -58,6 +58,9 @@
# verify user tokens
verify_user_tokens: false
# If set then users that were created from secure credentials or fixed user settings and are no longer in these settings will be deleted on startup
delete_missing_autocreated_users: true
# max token expiration timeout in seconds (1 year)
max_expiration_sec: 31536000

View File

@@ -2,10 +2,9 @@ fileserver = "http://localhost:8081"
elastic {
events {
hosts: [{host: "127.0.0.1", port: 9200}]
hosts: [{host: "127.0.0.1", port: 9200, scheme: http}]
args {
timeout: 60
dead_timeout: 10
max_retries: 3
retry_on_timeout: true
}
@@ -13,10 +12,9 @@ elastic {
}
workers {
hosts: [{host:"127.0.0.1", port:9200}]
hosts: [{host:"127.0.0.1", port:9200, scheme: http}]
args {
timeout: 60
dead_timeout: 10
max_retries: 3
retry_on_timeout: true
}

View File

@@ -5,7 +5,7 @@ from textwrap import shorten
import dpath
from dpath.exceptions import InvalidKeyName
from elasticsearch import ElasticsearchException
from elastic_transport import TransportError, ApiError
from elasticsearch.helpers import BulkIndexError
from jsonmodels.errors import ValidationError as JsonschemaValidationError
from mongoengine.errors import (
@@ -210,9 +210,9 @@ def translate_errors_context(message=None, **kwargs):
raise errors.bad_request.ValidationError(e.args[0])
except BulkIndexError as e:
ElasticErrorsHandler.bulk_error(e, message, **kwargs)
except ElasticsearchException as e:
except (TransportError, ApiError) as e:
raise errors.server_error.DataError(e, message, **kwargs)
except InvalidKeyName:
raise errors.server_error.DataError("invalid empty key encountered in data")
except Exception as ex:
except Exception:
raise

View File

@@ -4,6 +4,7 @@ from mongoengine import (
EmbeddedDocumentListField,
EmailField,
DateTimeField,
BooleanField,
)
from apiserver.database import Database, strict
@@ -76,3 +77,6 @@ class User(DbModelMixin, AuthDocument):
email = EmailField(unique=True, sparse=True)
""" Email uniquely identifying the user """
autocreated = BooleanField(default=False)
""" Set to true if the user was auto created based on config settings"""

View File

@@ -1297,7 +1297,6 @@ class GetMixin(PropsMixin):
return result
class UpdateMixin(object):
__user_set_allowed_fields = None
__locked_when_published_fields = None

View File

@@ -231,11 +231,12 @@ class Task(AttributedDocument):
"parent",
"hyperparams.*",
"execution.queue",
"models.input.model",
),
range_fields=("started", "active_duration", "last_metrics.*", "last_iteration"),
datetime_fields=("status_changed", "last_update"),
pattern_fields=("name", "comment", "report"),
fields=("runtime.*", "models.input.model"),
fields=("runtime.*",),
)
id = StringField(primary_key=True)

View File

@@ -4,34 +4,89 @@ Apply elasticsearch mappings to given hosts.
"""
import argparse
import json
import logging
from pathlib import Path
from typing import Optional, Sequence, Tuple
from elasticsearch import Elasticsearch
from elasticsearch import Elasticsearch, exceptions
HERE = Path(__file__).resolve().parent
logging.getLogger("elasticsearch").setLevel(logging.WARNING)
logging.getLogger("elastic_transport").setLevel(logging.WARNING)
def apply_mappings_to_cluster(
hosts: Sequence, key: Optional[str] = None, es_args: dict = None, http_auth: Tuple = None
hosts: Sequence,
key: Optional[str] = None,
es_args: dict = None,
http_auth: Tuple = None,
):
"""Hosts maybe a sequence of strings or dicts in the form {"host": <host>, "port": <port>}"""
def _send_template(f):
with f.open() as json_data:
data = json.load(json_data)
template_name = f.stem
res = es.indices.put_template(name=template_name, body=data)
return {"mapping": template_name, "result": res}
def _send_component_template(ct_file):
with ct_file.open() as json_data:
body = json.load(json_data)
template_name = f"{ct_file.stem}"
res = es.cluster.put_component_template(name=template_name, body=body)
return {"component_template": template_name, "result": res}
p = HERE / "mappings"
if key:
files = (p / key).glob("*.json")
else:
files = p.glob("**/*.json")
def _send_index_template(it_file):
with it_file.open() as json_data:
body = json.load(json_data)
template_name = f"{it_file.stem}"
res = es.indices.put_index_template(name=template_name, body=body)
return {"index_template": template_name, "result": res}
# def _send_legacy_template(f):
# with f.open() as json_data:
# data = json.load(json_data)
# template_name = f.stem
# res = es.indices.put_template(name=template_name, body=data)
# return {"mapping": template_name, "result": res}
def _delete_legacy_templates(legacy_folder):
res_list = []
for lt in legacy_folder.glob("*.json"):
template_name = lt.stem
try:
if not es.indices.get_template(name=template_name):
continue
res = es.indices.delete_template(name=template_name)
except exceptions.NotFoundError:
continue
res_list.append({"deleted legacy mapping": template_name, "result": res})
return res_list
es = Elasticsearch(hosts=hosts, http_auth=http_auth, **(es_args or {}))
return [_send_template(f) for f in files]
root = HERE / "index_templates"
if key:
folders = [root / key]
else:
folders = [f for f in root.iterdir() if f.is_dir()]
ret = []
for f in folders:
for ct in (f / "component_templates").glob("*.json"):
ret.append(_send_component_template(ct))
for it in f.glob("*.json"):
ret.append(_send_index_template(it))
legacy_root = HERE / "mappings"
for f in folders:
legacy_f = legacy_root / f.stem
if not legacy_f.exists() or not legacy_f.is_dir():
continue
ret.extend(_delete_legacy_templates(legacy_f))
return ret
# p = HERE / "mappings"
# if key:
# files = (p / key).glob("*.json")
# else:
# files = p.glob("**/*.json")
#
# return [_send_template(f) for f in files]
def parse_args():

View File

@@ -0,0 +1,48 @@
{
"template": {
"settings": {
"number_of_replicas": 0,
"number_of_shards": 1
},
"mappings": {
"_source": {
"enabled": true
},
"properties": {
"@timestamp": {
"type": "date"
},
"task": {
"type": "keyword"
},
"type": {
"type": "keyword"
},
"worker": {
"type": "keyword"
},
"timestamp": {
"type": "date"
},
"iter": {
"type": "long"
},
"metric": {
"type": "keyword"
},
"variant": {
"type": "keyword"
},
"value": {
"type": "float"
},
"company_id": {
"type": "keyword"
},
"model_event": {
"type": "boolean"
}
}
}
}
}

View File

@@ -0,0 +1,18 @@
{
"index_patterns": "events-log-*",
"template": {
"mappings": {
"properties": {
"msg": {
"type": "text",
"index": false
},
"level": {
"type": "keyword"
}
}
}
},
"priority": 500,
"composed_of": ["events_common"]
}

View File

@@ -0,0 +1,18 @@
{
"index_patterns": "events-plot-*",
"template": {
"mappings": {
"properties": {
"plot_str": {
"type": "text",
"index": false
},
"plot_data": {
"type": "binary"
}
}
}
},
"priority": 500,
"composed_of": ["events_common"]
}

View File

@@ -0,0 +1,17 @@
{
"index_patterns": "events-training_debug_image-*",
"template": {
"mappings": {
"properties": {
"key": {
"type": "keyword"
},
"url": {
"type": "keyword"
}
}
}
},
"priority": 500,
"composed_of": ["events_common"]
}

View File

@@ -0,0 +1,5 @@
{
"index_patterns": "events-training_stats_scalar-*",
"priority": 500,
"composed_of": ["events_common"]
}

View File

@@ -0,0 +1,31 @@
{
"index_patterns": "queue_metrics_*",
"template": {
"settings": {
"number_of_replicas": 0,
"number_of_shards": 1
},
"mappings": {
"_source": {
"enabled": true
},
"properties": {
"timestamp": {
"type": "date"
},
"queue": {
"type": "keyword"
},
"average_waiting_time": {
"type": "float"
},
"queue_length": {
"type": "integer"
},
"company_id": {
"type": "keyword"
}
}
}
}
}

View File

@@ -0,0 +1,43 @@
{
"index_patterns": "worker_stats_*",
"template": {
"settings": {
"number_of_replicas": 0,
"number_of_shards": 1
},
"mappings": {
"_source": {
"enabled": true
},
"properties": {
"timestamp": {
"type": "date"
},
"worker": {
"type": "keyword"
},
"category": {
"type": "keyword"
},
"metric": {
"type": "keyword"
},
"variant": {
"type": "keyword"
},
"value": {
"type": "float"
},
"unit": {
"type": "keyword"
},
"task": {
"type": "keyword"
},
"company_id": {
"type": "keyword"
}
}
}
}
}

View File

@@ -10,6 +10,8 @@ from apiserver.config_repo import config
from apiserver.elastic.apply_mappings import apply_mappings_to_cluster
log = config.logger(__file__)
logging.getLogger("elasticsearch").setLevel(logging.WARNING)
logging.getLogger("elastic_transport").setLevel(logging.WARNING)
class MissingElasticConfiguration(Exception):
@@ -78,6 +80,18 @@ def check_elastic_empty() -> bool:
err_type=urllib3.exceptions.NewConnectionError, args_prefix=("GET",)
)
def events_legacy_template():
try:
return es.indices.get_template(name="events*")
except exceptions.NotFoundError:
return False
def events_template():
try:
return es.indices.get_index_template(name="events*")
except exceptions.NotFoundError:
return False
try:
es_logger.addFilter(log_filter)
for retry in range(max_retries):
@@ -87,10 +101,7 @@ def check_elastic_empty() -> bool:
http_auth=es_factory.get_credentials("events", cluster_conf),
**cluster_conf.get("args", {}),
)
return not es.indices.get_template(name="events*")
except exceptions.NotFoundError as ex:
log.error(ex)
return True
return not (events_template() or events_legacy_template())
except exceptions.ConnectionError as ex:
if retry >= max_retries - 1:
raise ElasticConnectionError(

View File

@@ -1,3 +1,4 @@
import logging
from datetime import datetime
from functools import lru_cache
from os import getenv
@@ -9,6 +10,8 @@ from elasticsearch import Elasticsearch
from apiserver.config_repo import config
log = config.logger(__file__)
logging.getLogger('elasticsearch').setLevel(logging.WARNING)
logging.getLogger('elastic_transport').setLevel(logging.WARNING)
OVERRIDE_HOST_ENV_KEY = (
"CLEARML_ELASTIC_SERVICE_HOST",
@@ -32,6 +35,7 @@ if OVERRIDE_HOST:
OVERRIDE_PORT = first(filter(None, map(getenv, OVERRIDE_PORT_ENV_KEY)))
if OVERRIDE_PORT:
OVERRIDE_PORT = int(OVERRIDE_PORT)
log.info(f"Using override elastic port {OVERRIDE_PORT}")
OVERRIDE_USERNAME = first(filter(None, map(getenv, OVERRIDE_USERNAME_ENV_KEY)))

View File

@@ -3,7 +3,7 @@ from typing import Sequence, Union
from apiserver.config_repo import config
from apiserver.config.info import get_default_company
from apiserver.database.model.auth import Role
from apiserver.database.model.auth import Role, User as AuthUser
from apiserver.service_repo.auth.fixed_user import FixedUser
from .migration import _apply_migrations, check_mongo_empty, get_last_server_version
from .pre_populate import PrePopulate
@@ -60,14 +60,18 @@ def init_mongo_data():
fixed_mode = FixedUser.enabled()
internal_user_emails = set()
for user, credentials in config.get("secure.credentials", {}).items():
email = f"{user}@example.com"
user_data = {
"name": user,
"role": credentials.role,
"email": f"{user}@example.com",
"email": email,
"key": credentials.user_key,
"secret": credentials.user_secret,
"autocreated": True,
}
internal_user_emails.add(email.lower())
revoke = fixed_mode and credentials.get("revoke_in_fixed_mode", False)
user_id = _ensure_auth_user(user_data, company_id, log=log, revoke=revoke)
if credentials.role == Role.user:
@@ -82,8 +86,20 @@ def init_mongo_data():
for user in FixedUser.from_config():
try:
ensure_fixed_user(user, log=log)
ensure_fixed_user(user, log=log, emails=internal_user_emails)
except Exception as ex:
log.error(f"Failed creating fixed user {user.name}: {ex}")
if internal_user_emails and config.get(
f"apiserver.auth.delete_missing_autocreated_users", True
):
for user in AuthUser.objects(
company=company_id, autocreated=True, email__nin=internal_user_emails
):
log.info(
f"Removing user that is no longer in configuration: {user['id']}\t{user['email']}\t{user['name']}"
)
user.delete()
except Exception as ex:
log.exception("Failed initializing mongodb")
log.exception(f"Failed initializing mongodb: {str(ex)}")

View File

@@ -26,6 +26,7 @@ def _ensure_auth_user(user_data: dict, company_id: str, log: Logger, revoke: boo
credentials = [] if revoke else [creds]
user_id = user_data.get("id", f"__{user_data['name']}__")
autocreated = user_data.get("autocreated", False)
log.info(f"Creating user: {user_data['name']}")
@@ -37,6 +38,7 @@ def _ensure_auth_user(user_data: dict, company_id: str, log: Logger, revoke: boo
email=user_data["email"],
created=datetime.utcnow(),
credentials=credentials,
autocreated=autocreated,
)
user.save()
@@ -59,7 +61,7 @@ def _ensure_backend_user(user_id: str, company_id: str, user_name: str):
return user_id
def ensure_fixed_user(user: FixedUser, log: Logger):
def ensure_fixed_user(user: FixedUser, log: Logger, emails: set):
db_user = User.objects(company=user.company, id=user.user_id).first()
if db_user:
# noinspection PyBroadException
@@ -73,9 +75,12 @@ def ensure_fixed_user(user: FixedUser, log: Logger):
data = attr.asdict(user)
data["id"] = user.user_id
data["email"] = f"{user.user_id}@example.com"
email = f"{user.user_id}@example.com"
data["email"] = email
data["role"] = Role.guest if user.is_guest else Role.user
data["autocreated"] = True
_ensure_auth_user(user_data=data, company_id=user.company, log=log)
emails.add(email)
return _ensure_backend_user(user.user_id, user.company, user.name)

View File

@@ -6,7 +6,7 @@ boto3>=1.26
boto3-stubs[s3]>=1.26
clearml>=1.10.3
dpath>=1.4.2,<2.0
elasticsearch==7.17.9
elasticsearch==8.12.0
fastjsonschema>=2.8
flask-compress>=1.4.0
flask-cors>=3.0.5

View File

@@ -21,6 +21,11 @@ log = config.logger(__file__)
class RequestHandlers:
_request_strip_prefix = config.get("apiserver.request.strip_prefix", None)
_server_header = config.get("apiserver.response.headers.server", "clearml")
_custom_cookie_settings = {
c["name"]: c["settings"]
for c in config.get("apiserver.auth.custom_cookies", {}).values()
if c.get("enabled") and c.get("settings")
}
def before_request(self):
if request.method == "OPTIONS":
@@ -29,7 +34,10 @@ class RequestHandlers:
return
if request.content_encoding:
return f"Content encoding is not supported ({request.content_encoding})", 415
return (
f"Content encoding is not supported ({request.content_encoding})",
415,
)
try:
call = self._create_api_call(request)
@@ -70,7 +78,10 @@ class RequestHandlers:
if call.result.cookies:
for key, value in call.result.cookies.items():
kwargs = config.get("apiserver.auth.cookies").copy()
kwargs = (
self._custom_cookie_settings.get(key)
or config.get("apiserver.auth.cookies")
).copy()
if value is None:
# Removing a cookie
kwargs["max_age"] = 0
@@ -87,7 +98,9 @@ class RequestHandlers:
if company:
try:
# use no default value to allow setting a null domain as well
kwargs["domain"] = config.get(f"apiserver.auth.cookies_domain_override.{company}")
kwargs["domain"] = config.get(
f"apiserver.auth.cookies_domain_override.{company}"
)
except KeyError:
pass
@@ -114,11 +127,15 @@ class RequestHandlers:
return v
for k, v in md.lists():
v = [convert_value(x) for x in v] if (len(v) > 1 or k.endswith("[]")) else convert_value(v[0])
v = (
[convert_value(x) for x in v]
if (len(v) > 1 or k.endswith("[]"))
else convert_value(v[0])
)
nested_set(body, k.rstrip("[]").split("."), v)
def _update_call_data(self, call, req):
""" Use request payload/form to fill call data or batched data """
"""Use request payload/form to fill call data or batched data"""
if req.content_type == "application/json-lines":
items = []
for i, line in enumerate(req.data.splitlines()):
@@ -148,6 +165,9 @@ class RequestHandlers:
call.set_error_result(msg=msg, code=code, subcode=subcode)
return call
def _get_session_auth_cookie(self, req):
return req.cookies.get(config.get("apiserver.auth.session_auth_cookie_name"))
def _create_api_call(self, req):
call = None
try:
@@ -161,9 +181,7 @@ class RequestHandlers:
# Resolve authorization: if cookies contain an authorization token, use it as a starting point.
# in any case, request headers always take precedence.
auth_cookie = req.cookies.get(
config.get("apiserver.auth.session_auth_cookie_name")
)
auth_cookie = self._get_session_auth_cookie(req)
headers = (
{}
if not auth_cookie

View File

@@ -1,4 +1,4 @@
from .auth import get_auth_func, authorize_impersonation
from .auth import get_auth_func, authorize_impersonation, revoke_auth_token
from .payload import Token, Basic, AuthType, Payload
from .identity import Identity
from .utils import get_client_id, get_secret_key

View File

@@ -1,5 +1,6 @@
import base64
from datetime import datetime
from time import time
import bcrypt
import jwt
@@ -11,15 +12,16 @@ from apiserver.database.errors import translate_errors_context
from apiserver.database.model.auth import User, Entities, Credentials
from apiserver.database.model.company import Company
from apiserver.database.utils import get_options
from apiserver.redis_manager import redman
from .fixed_user import FixedUser
from .identity import Identity
from .payload import Payload, Token, Basic, AuthType
log = config.logger(__file__)
entity_keys = set(get_options(Entities))
verify_user_tokens = config.get("apiserver.auth.verify_user_tokens", True)
_revoked_tokens_key = "revoked_tokens"
redis = redman.connection("apiserver")
def get_auth_func(auth_type):
@@ -41,8 +43,10 @@ def authorize_token(jwt_token, service, action, call):
log.error(f"{msg} Call info: {info}")
try:
return Token.from_encoded_token(jwt_token)
token = Token.from_encoded_token(jwt_token)
if is_token_revoked(token):
raise errors.unauthorized.InvalidToken("revoked token")
return token
except jwt.exceptions.InvalidKeyError as ex:
log_error("Failed parsing token.")
raise errors.unauthorized.InvalidToken(
@@ -154,3 +158,23 @@ def compare_secret_key_hash(secret_key: str, hashed_secret: str) -> bool:
return bcrypt.checkpw(
secret_key.encode(), base64.b64decode(hashed_secret.encode("ascii"))
)
def is_token_revoked(token: Token) -> bool:
if not isinstance(token, Token) or not token.session_id:
return False
return redis.zscore(_revoked_tokens_key, token.session_id) is not None
def revoke_auth_token(token: Token):
if not isinstance(token, Token) or not token.session_id:
return
timestamp_now = int(time())
expiration_timestamp = token.exp
if not expiration_timestamp:
expiration_timestamp = timestamp_now + Token.default_expiration_sec
redis.zadd(_revoked_tokens_key, {token.session_id: expiration_timestamp})
redis.zremrangebyscore(_revoked_tokens_key, min=0, max=timestamp_now)

View File

@@ -1,3 +1,5 @@
from uuid import uuid4
import jwt
from datetime import datetime, timedelta
@@ -20,7 +22,15 @@ class Token(Payload):
default_expiration_sec = config.get("apiserver.auth.default_expiration_sec")
def __init__(
self, exp=None, iat=None, nbf=None, env=None, identity=None, entities=None, **_
self,
exp=None,
iat=None,
nbf=None,
env=None,
identity=None,
session_id=None,
entities=None,
**_,
):
super(Token, self).__init__(
AuthType.bearer_token, identity=identity, entities=entities
@@ -28,8 +38,13 @@ class Token(Payload):
self.exp = exp
self.iat = iat
self.nbf = nbf
self._session_id = session_id
self._env = env or config.get("env", "<unknown>")
@property
def session_id(self):
return self._session_id
@property
def env(self):
return self._env
@@ -102,8 +117,11 @@ class Token(Payload):
expiration_sec = expiration_sec or cls.default_expiration_sec
now = datetime.utcnow()
session_id = uuid4().hex
token = cls(identity=identity, entities=entities, iat=now)
token = cls(
identity=identity, entities=entities, iat=now, session_id=session_id
)
if expiration_sec:
# add 'expiration' claim

View File

@@ -39,7 +39,7 @@ class ServiceRepo(object):
"""If the check is set, parsing will fail for endpoint request with the version that is grater than the current
maximum """
_max_version = PartialVersion("2.28")
_max_version = PartialVersion("2.29")
""" Maximum version number (the highest min_version value across all endpoints) """
_endpoint_exp = (

View File

@@ -24,6 +24,7 @@ from apiserver.database.errors import translate_errors_context
from apiserver.database.model.auth import User, Role
from apiserver.service_repo import APICall, endpoint
from apiserver.service_repo.auth import Token
from apiserver.service_repo.auth.auth import is_token_revoked, revoke_auth_token
from apiserver.service_repo.auth.fixed_user import FixedUser
log = config.logger(__file__)
@@ -35,7 +36,7 @@ log = config.logger(__file__)
response_data_model=GetTokenResponse,
)
def login(call: APICall, *_, **__):
""" Generates a token based on the authenticated user (intended for use with credentials) """
"""Generates a token based on the authenticated user (intended for use with credentials)"""
call.result.data_model = AuthBLL.get_token_for_user(
user_id=call.identity.user,
company_id=call.identity.company,
@@ -48,6 +49,7 @@ def login(call: APICall, *_, **__):
@endpoint("auth.logout", min_version="2.2")
def logout(call: APICall, *_, **__):
revoke_auth_token(call.auth)
call.result.set_auth_cookie(None)
@@ -57,7 +59,7 @@ def logout(call: APICall, *_, **__):
response_data_model=GetTokenResponse,
)
def get_token_for_user(call: APICall, _: str, request: GetTokenForUserRequest):
""" Generates a token based on a requested user and company. INTERNAL. """
"""Generates a token based on a requested user and company. INTERNAL."""
if call.identity.role not in Role.get_system_roles():
if call.identity.role != Role.admin and call.identity.user != request.user:
raise errors.bad_request.InvalidUserId(
@@ -81,12 +83,14 @@ def get_token_for_user(call: APICall, _: str, request: GetTokenForUserRequest):
response_data_model=ValidateResponse,
)
def validate_token_endpoint(call: APICall, _, __):
""" Validate a token and return identity if valid. INTERNAL. """
"""Validate a token and return identity if valid. INTERNAL."""
try:
# if invalid, decoding will fail
token = Token.from_encoded_token(call.data_model.token)
call.result.data_model = ValidateResponse(
valid=True, user=token.identity.user, company=token.identity.company
valid=not is_token_revoked(token),
user=token.identity.user,
company=token.identity.company,
)
except Exception as e:
call.result.data_model = ValidateResponse(valid=False, msg=e.args[0])
@@ -98,7 +102,7 @@ def validate_token_endpoint(call: APICall, _, __):
response_data_model=CreateUserResponse,
)
def create_user(call: APICall, _, request: CreateUserRequest):
""" Create a user from. INTERNAL. """
"""Create a user from. INTERNAL."""
if (
call.identity.role not in Role.get_system_roles()
and request.company != call.identity.company

View File

@@ -7,6 +7,7 @@ from apiserver.apimodels.login import (
)
from apiserver.config import info
from apiserver.service_repo import endpoint, APICall
from apiserver.service_repo.auth import revoke_auth_token
from apiserver.service_repo.auth.fixed_user import FixedUser
@@ -37,4 +38,5 @@ def supported_modes(call: APICall, _, __: GetSupportedModesRequest):
@endpoint("login.logout", min_version="2.13")
def logout(call: APICall, _, __):
revoke_auth_token(call.auth)
call.result.set_auth_cookie(None)

View File

@@ -16,7 +16,7 @@ from apiserver.bll.project import ProjectBLL
from apiserver.bll.user import UserBLL
from apiserver.config_repo import config
from apiserver.database.errors import translate_errors_context
from apiserver.database.model.auth import Role
from apiserver.database.model.auth import Role, User as AuthUser
from apiserver.database.model.company import Company
from apiserver.database.model.user import User
from apiserver.database.utils import parse_from_call
@@ -158,9 +158,17 @@ def update_user(user_id, company_id, data: dict) -> Tuple[int, dict]:
update_fields = {
k: v for k, v in create_fields.items() if k in User.user_set_allowed()
}
auth_user_update_fields = ("name",)
partial_update_dict = parse_from_call(data, update_fields, User.get_fields())
with translate_errors_context("updating user"):
return User.safe_update(company_id, user_id, partial_update_dict)
ret = User.safe_update(company_id, user_id, partial_update_dict)
auth_update = {
k: v for k, v in partial_update_dict.items() if k in auth_user_update_fields
}
if auth_update:
AuthUser.objects(id=user_id).update(**auth_update)
return ret
@endpoint("users.update", response_data_model=UpdateResponse)

View File

@@ -113,7 +113,7 @@ class TestProjectTags(TestService):
new_tags = ["New model tag"]
self.api.models.update_tags(ids=[model], add_tags=new_tags)
data = self.api.projects.get_model_tags(projects=[p])
self.assertEqual(set(data.tags), set([*new_tags, *initial_tags]))
self.assertEqual(set(data.tags), {*new_tags, *initial_tags})
def new_task(self, **kwargs):
self.update_missing(

View File

@@ -193,33 +193,33 @@ class TestTaskEvents(TestService):
def test_last_scalar_metrics(self):
metric = "Metric1"
variant = "Variant1"
iter_count = 100
task = self._temp_task()
events = [
{
**self._create_task_event("training_stats_scalar", task, iteration),
"metric": metric,
"variant": variant,
"value": iteration,
}
for iteration in range(iter_count)
]
# send 2 batches to check the interaction with already stored db value
# each batch contains multiple iterations
self.send_batch(events[:50])
self.send_batch(events[50:])
for variant in ("Variant1", None):
iter_count = 100
task = self._temp_task()
events = [
{
**self._create_task_event("training_stats_scalar", task, iteration),
"metric": metric,
"variant": variant,
"value": iteration,
}
for iteration in range(iter_count)
]
# send 2 batches to check the interaction with already stored db value
# each batch contains multiple iterations
self.send_batch(events[:50])
self.send_batch(events[50:])
task_data = self.api.tasks.get_by_id(task=task).task
metric_data = first(first(task_data.last_metrics.values()).values())
self.assertEqual(iter_count - 1, metric_data.value)
self.assertEqual(iter_count - 1, metric_data.max_value)
self.assertEqual(iter_count - 1, metric_data.max_value_iteration)
self.assertEqual(0, metric_data.min_value)
self.assertEqual(0, metric_data.min_value_iteration)
task_data = self.api.tasks.get_by_id(task=task).task
metric_data = first(first(task_data.last_metrics.values()).values())
self.assertEqual(iter_count - 1, metric_data.value)
self.assertEqual(iter_count - 1, metric_data.max_value)
self.assertEqual(iter_count - 1, metric_data.max_value_iteration)
self.assertEqual(0, metric_data.min_value)
self.assertEqual(0, metric_data.min_value_iteration)
res = self.api.events.get_task_latest_scalar_values(task=task)
self.assertEqual(iter_count - 1, res.last_iter)
res = self.api.events.get_task_latest_scalar_values(task=task)
self.assertEqual(iter_count - 1, res.last_iter)
def test_model_events(self):
model = self._temp_model(ready=False)

View File

@@ -7,13 +7,17 @@ from humanfriendly import parse_timespan
def setup():
from apiserver.database import db
db.initialize()
def gen_token(args):
from apiserver.bll.auth import AuthBLL
resp = AuthBLL.get_token_for_user(args.user_id, args.company_id, parse_timespan(args.expiration))
print('Token:\n%s' % resp.token)
resp = AuthBLL.get_token_for_user(
args.user_id, args.company_id, int(parse_timespan(args.expiration))
)
print("Token:\n%s" % resp.token)
def safe_get(obj, glob, default=None, separator="/"):
@@ -23,19 +27,24 @@ def safe_get(obj, glob, default=None, separator="/"):
return default
if __name__ == '__main__':
if __name__ == "__main__":
top_parser = ArgumentParser(__doc__)
subparsers = top_parser.add_subparsers(title='Sections')
subparsers = top_parser.add_subparsers(title="Sections")
token = subparsers.add_parser('token')
token_commands = token.add_subparsers(title='Commands')
token_create = token_commands.add_parser('generate', description='Generate a new token')
token_create.add_argument('--user-id', '-u', help='User ID', required=True)
token_create.add_argument('--company-id', '-c', help='Company ID', required=True)
token_create.add_argument('--expiration', '-exp',
help="Token expiration (time span, shorthand suffixes are supported, default 1m)",
default=parse_timespan('1m'))
token = subparsers.add_parser("token")
token_commands = token.add_subparsers(title="Commands")
token_create = token_commands.add_parser(
"generate", description="Generate a new token"
)
token_create.add_argument("--user-id", "-u", help="User ID", required=True)
token_create.add_argument("--company-id", "-c", help="Company ID", required=True)
token_create.add_argument(
"--expiration",
"-exp",
help="Token expiration (time span, shorthand suffixes are supported, default 1m)",
default=parse_timespan("1m"),
)
token_create.set_defaults(_func=gen_token)
args = top_parser.parse_args()

View File

@@ -1 +1 @@
__version__ = "1.14.0"
__version__ = "1.15.0"

View File

@@ -1,4 +1,4 @@
FROM node:18-bullseye as webapp_builder
FROM node:20-bookworm-slim as webapp_builder
ARG CLEARML_WEB_GIT_URL=https://github.com/allegroai/clearml-web.git
@@ -10,8 +10,9 @@ RUN mv clearml-web /opt/open-webapp
COPY --chmod=744 docker/build/internal_files/build_webapp.sh /tmp/internal_files/
RUN /bin/bash -c '/tmp/internal_files/build_webapp.sh'
FROM python:3.9-slim-bullseye
FROM python:3.9-slim-bookworm
COPY --chmod=744 docker/build/internal_files/entrypoint.sh /opt/clearml/
COPY --chmod=744 docker/build/internal_files/update_from_env.py /opt/clearml/utilities/
COPY fileserver /opt/clearml/fileserver/
COPY apiserver /opt/clearml/apiserver/

View File

@@ -29,7 +29,12 @@ server {
include /etc/nginx/default.d/*.conf;
location / {
try_files $uri$args $uri$args/ $uri index.html /index.html;
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
add_header Content-Security-Policy "frame-ancestors 'self'";
add_header X-XSS-Protection "1; mode=block";
add_header X-Content-Type-Options "nosniff" always;
add_header Referrer-Policy "no-referrer-when-downgrade";
try_files $uri $uri/ /index.html;
}
location /version.json {
@@ -50,6 +55,12 @@ server {
rewrite /files/(.*) /$1 break;
}
location /widgets {
alias /usr/share/nginx/widgets;
try_files $uri $uri/ /widgets/index.html;
add_header Content-Security-Policy "frame-ancestors *";
}
error_page 404 /404.html;
location = /40x.html {
}
@@ -57,4 +68,4 @@ server {
error_page 500 502 503 504 /50x.html;
location = /50x.html {
}
}
}

View File

@@ -46,10 +46,26 @@ elif [[ ${SERVER_TYPE} == "webserver" ]]; then
EOF
fi
# Create an empty configuration json
echo "{}" > /tmp/configuration.json
# Copy the external configuration file if it exists
if test -f "/mnt/external_files/configs/configuration.json"; then
echo "Copying external configuration"
cp /mnt/external_files/configs/configuration.json /tmp/configuration.json
fi
# Update from env variables
echo "Updating configuration from env"
/opt/clearml/utilities/update_from_env.py \
--verbose \
/tmp/configuration.json \
/usr/share/nginx/html/configuration.json
export NGINX_APISERVER_ADDR=${NGINX_APISERVER_ADDRESS:-http://apiserver:8008}
export NGINX_FILESERVER_ADDR=${NGINX_FILESERVER_ADDRESS:-http://fileserver:8081}
COMMENT_IPV6_LISTEN=$([ "$DISABLE_NGINX_IPV6" = "true" ] && echo "#" || echo "") \
envsubst '${COMMENT_IPV6_LISTEN} ${NGINX_APISERVER_ADDR} ${NGINX_FILESERVER_ADDR}' < /etc/nginx/clearml.conf.template > /etc/nginx/sites-enabled/default
export COMMENT_IPV6_LISTEN=$([ "$DISABLE_NGINX_IPV6" = "true" ] && echo "#" || echo "")
envsubst '${COMMENT_IPV6_LISTEN} ${NGINX_APISERVER_ADDR} ${NGINX_FILESERVER_ADDR}' < /etc/nginx/clearml.conf.template > /etc/nginx/sites-enabled/default
if [[ -n "${CLEARML_SERVER_SUB_PATH}" ]]; then
mkdir -p /etc/nginx/default.d/

View File

@@ -1,11 +1,11 @@
#!/usr/bin/env bash
set -x
set -o errexit
set -o nounset
set -o pipefail
apt-get update -y
apt-get install -y python3-setuptools python3-dev build-essential nginx gettext
apt-get install -y vim curl
apt-get install -y python3-setuptools python3-dev build-essential nginx gettext vim curl
python3 -m ensurepip
python3 -m pip install --upgrade pip

View File

@@ -0,0 +1,104 @@
#!/usr/bin/env python3
""" Update json configuration file from environment variables """
from argparse import ArgumentParser, FileType
import json
from os import environ
from typing import Any, Generator, Tuple, Optional, List
class PathConflictError(Exception):
def __init__(self, path_: List[str]):
self.path = path_
def scan(
obj: Any, path_: str = None, sep: str = ".", parent_=None, key_=None,
) -> Generator[Tuple[str, Any, Optional[dict], str], None, None]:
if not isinstance(obj, dict):
yield path_.lower(), obj, parent_, key_
else:
for k, v in obj.items():
yield from scan(v, path_=sep.join(filter(None, (path_, k))), parent_=obj, key_=k, sep=sep)
def set_path(p: List[str], obj: dict, v: Any):
key_, *rest = p
if not rest:
obj[key_] = v
else:
if key_ in obj:
if not isinstance(obj[key_], dict):
raise PathConflictError(rest)
else:
obj[key_] = {}
return set_path(rest, obj[key_], v)
if __name__ == '__main__':
parser = ArgumentParser(description=__doc__)
parser.add_argument("input_file", type=FileType(), help="Input JSON file")
parser.add_argument("output_file", type=FileType("w"), help="Output JSON file")
parser.add_argument(
"--env-prefix", "-p", default="WEBSERVER", help="Environment variables prefix (default=%(default)s)",
dest="prefix", required=False
)
parser.add_argument(
"--env-separator", "-s", default="__", help="Environment variable name separator (default=%(default)s)",
dest="sep"
)
parser.add_argument("--verbose", "-v", action="store_true", default=False)
parser.add_argument(
"--disable-parse-env-value", action="store_false", default=True, help="Don't parse env value as JSON",
dest="parse_env"
)
args = parser.parse_args()
if not args.prefix:
print("Error: script does not support an empty prefix")
exit(1)
data = None
try:
data = json.load(args.input_file)
except json.JSONDecodeError as ex:
print(f"Error parsing JSON file {args.input_file.name}: {str(ex)}")
exit(1)
def parse_value(k, v):
try:
return json.loads(v)
except json.JSONDecodeError as ex:
print(f"Error parsing {k} JSON value `{v}`: {str(ex)}")
exit(2)
prefix = args.prefix + args.sep
env_vars = {
k.lstrip(prefix): parse_value(k, v) if args.parse_env else v
for k, v in environ.items() if k.startswith(prefix)
}
for path, value, parent, key in scan(data, sep=args.sep):
if not (parent and key):
continue
match = next((k for k in env_vars if k.lower() == path), None)
if match:
replace = env_vars.pop(match)
parent[key] = replace
if args.verbose:
print(f"Replacing {path}={value} with {replace}")
for k, v in env_vars.items():
path = k.split(args.sep)
try:
set_path(path, data, v)
except PathConflictError as ex:
print(f"Error: failed setting value into {k}: {path[:-len(ex.path)]} is not a dictionary")
try:
json.dump(data, args.output_file, sort_keys=True, indent=2)
except Exception as ex:
print(f"Error writing JSON file {args.output_file.name}: {str(ex)}")
exit(3)

View File

@@ -49,13 +49,10 @@ services:
cluster.routing.allocation.disk.watermark.low: 500mb
cluster.routing.allocation.disk.watermark.high: 500mb
cluster.routing.allocation.disk.watermark.flood_stage: 500mb
discovery.zen.minimum_master_nodes: "1"
discovery.type: "single-node"
http.compression_level: "7"
node.ingest: "true"
node.name: clearml
reindex.remote.whitelist: '*.*'
xpack.monitoring.enabled: "false"
reindex.remote.whitelist: "'*.*'"
xpack.security.enabled: "false"
ulimits:
memlock:
@@ -64,7 +61,7 @@ services:
nofile:
soft: 65536
hard: 65536
image: docker.elastic.co/elasticsearch/elasticsearch:7.17.7
image: docker.elastic.co/elasticsearch/elasticsearch:7.17.18
restart: unless-stopped
volumes:
- c:/opt/clearml/data/elastic_7:/usr/share/elasticsearch/data
@@ -93,7 +90,7 @@ services:
networks:
- backend
container_name: clearml-mongo
image: mongo:4.4.9
image: mongo:4.4.29
restart: unless-stopped
command: --setParameter internalQueryMaxBlockingSortMemoryUsageBytes=196100200
volumes:
@@ -104,7 +101,7 @@ services:
networks:
- backend
container_name: clearml-redis
image: redis:5.0
image: redis:6.2
restart: unless-stopped
volumes:
- c:/opt/clearml/data/redis:/data

View File

@@ -49,13 +49,10 @@ services:
cluster.routing.allocation.disk.watermark.low: 500mb
cluster.routing.allocation.disk.watermark.high: 500mb
cluster.routing.allocation.disk.watermark.flood_stage: 500mb
discovery.zen.minimum_master_nodes: "1"
discovery.type: "single-node"
http.compression_level: "7"
node.ingest: "true"
node.name: clearml
reindex.remote.whitelist: '*.*'
xpack.monitoring.enabled: "false"
reindex.remote.whitelist: "'*.*'"
xpack.security.enabled: "false"
ulimits:
memlock:
@@ -64,7 +61,7 @@ services:
nofile:
soft: 65536
hard: 65536
image: docker.elastic.co/elasticsearch/elasticsearch:7.17.7
image: docker.elastic.co/elasticsearch/elasticsearch:7.17.18
restart: unless-stopped
volumes:
- /opt/clearml/data/elastic_7:/usr/share/elasticsearch/data
@@ -92,7 +89,7 @@ services:
networks:
- backend
container_name: clearml-mongo
image: mongo:4.4.9
image: mongo:4.4.29
restart: unless-stopped
command: --setParameter internalQueryMaxBlockingSortMemoryUsageBytes=196100200
volumes:
@@ -103,7 +100,7 @@ services:
networks:
- backend
container_name: clearml-redis
image: redis:5.0
image: redis:6.2
restart: unless-stopped
volumes:
- /opt/clearml/data/redis:/data