mirror of
https://github.com/clearml/clearml-server
synced 2025-06-26 23:15:47 +00:00
Compare commits
362 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8de039ee35 | ||
|
|
dbebdf2885 | ||
|
|
e94d7fcfa9 | ||
|
|
fa3727c5fc | ||
|
|
b7795b3e2e | ||
|
|
8c29ebaece | ||
|
|
478f6b531b | ||
|
|
893ba48eda | ||
|
|
1b76f36dcd | ||
|
|
1299ebfcf3 | ||
|
|
8c4932c7eb | ||
|
|
e48e64a82f | ||
|
|
046a142f36 | ||
|
|
207b9e4746 | ||
|
|
605fccdef1 | ||
|
|
8b8d8d6e6f | ||
|
|
97b9bbc4a9 | ||
|
|
ed60a27d1a | ||
|
|
17fcaba2cb | ||
|
|
83dbf0fcb8 | ||
|
|
a3b303fa28 | ||
|
|
543c579a2e | ||
|
|
41b003f328 | ||
|
|
606bf2c4be | ||
|
|
57ce9446b1 | ||
|
|
073cc96fb8 | ||
|
|
77e7fb5c13 | ||
|
|
0b61ec2a56 | ||
|
|
7506a13fe8 | ||
|
|
9dfb4b882a | ||
|
|
2eee909364 | ||
|
|
3bcbc38c4c | ||
|
|
eb755be001 | ||
|
|
9997dcc977 | ||
|
|
ee9f45ea61 | ||
|
|
a1956cdd83 | ||
|
|
4b93f1f508 | ||
|
|
2752c4df54 | ||
|
|
2332b8589b | ||
|
|
f94cda4e9d | ||
|
|
a84e1ec0d6 | ||
|
|
4223fe73d1 | ||
|
|
f9577f9faa | ||
|
|
58b748ddf3 | ||
|
|
fa41e14625 | ||
|
|
4df5687ecd | ||
|
|
9a69c21504 | ||
|
|
39c36527e2 | ||
|
|
f59ef65fa6 | ||
|
|
8f942f0da2 | ||
|
|
7b5679fd70 | ||
|
|
5a5f02cead | ||
|
|
cfcad6300a | ||
|
|
fd46f3c6f3 | ||
|
|
e86b7fd24e | ||
|
|
50593f69f8 | ||
|
|
ba928854e0 | ||
|
|
83a0485518 | ||
|
|
f3491cc9b9 | ||
|
|
7558426bc6 | ||
|
|
ce01e37c66 | ||
|
|
92b42d66b7 | ||
|
|
f7d36bea4f | ||
|
|
f1c876089b | ||
|
|
dd0ecb712d | ||
|
|
fcfc1e8998 | ||
|
|
9c210bb4fa | ||
|
|
14547155cb | ||
|
|
3f34f83a91 | ||
|
|
da3941e6f2 | ||
|
|
2e19a18ee4 | ||
|
|
cdc668e3c8 | ||
|
|
7c9889605a | ||
|
|
5456ee4ebf | ||
|
|
562cb77003 | ||
|
|
91df2bb3b7 | ||
|
|
cb9812caee | ||
|
|
0496582d96 | ||
|
|
beff19e104 | ||
|
|
639b3d59a4 | ||
|
|
c0d687e2ef | ||
|
|
9c95c63ce0 | ||
|
|
73179f53c2 | ||
|
|
ddc8a76279 | ||
|
|
ac7ea0d477 | ||
|
|
3544ed19f8 | ||
|
|
5e68f053a0 | ||
|
|
7bd5fdad59 | ||
|
|
484c72aa0c | ||
|
|
2027afbed5 | ||
|
|
7d649f1964 | ||
|
|
8d237b3cae | ||
|
|
e8ee6ce72e | ||
|
|
5749ff0454 | ||
|
|
5189adf4f1 | ||
|
|
92a4e56c1f | ||
|
|
33528870ae | ||
|
|
85f5b8b6f6 | ||
|
|
6112910768 | ||
|
|
d3013ac285 | ||
|
|
88abf28287 | ||
|
|
6a1fc04d1e | ||
|
|
ee8eb03698 | ||
|
|
5799baae45 | ||
|
|
801e536c5e | ||
|
|
6e484ea8f4 | ||
|
|
a47e65d974 | ||
|
|
702b6dc9c8 | ||
|
|
db15f235e4 | ||
|
|
8c347f8fa9 | ||
|
|
768c3d80ff | ||
|
|
a5c3ef6385 | ||
|
|
11b7a384af | ||
|
|
9a70ade4a6 | ||
|
|
91ce140901 | ||
|
|
49084a9c49 | ||
|
|
8a99eb6812 | ||
|
|
811ab2bf4f | ||
|
|
3752db122b | ||
|
|
439911b84c | ||
|
|
262a301e28 | ||
|
|
a604451b01 | ||
|
|
88a7773621 | ||
|
|
35c4061992 | ||
|
|
4684fd5b74 | ||
|
|
e08123fcc0 | ||
|
|
e713e876eb | ||
|
|
c2cc788319 | ||
|
|
da8315d0db | ||
|
|
4ac6f88278 | ||
|
|
a7865ccbec | ||
|
|
ec14f327c6 | ||
|
|
a03b24d6b6 | ||
|
|
cb71ef8e47 | ||
|
|
8678fbc995 | ||
|
|
58df8f201a | ||
|
|
f4bf16c156 | ||
|
|
942f996237 | ||
|
|
c1e7f8f9c1 | ||
|
|
274c487b37 | ||
|
|
cc0129a800 | ||
|
|
388dd1b01f | ||
|
|
d62ecb5e6e | ||
|
|
6d507616b3 | ||
|
|
d0252a6dd9 | ||
|
|
2263e7cc1e | ||
|
|
81b93e6811 | ||
|
|
491e83d0f1 | ||
|
|
f84cc0a2cb | ||
|
|
6c5f966ed4 | ||
|
|
4eff657810 | ||
|
|
74acaa31df | ||
|
|
21ed8559bf | ||
|
|
3927604648 | ||
|
|
f7dcbd96ec | ||
|
|
5950b81f0b | ||
|
|
1e51e2e221 | ||
|
|
4c98b87554 | ||
|
|
c196043d2a | ||
|
|
752020c66a | ||
|
|
6885d07462 | ||
|
|
00552da1b0 | ||
|
|
eebe2eeffc | ||
|
|
bc2fe28bdd | ||
|
|
ed86750b24 | ||
|
|
6df69afb25 | ||
|
|
3f22423c3f | ||
|
|
3ad636c468 | ||
|
|
5c80336aa9 | ||
|
|
5cd59ea6e3 | ||
|
|
5d3ba4fa73 | ||
|
|
42556c8dbb | ||
|
|
dbe1c6f00f | ||
|
|
a17485b1bd | ||
|
|
a2b9fed92d | ||
|
|
ff34da3c88 | ||
|
|
5239755066 | ||
|
|
8061dfedbb | ||
|
|
011164ce9b | ||
|
|
8135cf5258 | ||
|
|
a83a932e84 | ||
|
|
db021f2863 | ||
|
|
1b650b1689 | ||
|
|
14d18a7aba | ||
|
|
a7ed46979f | ||
|
|
452f606889 | ||
|
|
fc47ccbf09 | ||
|
|
0206811342 | ||
|
|
a3ac1049a3 | ||
|
|
8488f63a3a | ||
|
|
9206a7c57d | ||
|
|
0c37ced2a1 | ||
|
|
b22f26129e | ||
|
|
d8b998ebd8 | ||
|
|
741fa84b52 | ||
|
|
d9579891c8 | ||
|
|
900414d0de | ||
|
|
5449b332d2 | ||
|
|
875f4b9536 | ||
|
|
95b8f22899 | ||
|
|
4058fb9ce5 | ||
|
|
cf8e847ed3 | ||
|
|
755cc803d9 | ||
|
|
3729afe014 | ||
|
|
dff2ed34e8 | ||
|
|
de9651d761 | ||
|
|
818496236b | ||
|
|
e99817b28b | ||
|
|
58465fbc17 | ||
|
|
2e4e060a82 | ||
|
|
5c5d9b6434 | ||
|
|
4291ad682a | ||
|
|
4c22757002 | ||
|
|
6e777e80b8 | ||
|
|
c8e4d9eeac | ||
|
|
b51aa5c29b | ||
|
|
e7c9daa42b | ||
|
|
7357654249 | ||
|
|
a6f671b46a | ||
|
|
17a8b440bd | ||
|
|
eb2b9cbd9a | ||
|
|
797e503e67 | ||
|
|
30cfdac8f2 | ||
|
|
24bb87aaee | ||
|
|
dd49ba180a | ||
|
|
bda903d0d8 | ||
|
|
9739eb2d5a | ||
|
|
cfbb37238f | ||
|
|
6664c6237e | ||
|
|
74200a24bd | ||
|
|
2fb9288a6c | ||
|
|
5d014d81af | ||
|
|
3a2675abe1 | ||
|
|
f0d68b1ce9 | ||
|
|
15db9cdaef | ||
|
|
a45d47f5d7 | ||
|
|
b1a50c1370 | ||
|
|
22a2a02760 | ||
|
|
ab798e4170 | ||
|
|
f09ac672d2 | ||
|
|
2149b76f63 | ||
|
|
d96420aa67 | ||
|
|
ed6c7b7bcb | ||
|
|
a392bc0bd7 | ||
|
|
7e97ec5555 | ||
|
|
9c41124b81 | ||
|
|
14ff639bb0 | ||
|
|
e66257761a | ||
|
|
0ffde24dc2 | ||
|
|
d4fdcd9b32 | ||
|
|
18570bfccb | ||
|
|
54ce6c34c6 | ||
|
|
ae4c33fa0e | ||
|
|
c7cd949fd0 | ||
|
|
1ce4058157 | ||
|
|
7b6f24b24d | ||
|
|
d03a931d84 | ||
|
|
5cc7199661 | ||
|
|
6537e9ef69 | ||
|
|
930aaff791 | ||
|
|
1999fb2479 | ||
|
|
9db14cc31d | ||
|
|
e3cc689528 | ||
|
|
9e0adc77dd | ||
|
|
58d9a64537 | ||
|
|
d397d2ae20 | ||
|
|
2d711e1500 | ||
|
|
97992b0d9e | ||
|
|
bc23f1b0cf | ||
|
|
6b3eff1426 | ||
|
|
caaf801cd0 | ||
|
|
c23e8a90d0 | ||
|
|
fa5b28ca0e | ||
|
|
bfb55a9463 | ||
|
|
37e485e1f2 | ||
|
|
3451ff441f | ||
|
|
53c9b5525e | ||
|
|
e5230edac3 | ||
|
|
a54dd8030c | ||
|
|
482a5c34bc | ||
|
|
ee2a72c70f | ||
|
|
a0d8aaf3b9 | ||
|
|
de1f823213 | ||
|
|
0c9e2f92ee | ||
|
|
6c49e96ff0 | ||
|
|
81e3fc6577 | ||
|
|
e6dc4b7557 | ||
|
|
238a47a197 | ||
|
|
04e7076628 | ||
|
|
0531612bf4 | ||
|
|
3ae410a1e9 | ||
|
|
98ed3075dd | ||
|
|
b871bf4224 | ||
|
|
8d4c02fc3c | ||
|
|
b986980c75 | ||
|
|
a4fa567be2 | ||
|
|
ddb91f226a | ||
|
|
7772f47773 | ||
|
|
9c118d14e0 | ||
|
|
efd56e085e | ||
|
|
4dff163af4 | ||
|
|
242a78a0fe | ||
|
|
78989fea91 | ||
|
|
5de7c12062 | ||
|
|
3f79c19079 | ||
|
|
fe29743c54 | ||
|
|
d760cf5835 | ||
|
|
3695f25a5f | ||
|
|
c6f1beafdd | ||
|
|
68a54c34f3 | ||
|
|
ab495ae586 | ||
|
|
b058770af1 | ||
|
|
f7e833bf6f | ||
|
|
36b9ab0453 | ||
|
|
ec0436d0da | ||
|
|
0f6c4e75b7 | ||
|
|
a41ae112a1 | ||
|
|
c28f478ea8 | ||
|
|
c18eb99d06 | ||
|
|
3a60f00d93 | ||
|
|
ee87778548 | ||
|
|
52c0c4d438 | ||
|
|
d117a4f022 | ||
|
|
6683d2d7a9 | ||
|
|
05357fe25e | ||
|
|
adc1825843 | ||
|
|
0c15169668 | ||
|
|
123dc1dcfb | ||
|
|
b2feafac09 | ||
|
|
b41ab8c550 | ||
|
|
62d5779bd5 | ||
|
|
f8b9d9802e | ||
|
|
dd8a1503b0 | ||
|
|
cff98ae900 | ||
|
|
9b108740da | ||
|
|
08a7bc7c9f | ||
|
|
fb256d7e5b | ||
|
|
710443b078 | ||
|
|
e0cde2f7c9 | ||
|
|
60b9c8de14 | ||
|
|
ecffe26be4 | ||
|
|
2570bd9e26 | ||
|
|
174f84514a | ||
|
|
65cb8d7b43 | ||
|
|
5f8ef808a3 | ||
|
|
4941ac70e0 | ||
|
|
67cd461145 | ||
|
|
92b5fc6f9a | ||
|
|
b90165b4e4 | ||
|
|
6c2dcb5c8a | ||
|
|
3efed32934 | ||
|
|
69737308fe | ||
|
|
a6dbea808a | ||
|
|
5131b17901 | ||
|
|
5f21c3a56d | ||
|
|
2350ac64ed | ||
|
|
d146127c18 | ||
|
|
abd65e103e | ||
|
|
bf65ea7bd0 | ||
|
|
73e278a8ed | ||
|
|
d92dfbbdb7 | ||
|
|
5c1e419eb5 |
2
LICENSE
2
LICENSE
@@ -1,7 +1,7 @@
|
||||
Server Side Public License
|
||||
VERSION 1, OCTOBER 16, 2018
|
||||
|
||||
Copyright © 2019 allegro.ai, Inc.
|
||||
Copyright © 2024 ClearML Inc.
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim copies of this
|
||||
license document, but changing it is not allowed.
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
</br>Experiment Manager, ML-Ops and Data-Management**
|
||||
|
||||
[](https://img.shields.io/badge/license-SSPL-green.svg)
|
||||
[](https://img.shields.io/badge/python-3.6%20%7C%203.7-blue.svg)
|
||||
[](https://img.shields.io/badge/python-3.9-blue.svg)
|
||||
[](https://img.shields.io/github/release-pre/allegroai/trains-server.svg)
|
||||
[](https://artifacthub.io/packages/search?repo=allegroai)
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
Server Side Public License
|
||||
VERSION 1, OCTOBER 16, 2018
|
||||
|
||||
Copyright © 2019 allegro.ai, Inc.
|
||||
Copyright © 2024 ClearML Inc.
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim copies of this
|
||||
license document, but changing it is not allowed.
|
||||
|
||||
@@ -26,6 +26,9 @@
|
||||
23: ["invalid_domain_name", "malformed domain name"]
|
||||
24: ["not_public_object", "object is not public"]
|
||||
|
||||
# Auth / Login
|
||||
75: ["invalid_access_key", "access key not found"]
|
||||
|
||||
# Tasks
|
||||
100: ["task_error", "general task error"]
|
||||
101: ["invalid_task_id", "invalid task id"]
|
||||
@@ -47,6 +50,12 @@
|
||||
130: ["task_not_found", "task not found"]
|
||||
131: ["events_not_added", "events not added"]
|
||||
|
||||
# Reports
|
||||
150: ["operation_supported_on_reports_only", "passed task is not report"]
|
||||
|
||||
# Pipelines
|
||||
160: ["cannot_remove_all_runs", "at least one pipeline run should be left"]
|
||||
|
||||
# Models
|
||||
200: ["model_error", "general task error"]
|
||||
201: ["invalid_model_id", "invalid model id"]
|
||||
@@ -67,12 +76,15 @@
|
||||
402: ["project_has_tasks", "project has associated tasks"]
|
||||
403: ["project_not_found", "project not found"]
|
||||
405: ["project_has_models", "project has associated models"]
|
||||
406: ["project_has_datasets", "project has associated non-empty datasets"]
|
||||
407: ["invalid_project_name", "invalid project name"]
|
||||
408: ["cannot_update_project_location", "Cannot update project location. Use projects.move instead"]
|
||||
409: ["project_path_exceeds_max", "Project path exceed the maximum allowed depth"]
|
||||
410: ["project_source_and_destination_are_the_same", "Project has the same source and destination paths"]
|
||||
411: ["project_cannot_be_moved_under_itself", "Project can not be moved under itself in the projects hierarchy"]
|
||||
412: ["project_cannot_be_merged_into_its_child", "Project can not be merged into its own child"]
|
||||
413: ["project_has_pipelines", "project has associated pipelines with active controllers"]
|
||||
414: ["public_project_exists", "Cannot create project. Public project with the same name already exists"]
|
||||
|
||||
# Queues
|
||||
701: ["invalid_queue_id", "invalid queue id"]
|
||||
@@ -86,7 +98,7 @@
|
||||
|
||||
# Database
|
||||
800: ["data_validation_error", "data validation error"]
|
||||
801: ["expected_unique_data", "value combination already exists"]
|
||||
801: ["expected_unique_data", "value combination already exists (unique field already contains this value)"]
|
||||
|
||||
# Workers
|
||||
1001: ["invalid_worker_id", "invalid worker id"]
|
||||
@@ -95,6 +107,11 @@
|
||||
1004: ["worker_not_registered", "worker is not registered"]
|
||||
1005: ["worker_stats_not_found", "worker stats not found"]
|
||||
|
||||
# Serving
|
||||
1050: ["invalid_container_id", "invalid container id"]
|
||||
1051: ["container_not_registered", "container is not registered"]
|
||||
1052: ["no_containers_for_url", "no container instances found for serice url"]
|
||||
|
||||
1104: ["invalid_scroll_id", "Invalid scroll id"]
|
||||
}
|
||||
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
from enum import Enum
|
||||
from typing import Union, Type, Iterable
|
||||
from numbers import Number
|
||||
from typing import Union, Type, Iterable, Mapping
|
||||
|
||||
import jsonmodels.errors
|
||||
import six
|
||||
from jsonmodels import fields
|
||||
from jsonmodels.fields import _LazyType, NotSet
|
||||
from jsonmodels.fields import _LazyType, NotSet, EmbeddedField
|
||||
from jsonmodels.models import Base as ModelBase
|
||||
from jsonmodels.validators import Enum as EnumValidator
|
||||
from mongoengine.base import BaseDocument
|
||||
@@ -40,6 +41,34 @@ def make_default(field_cls, default_value):
|
||||
return _FieldWithDefault
|
||||
|
||||
|
||||
class OneOfEmbeddedField(EmbeddedField):
|
||||
def __init__(
|
||||
self,
|
||||
*args,
|
||||
discriminator_property: str,
|
||||
discriminator_mapping: Mapping[str, type],
|
||||
**kwargs,
|
||||
):
|
||||
self.discriminator_property = discriminator_property
|
||||
self.discriminator_mapping = discriminator_mapping
|
||||
model_types = tuple(set(self.discriminator_mapping.values()))
|
||||
|
||||
super().__init__(model_types, *args, **kwargs)
|
||||
|
||||
def parse_value(self, value):
|
||||
"""Parse value to proper model type."""
|
||||
if not isinstance(value, dict) or self.discriminator_property not in value:
|
||||
return super().parse_value(value)
|
||||
|
||||
property_value = value.get(self.discriminator_property)
|
||||
embed_type = self.discriminator_mapping.get(property_value)
|
||||
if not embed_type:
|
||||
raise jsonmodels.errors.ValidationError(
|
||||
f"Could not find type matching discriminator property value: {property_value}"
|
||||
)
|
||||
return embed_type(**value)
|
||||
|
||||
|
||||
class ListField(fields.ListField):
|
||||
def __init__(self, items_types=None, *args, default=NotSet, **kwargs):
|
||||
if default is not NotSet and callable(default):
|
||||
@@ -61,10 +90,20 @@ class ListField(fields.ListField):
|
||||
item.validate()
|
||||
|
||||
|
||||
# since there is no distinction between None and empty DictField
|
||||
# this value can be used as sentinel in order to distinguish
|
||||
# between not set and empty DictField
|
||||
DictFieldNotSet = {}
|
||||
class ScalarField(fields.BaseField):
|
||||
|
||||
"""String field."""
|
||||
|
||||
types = (str, int, float, bool)
|
||||
|
||||
|
||||
class SafeStringField(fields.StringField):
|
||||
"""String field that can also accept numbers as input"""
|
||||
def parse_value(self, value):
|
||||
if isinstance(value, Number):
|
||||
value = str(value)
|
||||
|
||||
return super().parse_value(value)
|
||||
|
||||
|
||||
class DictField(fields.BaseField):
|
||||
@@ -114,9 +153,7 @@ class DictField(fields.BaseField):
|
||||
if len(self.value_types) != 1:
|
||||
tpl = 'Cannot decide which type to choose from "{types}".'
|
||||
raise jsonmodels.errors.ValidationError(
|
||||
tpl.format(
|
||||
types=', '.join([t.__name__ for t in self.value_types])
|
||||
)
|
||||
tpl.format(types=", ".join([t.__name__ for t in self.value_types]))
|
||||
)
|
||||
return self.value_types[0](**value)
|
||||
|
||||
@@ -178,7 +215,7 @@ class EnumField(fields.StringField):
|
||||
*args,
|
||||
required=False,
|
||||
default=None,
|
||||
**kwargs
|
||||
**kwargs,
|
||||
):
|
||||
choices = list(map(self.parse_value, values_or_type))
|
||||
validator_cls = EnumValidator if required else NullableEnumValidator
|
||||
@@ -201,7 +238,7 @@ class ActualEnumField(fields.StringField):
|
||||
validators=None,
|
||||
required=False,
|
||||
default=None,
|
||||
**kwargs
|
||||
**kwargs,
|
||||
):
|
||||
self.__enum = enum_class
|
||||
self.types = (enum_class,)
|
||||
@@ -214,7 +251,7 @@ class ActualEnumField(fields.StringField):
|
||||
*args,
|
||||
required=required,
|
||||
validators=validators,
|
||||
**kwargs
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def parse_value(self, value):
|
||||
|
||||
@@ -96,6 +96,11 @@ class GetCredentialsResponse(Base):
|
||||
credentials = ListField(CredentialsResponse)
|
||||
|
||||
|
||||
class EditCredentialsRequest(Base):
|
||||
access_key = StringField(required=True)
|
||||
label = StringField()
|
||||
|
||||
|
||||
class RevokeCredentialsRequest(Base):
|
||||
access_key = StringField(required=True)
|
||||
|
||||
|
||||
@@ -13,6 +13,14 @@ from apiserver.config_repo import config
|
||||
from apiserver.utilities.stringenum import StringEnum
|
||||
|
||||
|
||||
class TaskRequest(Base):
|
||||
task: str = StringField(required=True)
|
||||
|
||||
|
||||
class ModelRequest(Base):
|
||||
model: str = StringField(required=True)
|
||||
|
||||
|
||||
class HistogramRequestBase(Base):
|
||||
samples: int = IntField(default=2000, validators=[Min(1), Max(6000)])
|
||||
key: ScalarKeyEnum = ActualEnumField(ScalarKeyEnum, default=ScalarKeyEnum.iter)
|
||||
@@ -26,6 +34,12 @@ class MetricVariants(Base):
|
||||
class ScalarMetricsIterHistogramRequest(HistogramRequestBase):
|
||||
task: str = StringField(required=True)
|
||||
metrics: Sequence[MetricVariants] = ListField(items_types=MetricVariants)
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class GetMetricsAndVariantsRequest(Base):
|
||||
task: str = StringField(required=True)
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class MultiTaskScalarMetricsIterHistogramRequest(HistogramRequestBase):
|
||||
@@ -35,11 +49,13 @@ class MultiTaskScalarMetricsIterHistogramRequest(HistogramRequestBase):
|
||||
Length(
|
||||
minimum_value=1,
|
||||
maximum_value=config.get(
|
||||
"services.tasks.multi_task_histogram_limit", 10
|
||||
"services.tasks.multi_task_histogram_limit", 100
|
||||
),
|
||||
)
|
||||
],
|
||||
)
|
||||
metrics: Sequence[MetricVariants] = ListField(items_types=MetricVariants)
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class TaskMetric(Base):
|
||||
@@ -48,7 +64,13 @@ class TaskMetric(Base):
|
||||
variants: Sequence[str] = ListField(items_types=str)
|
||||
|
||||
|
||||
class DebugImagesRequest(Base):
|
||||
class LegacyMetricEventsRequest(TaskRequest):
|
||||
iters: int = IntField(default=1, validators=validators.Min(1))
|
||||
scroll_id: str = StringField()
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class MetricEventsRequest(Base):
|
||||
metrics: Sequence[TaskMetric] = ListField(
|
||||
items_types=TaskMetric, validators=[Length(minimum_value=1)]
|
||||
)
|
||||
@@ -56,24 +78,43 @@ class DebugImagesRequest(Base):
|
||||
navigate_earlier: bool = BoolField(default=True)
|
||||
refresh: bool = BoolField(default=False)
|
||||
scroll_id: str = StringField()
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class TaskMetricVariant(Base):
|
||||
class VectorMetricsIterHistogramRequest(Base):
|
||||
task: str = StringField(required=True)
|
||||
metric: str = StringField(required=True)
|
||||
variant: str = StringField(required=True)
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class GetDebugImageSampleRequest(TaskMetricVariant):
|
||||
class GetVariantSampleRequest(Base):
|
||||
task: str = StringField(required=True)
|
||||
metric: str = StringField(required=True)
|
||||
variant: str = StringField(required=True)
|
||||
iteration: Optional[int] = IntField()
|
||||
refresh: bool = BoolField(default=False)
|
||||
scroll_id: Optional[str] = StringField()
|
||||
navigate_current_metric: bool = BoolField(default=True)
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class NextDebugImageSampleRequest(Base):
|
||||
class GetMetricSamplesRequest(Base):
|
||||
task: str = StringField(required=True)
|
||||
metric: str = StringField(required=True)
|
||||
iteration: Optional[int] = IntField()
|
||||
refresh: bool = BoolField(default=False)
|
||||
scroll_id: Optional[str] = StringField()
|
||||
navigate_current_metric: bool = BoolField(default=True)
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class NextHistorySampleRequest(Base):
|
||||
task: str = StringField(required=True)
|
||||
scroll_id: Optional[str] = StringField()
|
||||
navigate_earlier: bool = BoolField(default=True)
|
||||
next_iteration: bool = BoolField(default=False)
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class LogOrderEnum(StringEnum):
|
||||
@@ -92,6 +133,12 @@ class TaskEventsRequest(TaskEventsRequestBase):
|
||||
order: Optional[str] = ActualEnumField(LogOrderEnum, default=LogOrderEnum.asc)
|
||||
scroll_id: str = StringField()
|
||||
count_total: bool = BoolField(default=True)
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class LegacyLogEventsRequest(TaskEventsRequestBase):
|
||||
order: Optional[str] = ActualEnumField(LogOrderEnum, default=LogOrderEnum.desc)
|
||||
scroll_id: str = StringField()
|
||||
|
||||
|
||||
class LogEventsRequest(TaskEventsRequestBase):
|
||||
@@ -99,6 +146,7 @@ class LogEventsRequest(TaskEventsRequestBase):
|
||||
navigate_earlier: bool = BoolField(default=True)
|
||||
from_timestamp: Optional[int] = IntField()
|
||||
order: Optional[str] = ActualEnumField(LogOrderEnum)
|
||||
metrics: Sequence[MetricVariants] = ListField(items_types=MetricVariants)
|
||||
|
||||
|
||||
class ScalarMetricsIterRawRequest(TaskEventsRequestBase):
|
||||
@@ -107,6 +155,7 @@ class ScalarMetricsIterRawRequest(TaskEventsRequestBase):
|
||||
metric: MetricVariants = EmbeddedField(MetricVariants, required=True)
|
||||
count_total: bool = BoolField(default=False)
|
||||
scroll_id: str = StringField()
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class IterationEvents(Base):
|
||||
@@ -119,21 +168,67 @@ class MetricEvents(Base):
|
||||
iterations: Sequence[IterationEvents] = ListField(items_types=IterationEvents)
|
||||
|
||||
|
||||
class DebugImageResponse(Base):
|
||||
class MetricEventsResponse(Base):
|
||||
metrics: Sequence[MetricEvents] = ListField(items_types=MetricEvents)
|
||||
scroll_id: str = StringField()
|
||||
|
||||
|
||||
class TaskMetricsRequest(Base):
|
||||
class MultiTasksRequestBase(Base):
|
||||
tasks: Sequence[str] = ListField(
|
||||
items_types=str, validators=[Length(minimum_value=1)]
|
||||
)
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class SingleValueMetricsRequest(MultiTasksRequestBase):
|
||||
metrics: Sequence[MetricVariants] = ListField(items_types=MetricVariants)
|
||||
|
||||
|
||||
class TaskMetricsRequest(MultiTasksRequestBase):
|
||||
event_type: EventType = ActualEnumField(EventType, required=True)
|
||||
|
||||
|
||||
class MultiTaskMetricsRequest(MultiTasksRequestBase):
|
||||
event_type: EventType = ActualEnumField(EventType, default=EventType.all)
|
||||
|
||||
|
||||
class LegacyMultiTaskEventsRequest(MultiTasksRequestBase):
|
||||
iters: int = IntField(default=1, validators=validators.Min(1))
|
||||
scroll_id: str = StringField()
|
||||
|
||||
|
||||
class MultiTaskPlotsRequest(MultiTasksRequestBase):
|
||||
iters: int = IntField(default=1)
|
||||
scroll_id: str = StringField()
|
||||
no_scroll: bool = BoolField(default=False)
|
||||
last_iters_per_task_metric: bool = BoolField(default=True)
|
||||
metrics: Sequence[MetricVariants] = ListField(items_types=MetricVariants)
|
||||
|
||||
|
||||
class TaskPlotsRequest(Base):
|
||||
task: str = StringField(required=True)
|
||||
iters: int = IntField(default=1)
|
||||
scroll_id: str = StringField()
|
||||
no_scroll: bool = BoolField(default=False)
|
||||
metrics: Sequence[MetricVariants] = ListField(items_types=MetricVariants)
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class GetScalarMetricDataRequest(Base):
|
||||
task: str = StringField(required=True)
|
||||
metric: str = StringField(required=True)
|
||||
scroll_id: str = StringField()
|
||||
no_scroll: bool = BoolField(default=False)
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class ClearScrollRequest(Base):
|
||||
scroll_id: str = StringField()
|
||||
|
||||
|
||||
class ClearTaskLogRequest(Base):
|
||||
task: str = StringField(required=True)
|
||||
threshold_sec = IntField()
|
||||
allow_locked = BoolField(default=False)
|
||||
exclude_metrics = ListField(items_types=[str])
|
||||
include_metrics = ListField(items_types=[str])
|
||||
|
||||
@@ -5,8 +5,9 @@ from apiserver.apimodels import DictField, callable_default
|
||||
|
||||
|
||||
class GetSupportedModesRequest(Base):
|
||||
state = StringField(help_text="ASCII base64 encoded application state")
|
||||
callback_url_prefix = StringField()
|
||||
pass
|
||||
# state = StringField(help_text="ASCII base64 encoded application state")
|
||||
# callback_url_prefix = StringField()
|
||||
|
||||
|
||||
class BasicGuestMode(Base):
|
||||
|
||||
@@ -42,12 +42,29 @@ class ModelRequest(models.Base):
|
||||
model = fields.StringField(required=True)
|
||||
|
||||
|
||||
class TaskRequest(models.Base):
|
||||
task = fields.StringField(required=True)
|
||||
|
||||
|
||||
class UpdateForTaskRequest(TaskRequest):
|
||||
uri = fields.StringField()
|
||||
iteration = fields.IntField()
|
||||
override_model_id = fields.StringField()
|
||||
|
||||
|
||||
class UpdateModelRequest(ModelRequest):
|
||||
task = fields.StringField()
|
||||
iteration = fields.IntField()
|
||||
|
||||
|
||||
class DeleteModelRequest(ModelRequest):
|
||||
force = fields.BoolField(default=False)
|
||||
delete_external_artifacts = fields.BoolField(default=True)
|
||||
|
||||
|
||||
class ModelsDeleteManyRequest(BatchRequest):
|
||||
force = fields.BoolField(default=False)
|
||||
delete_external_artifacts = fields.BoolField(default=True)
|
||||
|
||||
|
||||
class PublishModelRequest(ModelRequest):
|
||||
@@ -75,3 +92,8 @@ class DeleteMetadataRequest(DeleteMetadata):
|
||||
|
||||
class AddOrUpdateMetadataRequest(AddOrUpdateMetadata):
|
||||
model = fields.StringField(required=True)
|
||||
|
||||
|
||||
class ModelsGetRequest(models.Base):
|
||||
include_stats = fields.BoolField(default=False)
|
||||
allow_public = fields.BoolField(default=True)
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
from enum import auto
|
||||
from typing import Sequence
|
||||
|
||||
from jsonmodels import fields, models
|
||||
from jsonmodels.validators import Length
|
||||
|
||||
from apiserver.apimodels import DictField, ActualEnumField, ScalarField
|
||||
from apiserver.utilities.stringenum import StringEnum
|
||||
|
||||
|
||||
class Filter(models.Base):
|
||||
@@ -9,3 +16,47 @@ class Filter(models.Base):
|
||||
class TagsRequest(models.Base):
|
||||
include_system = fields.BoolField(default=False)
|
||||
filter = fields.EmbeddedField(Filter)
|
||||
|
||||
|
||||
class EntitiesCountRequest(models.Base):
|
||||
projects = DictField()
|
||||
tasks = DictField()
|
||||
models = DictField()
|
||||
pipelines = DictField()
|
||||
datasets = DictField()
|
||||
reports = DictField()
|
||||
active_users = fields.ListField(str)
|
||||
search_hidden = fields.BoolField(default=False)
|
||||
allow_public = fields.BoolField(default=True)
|
||||
|
||||
|
||||
class EntityType(StringEnum):
|
||||
task = auto()
|
||||
model = auto()
|
||||
|
||||
|
||||
class ValueMapping(models.Base):
|
||||
key = ScalarField(nullable=True)
|
||||
value = ScalarField(nullable=True)
|
||||
|
||||
|
||||
class FieldMapping(models.Base):
|
||||
field = fields.StringField(required=True)
|
||||
name = fields.StringField()
|
||||
values: Sequence[ValueMapping] = fields.ListField(items_types=[ValueMapping])
|
||||
|
||||
|
||||
class PrepareDownloadForGetAllRequest(models.Base):
|
||||
entity_type = ActualEnumField(EntityType)
|
||||
allow_public = fields.BoolField(default=True)
|
||||
search_hidden = fields.BoolField(default=False)
|
||||
only_fields = fields.ListField(
|
||||
items_types=[str], validators=[Length(1)], required=True
|
||||
)
|
||||
field_mappings: Sequence[FieldMapping] = fields.ListField(
|
||||
items_types=[FieldMapping], validators=[Length(1)], required=True
|
||||
)
|
||||
|
||||
|
||||
class DownloadForGetAllRequest(models.Base):
|
||||
prepare_id = fields.StringField(required=True)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from jsonmodels import models, fields
|
||||
from jsonmodels.validators import Length
|
||||
|
||||
from apiserver.apimodels import ListField
|
||||
|
||||
@@ -8,12 +9,13 @@ class Arg(models.Base):
|
||||
value = fields.StringField(required=True)
|
||||
|
||||
|
||||
class DeleteRunsRequest(models.Base):
|
||||
project = fields.StringField(required=True)
|
||||
ids = ListField([str], required=True, validators=[Length(1)])
|
||||
|
||||
|
||||
class StartPipelineRequest(models.Base):
|
||||
task = fields.StringField(required=True)
|
||||
queue = fields.StringField(required=True)
|
||||
args = ListField(Arg)
|
||||
|
||||
|
||||
class StartPipelineResponse(models.Base):
|
||||
pipeline = fields.StringField(required=True)
|
||||
enqueued = fields.BoolField(required=True)
|
||||
verify_watched_queue = fields.BoolField(default=False)
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
from enum import Enum, auto
|
||||
|
||||
from jsonmodels import models, fields
|
||||
|
||||
from apiserver.apimodels import ListField, ActualEnumField, DictField
|
||||
from apiserver.apimodels.organization import TagsRequest
|
||||
from apiserver.database.model import EntityVisibility
|
||||
from apiserver.utilities.stringenum import StringEnum
|
||||
|
||||
|
||||
class ProjectRequest(models.Base):
|
||||
@@ -20,6 +23,7 @@ class MoveRequest(ProjectRequest):
|
||||
class DeleteRequest(ProjectRequest):
|
||||
force = fields.BoolField(default=False)
|
||||
delete_contents = fields.BoolField(default=False)
|
||||
delete_external_artifacts = fields.BoolField(default=True)
|
||||
|
||||
|
||||
class ProjectOrNoneRequest(models.Base):
|
||||
@@ -27,6 +31,11 @@ class ProjectOrNoneRequest(models.Base):
|
||||
include_subprojects = fields.BoolField(default=True)
|
||||
|
||||
|
||||
class GetUniqueMetricsRequest(ProjectOrNoneRequest):
|
||||
model_metrics = fields.BoolField(default=False)
|
||||
ids = fields.ListField(str)
|
||||
|
||||
|
||||
class GetParamsRequest(ProjectOrNoneRequest):
|
||||
page = fields.IntField(default=0)
|
||||
page_size = fields.IntField(default=500)
|
||||
@@ -37,31 +46,58 @@ class ProjectTagsRequest(TagsRequest):
|
||||
|
||||
|
||||
class MultiProjectRequest(models.Base):
|
||||
projects = fields.ListField(str)
|
||||
projects = fields.ListField(items_types=[str, type(None)])
|
||||
include_subprojects = fields.BoolField(default=True)
|
||||
|
||||
|
||||
class ProjectTaskParentsRequest(MultiProjectRequest):
|
||||
tasks_state = ActualEnumField(EntityVisibility)
|
||||
task_name = fields.StringField()
|
||||
|
||||
|
||||
class ProjectHyperparamValuesRequest(MultiProjectRequest):
|
||||
class EntityTypeEnum(StringEnum):
|
||||
task = auto()
|
||||
model = auto()
|
||||
|
||||
|
||||
class ProjectUserNamesRequest(MultiProjectRequest):
|
||||
entity = ActualEnumField(EntityTypeEnum, default=EntityTypeEnum.task)
|
||||
|
||||
|
||||
class MultiProjectPagedRequest(MultiProjectRequest):
|
||||
allow_public = fields.BoolField(default=True)
|
||||
page = fields.IntField(default=0)
|
||||
page_size = fields.IntField(default=500)
|
||||
|
||||
|
||||
class ProjectHyperparamValuesRequest(MultiProjectPagedRequest):
|
||||
section = fields.StringField(required=True)
|
||||
name = fields.StringField(required=True)
|
||||
allow_public = fields.BoolField(default=True)
|
||||
pattern = fields.StringField()
|
||||
|
||||
|
||||
class ProjectModelMetadataValuesRequest(MultiProjectRequest):
|
||||
class ProjectModelMetadataValuesRequest(MultiProjectPagedRequest):
|
||||
key = fields.StringField(required=True)
|
||||
allow_public = fields.BoolField(default=True)
|
||||
|
||||
|
||||
class ProjectChildrenType(Enum):
|
||||
pipeline = "pipeline"
|
||||
report = "report"
|
||||
dataset = "dataset"
|
||||
|
||||
|
||||
class ProjectsGetRequest(models.Base):
|
||||
include_dataset_stats = fields.BoolField(default=False)
|
||||
include_stats = fields.BoolField(default=False)
|
||||
include_stats_filter = DictField()
|
||||
stats_with_children = fields.BoolField(default=True)
|
||||
stats_for_state = ActualEnumField(EntityVisibility, default=EntityVisibility.active)
|
||||
non_public = fields.BoolField(default=False)
|
||||
non_public = fields.BoolField(default=False) # legacy, use allow_public instead
|
||||
active_users = fields.ListField(str)
|
||||
check_own_contents = fields.BoolField(default=False)
|
||||
shallow_search = fields.BoolField(default=False)
|
||||
search_hidden = fields.BoolField(default=False)
|
||||
allow_public = fields.BoolField(default=True)
|
||||
children_type = ActualEnumField(ProjectChildrenType)
|
||||
children_tags = fields.ListField(str)
|
||||
children_tags_filter = DictField()
|
||||
|
||||
@@ -17,6 +17,7 @@ class GetDefaultResp(Base):
|
||||
|
||||
class CreateRequest(Base):
|
||||
name = StringField(required=True)
|
||||
display_name = StringField()
|
||||
tags = ListField(items_types=[str])
|
||||
system_tags = ListField(items_types=[str])
|
||||
metadata = DictField(value_types=[MetadataItem])
|
||||
@@ -26,9 +27,19 @@ class QueueRequest(Base):
|
||||
queue = StringField(required=True)
|
||||
|
||||
|
||||
class GetByIdRequest(QueueRequest):
|
||||
max_task_entries = IntField()
|
||||
|
||||
|
||||
class GetAllRequest(Base):
|
||||
max_task_entries = IntField()
|
||||
search_hidden = BoolField(default=False)
|
||||
|
||||
|
||||
class GetNextTaskRequest(QueueRequest):
|
||||
queue = StringField(required=True)
|
||||
get_task_info = BoolField(default=False)
|
||||
task = StringField()
|
||||
|
||||
|
||||
class DeleteRequest(QueueRequest):
|
||||
@@ -37,6 +48,7 @@ class DeleteRequest(QueueRequest):
|
||||
|
||||
class UpdateRequest(QueueRequest):
|
||||
name = StringField()
|
||||
display_name = StringField()
|
||||
tags = ListField(items_types=[str])
|
||||
system_tags = ListField(items_types=[str])
|
||||
metadata = DictField(value_types=[MetadataItem])
|
||||
@@ -46,6 +58,14 @@ class TaskRequest(QueueRequest):
|
||||
task = StringField(required=True)
|
||||
|
||||
|
||||
class RemoveTaskRequest(TaskRequest):
|
||||
update_task_status = BoolField(default=False)
|
||||
|
||||
|
||||
class AddTaskRequest(TaskRequest):
|
||||
update_execution_queue = BoolField(default=True)
|
||||
|
||||
|
||||
class MoveTaskRequest(TaskRequest):
|
||||
count = IntField(default=1)
|
||||
|
||||
@@ -59,6 +79,7 @@ class GetMetricsRequest(Base):
|
||||
from_date = FloatField(required=True, validators=validators.Min(0))
|
||||
to_date = FloatField(required=True, validators=validators.Min(0))
|
||||
interval = IntField(required=True, validators=validators.Min(1))
|
||||
refresh = BoolField(default=False)
|
||||
|
||||
|
||||
class QueueMetrics(Base):
|
||||
|
||||
84
apiserver/apimodels/reports.py
Normal file
84
apiserver/apimodels/reports.py
Normal file
@@ -0,0 +1,84 @@
|
||||
from typing import Sequence
|
||||
|
||||
from jsonmodels import validators
|
||||
from jsonmodels.fields import StringField, ListField, BoolField, EmbeddedField, IntField
|
||||
from jsonmodels.models import Base
|
||||
from jsonmodels.validators import Length
|
||||
|
||||
from apiserver.apimodels.events import MetricVariants, HistogramRequestBase
|
||||
|
||||
|
||||
class UpdateReportRequest(Base):
|
||||
task = StringField(required=True)
|
||||
name = StringField(nullable=True, validators=Length(minimum_value=3))
|
||||
tags = ListField(items_types=[str])
|
||||
comment = StringField()
|
||||
report = StringField()
|
||||
report_assets = ListField(items_types=[str])
|
||||
|
||||
|
||||
class CreateReportRequest(Base):
|
||||
name = StringField(required=True, validators=Length(minimum_value=3))
|
||||
tags = ListField(items_types=[str])
|
||||
comment = StringField()
|
||||
report = StringField()
|
||||
project = StringField()
|
||||
report_assets = ListField(items_types=[str])
|
||||
|
||||
|
||||
class PublishReportRequest(Base):
|
||||
task = StringField(required=True)
|
||||
message = StringField(default="")
|
||||
|
||||
|
||||
class ArchiveReportRequest(Base):
|
||||
task = StringField(required=True)
|
||||
message = StringField(default="")
|
||||
|
||||
|
||||
class ShareReportRequest(Base):
|
||||
task = StringField(required=True)
|
||||
share = BoolField(default=True)
|
||||
|
||||
|
||||
class DeleteReportRequest(Base):
|
||||
task = StringField(required=True)
|
||||
force = BoolField(default=False)
|
||||
|
||||
|
||||
class MoveReportRequest(Base):
|
||||
task = StringField(required=True)
|
||||
project = StringField()
|
||||
project_name = StringField()
|
||||
|
||||
|
||||
class EventsRequest(Base):
|
||||
iters = IntField(default=1, validators=validators.Min(1))
|
||||
metrics: Sequence[MetricVariants] = ListField(items_types=MetricVariants)
|
||||
|
||||
|
||||
class PlotEventsRequest(EventsRequest):
|
||||
last_iters_per_task_metric: bool = BoolField(default=True)
|
||||
|
||||
|
||||
class ScalarMetricsIterHistogram(HistogramRequestBase):
|
||||
metrics: Sequence[MetricVariants] = ListField(items_types=MetricVariants)
|
||||
|
||||
|
||||
class SingleValueMetrics(Base):
|
||||
pass
|
||||
|
||||
|
||||
class GetTasksDataRequest(Base):
|
||||
debug_images: EventsRequest = EmbeddedField(EventsRequest)
|
||||
plots: PlotEventsRequest = EmbeddedField(PlotEventsRequest)
|
||||
scalar_metrics_iter_histogram: ScalarMetricsIterHistogram = EmbeddedField(
|
||||
ScalarMetricsIterHistogram
|
||||
)
|
||||
single_value_metrics: SingleValueMetrics = EmbeddedField(SingleValueMetrics)
|
||||
allow_public = BoolField(default=True)
|
||||
model_events: bool = BoolField(default=False)
|
||||
|
||||
|
||||
class GetAllRequest(Base):
|
||||
allow_public = BoolField(default=True)
|
||||
@@ -6,6 +6,10 @@ class ReportStatsOptionRequest(Base):
|
||||
enabled = BoolField(default=None, nullable=True)
|
||||
|
||||
|
||||
class GetConfigRequest(Base):
|
||||
path = StringField()
|
||||
|
||||
|
||||
class ReportStatsOptionResponse(Base):
|
||||
supported = BoolField(default=True)
|
||||
enabled = BoolField()
|
||||
|
||||
104
apiserver/apimodels/serving.py
Normal file
104
apiserver/apimodels/serving.py
Normal file
@@ -0,0 +1,104 @@
|
||||
from enum import Enum
|
||||
from typing import Sequence
|
||||
|
||||
from jsonmodels.models import Base
|
||||
from jsonmodels.fields import (
|
||||
StringField,
|
||||
EmbeddedField,
|
||||
DateTimeField,
|
||||
IntField,
|
||||
FloatField,
|
||||
BoolField,
|
||||
)
|
||||
from jsonmodels import validators
|
||||
from jsonmodels.validators import Min
|
||||
|
||||
from apiserver.apimodels import ListField, JsonSerializableMixin, SafeStringField
|
||||
from apiserver.apimodels import ActualEnumField
|
||||
from apiserver.config_repo import config
|
||||
from .workers import MachineStats
|
||||
|
||||
|
||||
class ReferenceItem(Base):
|
||||
type = StringField(
|
||||
required=True,
|
||||
validators=validators.Enum("app_id", "app_instance", "model", "task", "url"),
|
||||
)
|
||||
value = StringField(required=True)
|
||||
|
||||
|
||||
class ServingModel(Base):
|
||||
container_id = StringField(required=True)
|
||||
endpoint_name = StringField(required=True)
|
||||
endpoint_url = StringField() # can be not existing yet at registration time
|
||||
model_name = StringField(required=True)
|
||||
model_source = StringField()
|
||||
model_version = StringField()
|
||||
preprocess_artifact = StringField()
|
||||
input_type = StringField()
|
||||
input_size = SafeStringField()
|
||||
tags = ListField(str)
|
||||
system_tags = ListField(str)
|
||||
reference: Sequence[ReferenceItem] = ListField(ReferenceItem)
|
||||
|
||||
|
||||
class RegisterRequest(ServingModel):
|
||||
timeout = IntField(
|
||||
default=int(
|
||||
config.get("services.serving.default_container_timeout_sec", 10 * 60)
|
||||
),
|
||||
validators=[Min(1)],
|
||||
)
|
||||
""" registration timeout in seconds (default is 10min) """
|
||||
|
||||
|
||||
class UnregisterRequest(Base):
|
||||
container_id = StringField(required=True)
|
||||
|
||||
|
||||
class StatusReportRequest(ServingModel):
|
||||
uptime_sec = IntField()
|
||||
requests_num = IntField()
|
||||
requests_min = FloatField()
|
||||
latency_ms = IntField()
|
||||
machine_stats: MachineStats = EmbeddedField(MachineStats)
|
||||
|
||||
|
||||
class ServingContainerEntry(StatusReportRequest, JsonSerializableMixin):
|
||||
key = StringField(required=True)
|
||||
company_id = StringField(required=True)
|
||||
ip = StringField()
|
||||
register_time = DateTimeField(required=True)
|
||||
register_timeout = IntField(required=True)
|
||||
last_activity_time = DateTimeField(required=True)
|
||||
|
||||
|
||||
class GetEndpointDetailsRequest(Base):
|
||||
endpoint_url = StringField(required=True)
|
||||
|
||||
|
||||
class MetricType(Enum):
|
||||
requests = "requests"
|
||||
requests_min = "requests_min"
|
||||
latency_ms = "latency_ms"
|
||||
cpu_count = "cpu_count"
|
||||
gpu_count = "gpu_count"
|
||||
cpu_util = "cpu_util"
|
||||
gpu_util = "gpu_util"
|
||||
ram_total = "ram_total"
|
||||
ram_used = "ram_used"
|
||||
ram_free = "ram_free"
|
||||
gpu_ram_total = "gpu_ram_total"
|
||||
gpu_ram_used = "gpu_ram_used"
|
||||
gpu_ram_free = "gpu_ram_free"
|
||||
network_rx = "network_rx"
|
||||
network_tx = "network_tx"
|
||||
|
||||
|
||||
class GetEndpointMetricsHistoryRequest(Base):
|
||||
from_date = FloatField(required=True, validators=Min(0))
|
||||
to_date = FloatField(required=True, validators=Min(0))
|
||||
interval = IntField(required=True, validators=Min(1))
|
||||
endpoint_url = StringField(required=True)
|
||||
metric_type = ActualEnumField(MetricType, default=MetricType.requests)
|
||||
instance_charts = BoolField(default=True)
|
||||
60
apiserver/apimodels/storage.py
Normal file
60
apiserver/apimodels/storage.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from jsonmodels.fields import StringField, BoolField, ListField, EmbeddedField
|
||||
from jsonmodels.models import Base
|
||||
from jsonmodels.validators import Enum
|
||||
|
||||
|
||||
class AWSBucketSettings(Base):
|
||||
bucket = StringField()
|
||||
subdir = StringField()
|
||||
host = StringField()
|
||||
key = StringField()
|
||||
secret = StringField()
|
||||
token = StringField()
|
||||
multipart = BoolField(default=True)
|
||||
acl = StringField()
|
||||
secure = BoolField(default=True)
|
||||
region = StringField()
|
||||
verify = BoolField(default=True)
|
||||
use_credentials_chain = BoolField(default=False)
|
||||
|
||||
|
||||
class AWSSettings(Base):
|
||||
key = StringField()
|
||||
secret = StringField()
|
||||
region = StringField()
|
||||
token = StringField()
|
||||
use_credentials_chain = BoolField(default=False)
|
||||
buckets = ListField(items_types=[AWSBucketSettings])
|
||||
|
||||
|
||||
class GoogleBucketSettings(Base):
|
||||
bucket = StringField()
|
||||
subdir = StringField()
|
||||
project = StringField()
|
||||
credentials_json = StringField()
|
||||
|
||||
|
||||
class GoogleSettings(Base):
|
||||
project = StringField()
|
||||
credentials_json = StringField()
|
||||
buckets = ListField(items_types=[GoogleBucketSettings])
|
||||
|
||||
|
||||
class AzureContainerSettings(Base):
|
||||
account_name = StringField()
|
||||
account_key = StringField()
|
||||
container_name = StringField()
|
||||
|
||||
|
||||
class AzureSettings(Base):
|
||||
containers = ListField(items_types=[AzureContainerSettings])
|
||||
|
||||
|
||||
class SetSettingsRequest(Base):
|
||||
aws = EmbeddedField(AWSSettings)
|
||||
google = EmbeddedField(GoogleSettings)
|
||||
azure = EmbeddedField(AzureSettings)
|
||||
|
||||
|
||||
class ResetSettingsRequest(Base):
|
||||
keys = ListField([str], item_validators=[Enum("aws", "google", "azure")])
|
||||
@@ -42,6 +42,7 @@ class StartedResponse(UpdateResponse):
|
||||
|
||||
class EnqueueResponse(UpdateResponse):
|
||||
queued = IntField()
|
||||
queue_watched = BoolField()
|
||||
|
||||
|
||||
class EnqueueBatchItem(UpdateBatchItem):
|
||||
@@ -50,6 +51,7 @@ class EnqueueBatchItem(UpdateBatchItem):
|
||||
|
||||
class EnqueueManyResponse(BatchResponse):
|
||||
succeeded: Sequence[EnqueueBatchItem] = ListField(EnqueueBatchItem)
|
||||
queue_watched = BoolField()
|
||||
|
||||
|
||||
class DequeueResponse(UpdateResponse):
|
||||
@@ -94,20 +96,42 @@ class UpdateRequest(TaskUpdateRequest):
|
||||
status_message = StringField(default="")
|
||||
|
||||
|
||||
class DequeueRequest(UpdateRequest):
|
||||
remove_from_all_queues = BoolField(default=False)
|
||||
new_status = StringField()
|
||||
|
||||
|
||||
class StopRequest(UpdateRequest):
|
||||
include_pipeline_steps = BoolField(default=False)
|
||||
|
||||
|
||||
class EnqueueRequest(UpdateRequest):
|
||||
queue = StringField()
|
||||
queue_name = StringField()
|
||||
verify_watched_queue = BoolField(default=False)
|
||||
update_execution_queue = BoolField(default=True)
|
||||
|
||||
|
||||
class DeleteRequest(UpdateRequest):
|
||||
move_to_trash = BoolField(default=True)
|
||||
return_file_urls = BoolField(default=False)
|
||||
delete_output_models = BoolField(default=True)
|
||||
delete_external_artifacts = BoolField(default=True)
|
||||
include_pipeline_steps = BoolField(default=False)
|
||||
|
||||
|
||||
class SetRequirementsRequest(TaskRequest):
|
||||
requirements = DictField(required=True)
|
||||
|
||||
|
||||
class CompletedRequest(UpdateRequest):
|
||||
publish = BoolField(default=False)
|
||||
|
||||
|
||||
class CompletedResponse(UpdateResponse):
|
||||
published = IntField(default=0)
|
||||
|
||||
|
||||
class PublishRequest(UpdateRequest):
|
||||
publish_model = BoolField(default=True)
|
||||
|
||||
@@ -171,6 +195,7 @@ class ResetRequest(UpdateRequest):
|
||||
clear_all = BoolField(default=False)
|
||||
return_file_urls = BoolField(default=False)
|
||||
delete_output_models = BoolField(default=True)
|
||||
delete_external_artifacts = BoolField(default=True)
|
||||
|
||||
|
||||
class MultiTaskRequest(models.Base):
|
||||
@@ -245,6 +270,7 @@ class DeleteConfigurationRequest(TaskUpdateRequest):
|
||||
class ArchiveRequest(MultiTaskRequest):
|
||||
status_reason = StringField(default="")
|
||||
status_message = StringField(default="")
|
||||
include_pipeline_steps = BoolField(default=False)
|
||||
|
||||
|
||||
class ArchiveResponse(models.Base):
|
||||
@@ -256,13 +282,29 @@ class TaskBatchRequest(BatchRequest):
|
||||
status_message = StringField(default="")
|
||||
|
||||
|
||||
class ArchiveManyRequest(TaskBatchRequest):
|
||||
include_pipeline_steps = BoolField(default=False)
|
||||
|
||||
|
||||
class UnarchiveManyRequest(TaskBatchRequest):
|
||||
include_pipeline_steps = BoolField(default=False)
|
||||
|
||||
|
||||
class StopManyRequest(TaskBatchRequest):
|
||||
force = BoolField(default=False)
|
||||
include_pipeline_steps = BoolField(default=False)
|
||||
|
||||
|
||||
class DequeueManyRequest(TaskBatchRequest):
|
||||
remove_from_all_queues = BoolField(default=False)
|
||||
new_status = StringField()
|
||||
|
||||
|
||||
class EnqueueManyRequest(TaskBatchRequest):
|
||||
queue = StringField()
|
||||
queue_name = StringField()
|
||||
validate_tasks = BoolField(default=False)
|
||||
verify_watched_queue = BoolField(default=False)
|
||||
|
||||
|
||||
class DeleteManyRequest(TaskBatchRequest):
|
||||
@@ -270,6 +312,8 @@ class DeleteManyRequest(TaskBatchRequest):
|
||||
return_file_urls = BoolField(default=False)
|
||||
delete_output_models = BoolField(default=True)
|
||||
force = BoolField(default=False)
|
||||
delete_external_artifacts = BoolField(default=True)
|
||||
include_pipeline_steps = BoolField(default=False)
|
||||
|
||||
|
||||
class ResetManyRequest(TaskBatchRequest):
|
||||
@@ -277,6 +321,7 @@ class ResetManyRequest(TaskBatchRequest):
|
||||
return_file_urls = BoolField(default=False)
|
||||
delete_output_models = BoolField(default=True)
|
||||
force = BoolField(default=False)
|
||||
delete_external_artifacts = BoolField(default=True)
|
||||
|
||||
|
||||
class PublishManyRequest(TaskBatchRequest):
|
||||
@@ -300,3 +345,13 @@ class DeleteModelsRequest(TaskRequest):
|
||||
models: Sequence[ModelItemKey] = ListField(
|
||||
[ModelItemKey], validators=Length(minimum_value=1)
|
||||
)
|
||||
|
||||
|
||||
class GetAllReq(models.Base):
|
||||
allow_public = BoolField(default=True)
|
||||
search_hidden = BoolField(default=False)
|
||||
|
||||
|
||||
class UpdateTagsRequest(BatchRequest):
|
||||
add_tags = ListField([str])
|
||||
remove_tags = ListField([str])
|
||||
|
||||
@@ -4,6 +4,10 @@ from jsonmodels.models import Base
|
||||
from apiserver.apimodels import DictField
|
||||
|
||||
|
||||
class UserRequest(Base):
|
||||
user = StringField(required=True)
|
||||
|
||||
|
||||
class CreateRequest(Base):
|
||||
id = StringField(required=True)
|
||||
name = StringField(required=True)
|
||||
|
||||
@@ -12,20 +12,21 @@ from jsonmodels.fields import (
|
||||
)
|
||||
from jsonmodels.models import Base
|
||||
|
||||
from apiserver.apimodels import make_default, ListField, EnumField, JsonSerializableMixin
|
||||
|
||||
DEFAULT_TIMEOUT = 10 * 60
|
||||
from apiserver.apimodels import ListField, EnumField, JsonSerializableMixin
|
||||
from apiserver.config_repo import config
|
||||
|
||||
|
||||
class WorkerRequest(Base):
|
||||
worker = StringField(required=True)
|
||||
tags = ListField(str)
|
||||
system_tags = ListField(str)
|
||||
|
||||
|
||||
class RegisterRequest(WorkerRequest):
|
||||
timeout = make_default(
|
||||
IntField, DEFAULT_TIMEOUT
|
||||
)() # registration timeout in seconds (default is 10min)
|
||||
timeout = IntField(
|
||||
default=int(config.get("services.workers.default_worker_timeout_sec", 10 * 60))
|
||||
)
|
||||
""" registration timeout in seconds (default is 10min) """
|
||||
queues = ListField(six.string_types) # list of queues this worker listens to
|
||||
|
||||
|
||||
@@ -76,6 +77,7 @@ class WorkerEntry(Base, JsonSerializableMixin):
|
||||
last_activity_time = DateTimeField(required=True)
|
||||
last_report_time = DateTimeField()
|
||||
tags = ListField(str)
|
||||
system_tags = ListField(str)
|
||||
|
||||
|
||||
class CurrentTaskEntry(IdNameEntry):
|
||||
@@ -84,6 +86,7 @@ class CurrentTaskEntry(IdNameEntry):
|
||||
|
||||
|
||||
class QueueEntry(IdNameEntry):
|
||||
display_name = StringField()
|
||||
next_task = EmbeddedField(IdNameEntry)
|
||||
num_tasks = IntField()
|
||||
|
||||
@@ -96,12 +99,19 @@ class WorkerResponseEntry(WorkerEntry):
|
||||
|
||||
class GetAllRequest(Base):
|
||||
last_seen = IntField(default=3600)
|
||||
tags = ListField(str)
|
||||
system_tags = ListField(str)
|
||||
worker_pattern = StringField()
|
||||
|
||||
|
||||
class GetAllResponse(Base):
|
||||
workers = ListField(WorkerResponseEntry)
|
||||
|
||||
|
||||
class GetCountRequest(GetAllRequest):
|
||||
last_seen = IntField(default=0)
|
||||
|
||||
|
||||
class StatsBase(Base):
|
||||
worker_ids = ListField(str)
|
||||
|
||||
|
||||
@@ -64,7 +64,7 @@ class AuthBLL:
|
||||
feature_set="basic",
|
||||
)
|
||||
|
||||
return GetTokenResponse(token=token.decode("ascii"))
|
||||
return GetTokenResponse(token=token)
|
||||
|
||||
@staticmethod
|
||||
def create_user(request: CreateUserRequest, call: APICall = None) -> str:
|
||||
|
||||
@@ -1,375 +0,0 @@
|
||||
import operator
|
||||
from typing import Sequence, Tuple, Optional
|
||||
|
||||
import attr
|
||||
from boltons.iterutils import first
|
||||
from elasticsearch import Elasticsearch
|
||||
from jsonmodels.fields import StringField, ListField, IntField, BoolField
|
||||
from jsonmodels.models import Base
|
||||
from redis import StrictRedis
|
||||
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.apimodels import JsonSerializableMixin
|
||||
from apiserver.bll.event.event_common import (
|
||||
EventSettings,
|
||||
EventType,
|
||||
check_empty_data,
|
||||
search_company_events,
|
||||
)
|
||||
from apiserver.bll.redis_cache_manager import RedisCacheManager
|
||||
from apiserver.database.errors import translate_errors_context
|
||||
from apiserver.timing_context import TimingContext
|
||||
from apiserver.utilities.dicts import nested_get
|
||||
|
||||
|
||||
class VariantState(Base):
|
||||
name: str = StringField(required=True)
|
||||
min_iteration: int = IntField()
|
||||
max_iteration: int = IntField()
|
||||
|
||||
|
||||
class DebugSampleHistoryState(Base, JsonSerializableMixin):
|
||||
id: str = StringField(required=True)
|
||||
iteration: int = IntField()
|
||||
variant: str = StringField()
|
||||
task: str = StringField()
|
||||
metric: str = StringField()
|
||||
reached_first: bool = BoolField()
|
||||
reached_last: bool = BoolField()
|
||||
variant_states: Sequence[VariantState] = ListField([VariantState])
|
||||
warning: str = StringField()
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True)
|
||||
class DebugSampleHistoryResult(object):
|
||||
scroll_id: str = None
|
||||
event: dict = None
|
||||
min_iteration: int = None
|
||||
max_iteration: int = None
|
||||
|
||||
|
||||
class DebugSampleHistory:
|
||||
EVENT_TYPE = EventType.metrics_image
|
||||
|
||||
def __init__(self, redis: StrictRedis, es: Elasticsearch):
|
||||
self.es = es
|
||||
self.cache_manager = RedisCacheManager(
|
||||
state_class=DebugSampleHistoryState,
|
||||
redis=redis,
|
||||
expiration_interval=EventSettings.state_expiration_sec,
|
||||
)
|
||||
|
||||
def get_next_debug_image(
|
||||
self, company_id: str, task: str, state_id: str, navigate_earlier: bool
|
||||
) -> DebugSampleHistoryResult:
|
||||
"""
|
||||
Get the debug image for next/prev variant on the current iteration
|
||||
If does not exist then try getting image for the first/last variant from next/prev iteration
|
||||
"""
|
||||
res = DebugSampleHistoryResult(scroll_id=state_id)
|
||||
state = self.cache_manager.get_state(state_id)
|
||||
if not state or state.task != task:
|
||||
raise errors.bad_request.InvalidScrollId(scroll_id=state_id)
|
||||
|
||||
if check_empty_data(self.es, company_id=company_id, event_type=self.EVENT_TYPE):
|
||||
return res
|
||||
|
||||
image = self._get_next_for_current_iteration(
|
||||
company_id=company_id, navigate_earlier=navigate_earlier, state=state
|
||||
) or self._get_next_for_another_iteration(
|
||||
company_id=company_id, navigate_earlier=navigate_earlier, state=state
|
||||
)
|
||||
if not image:
|
||||
return res
|
||||
|
||||
self._fill_res_and_update_state(image=image, res=res, state=state)
|
||||
self.cache_manager.set_state(state=state)
|
||||
return res
|
||||
|
||||
def _fill_res_and_update_state(
|
||||
self, image: dict, res: DebugSampleHistoryResult, state: DebugSampleHistoryState
|
||||
):
|
||||
state.variant = image["variant"]
|
||||
state.iteration = image["iter"]
|
||||
res.event = image
|
||||
var_state = first(s for s in state.variant_states if s.name == state.variant)
|
||||
if var_state:
|
||||
res.min_iteration = var_state.min_iteration
|
||||
res.max_iteration = var_state.max_iteration
|
||||
|
||||
def _get_next_for_current_iteration(
|
||||
self, company_id: str, navigate_earlier: bool, state: DebugSampleHistoryState
|
||||
) -> Optional[dict]:
|
||||
"""
|
||||
Get the image for next (if navigated earlier is False) or previous variant sorted by name for the same iteration
|
||||
Only variants for which the iteration falls into their valid range are considered
|
||||
Return None if no such variant or image is found
|
||||
"""
|
||||
cmp = operator.lt if navigate_earlier else operator.gt
|
||||
variants = [
|
||||
var_state
|
||||
for var_state in state.variant_states
|
||||
if cmp(var_state.name, state.variant)
|
||||
and var_state.min_iteration <= state.iteration
|
||||
]
|
||||
if not variants:
|
||||
return
|
||||
|
||||
must_conditions = [
|
||||
{"term": {"task": state.task}},
|
||||
{"term": {"metric": state.metric}},
|
||||
{"terms": {"variant": [v.name for v in variants]}},
|
||||
{"term": {"iter": state.iteration}},
|
||||
{"exists": {"field": "url"}},
|
||||
]
|
||||
es_req = {
|
||||
"size": 1,
|
||||
"sort": {"variant": "desc" if navigate_earlier else "asc"},
|
||||
"query": {"bool": {"must": must_conditions}},
|
||||
}
|
||||
|
||||
with translate_errors_context(), TimingContext(
|
||||
"es", "get_next_for_current_iteration"
|
||||
):
|
||||
es_res = search_company_events(
|
||||
self.es, company_id=company_id, event_type=self.EVENT_TYPE, body=es_req
|
||||
)
|
||||
|
||||
hits = nested_get(es_res, ("hits", "hits"))
|
||||
if not hits:
|
||||
return
|
||||
|
||||
return hits[0]["_source"]
|
||||
|
||||
def _get_next_for_another_iteration(
|
||||
self, company_id: str, navigate_earlier: bool, state: DebugSampleHistoryState
|
||||
) -> Optional[dict]:
|
||||
"""
|
||||
Get the image for the first variant for the next iteration (if navigate_earlier is set to False)
|
||||
or from the last variant for the previous iteration (otherwise)
|
||||
The variants for which the image falls in invalid range are discarded
|
||||
If no suitable image is found then None is returned
|
||||
"""
|
||||
|
||||
must_conditions = [
|
||||
{"term": {"task": state.task}},
|
||||
{"term": {"metric": state.metric}},
|
||||
{"exists": {"field": "url"}},
|
||||
]
|
||||
|
||||
if navigate_earlier:
|
||||
range_operator = "lt"
|
||||
order = "desc"
|
||||
variants = [
|
||||
var_state
|
||||
for var_state in state.variant_states
|
||||
if var_state.min_iteration < state.iteration
|
||||
]
|
||||
else:
|
||||
range_operator = "gt"
|
||||
order = "asc"
|
||||
variants = state.variant_states
|
||||
|
||||
if not variants:
|
||||
return
|
||||
|
||||
variants_conditions = [
|
||||
{
|
||||
"bool": {
|
||||
"must": [
|
||||
{"term": {"variant": v.name}},
|
||||
{"range": {"iter": {"gte": v.min_iteration}}},
|
||||
]
|
||||
}
|
||||
}
|
||||
for v in variants
|
||||
]
|
||||
must_conditions.append({"bool": {"should": variants_conditions}})
|
||||
must_conditions.append({"range": {"iter": {range_operator: state.iteration}}},)
|
||||
|
||||
es_req = {
|
||||
"size": 1,
|
||||
"sort": [{"iter": order}, {"variant": order}],
|
||||
"query": {"bool": {"must": must_conditions}},
|
||||
}
|
||||
with translate_errors_context(), TimingContext(
|
||||
"es", "get_next_for_another_iteration"
|
||||
):
|
||||
es_res = search_company_events(
|
||||
self.es, company_id=company_id, event_type=self.EVENT_TYPE, body=es_req
|
||||
)
|
||||
|
||||
hits = nested_get(es_res, ("hits", "hits"))
|
||||
if not hits:
|
||||
return
|
||||
|
||||
return hits[0]["_source"]
|
||||
|
||||
def get_debug_image_for_variant(
|
||||
self,
|
||||
company_id: str,
|
||||
task: str,
|
||||
metric: str,
|
||||
variant: str,
|
||||
iteration: Optional[int] = None,
|
||||
refresh: bool = False,
|
||||
state_id: str = None,
|
||||
) -> DebugSampleHistoryResult:
|
||||
"""
|
||||
Get the debug image for the requested iteration or the latest before it
|
||||
If the iteration is not passed then get the latest event
|
||||
"""
|
||||
res = DebugSampleHistoryResult()
|
||||
if check_empty_data(self.es, company_id=company_id, event_type=self.EVENT_TYPE):
|
||||
return res
|
||||
|
||||
def init_state(state_: DebugSampleHistoryState):
|
||||
state_.task = task
|
||||
state_.metric = metric
|
||||
self._reset_variant_states(company_id=company_id, state=state_)
|
||||
|
||||
def validate_state(state_: DebugSampleHistoryState):
|
||||
if state_.task != task or state_.metric != metric:
|
||||
raise errors.bad_request.InvalidScrollId(
|
||||
"Task and metric stored in the state do not match the passed ones",
|
||||
scroll_id=state_.id,
|
||||
)
|
||||
if refresh:
|
||||
self._reset_variant_states(company_id=company_id, state=state_)
|
||||
|
||||
state: DebugSampleHistoryState
|
||||
with self.cache_manager.get_or_create_state(
|
||||
state_id=state_id, init_state=init_state, validate_state=validate_state,
|
||||
) as state:
|
||||
res.scroll_id = state.id
|
||||
|
||||
var_state = first(s for s in state.variant_states if s.name == variant)
|
||||
if not var_state:
|
||||
return res
|
||||
|
||||
res.min_iteration = var_state.min_iteration
|
||||
res.max_iteration = var_state.max_iteration
|
||||
|
||||
must_conditions = [
|
||||
{"term": {"task": task}},
|
||||
{"term": {"metric": metric}},
|
||||
{"term": {"variant": variant}},
|
||||
{"exists": {"field": "url"}},
|
||||
]
|
||||
if iteration is not None:
|
||||
must_conditions.append(
|
||||
{
|
||||
"range": {
|
||||
"iter": {"lte": iteration, "gte": var_state.min_iteration}
|
||||
}
|
||||
}
|
||||
)
|
||||
else:
|
||||
must_conditions.append(
|
||||
{"range": {"iter": {"gte": var_state.min_iteration}}}
|
||||
)
|
||||
|
||||
es_req = {
|
||||
"size": 1,
|
||||
"sort": {"iter": "desc"},
|
||||
"query": {"bool": {"must": must_conditions}},
|
||||
}
|
||||
|
||||
with translate_errors_context(), TimingContext(
|
||||
"es", "get_debug_image_for_variant"
|
||||
):
|
||||
es_res = search_company_events(
|
||||
self.es,
|
||||
company_id=company_id,
|
||||
event_type=self.EVENT_TYPE,
|
||||
body=es_req,
|
||||
)
|
||||
|
||||
hits = nested_get(es_res, ("hits", "hits"))
|
||||
if not hits:
|
||||
return res
|
||||
|
||||
self._fill_res_and_update_state(
|
||||
image=hits[0]["_source"], res=res, state=state
|
||||
)
|
||||
return res
|
||||
|
||||
def _reset_variant_states(self, company_id: str, state: DebugSampleHistoryState):
|
||||
variant_iterations = self._get_variant_iterations(
|
||||
company_id=company_id, task=state.task, metric=state.metric
|
||||
)
|
||||
state.variant_states = [
|
||||
VariantState(name=var_name, min_iteration=min_iter, max_iteration=max_iter)
|
||||
for var_name, min_iter, max_iter in variant_iterations
|
||||
]
|
||||
|
||||
def _get_variant_iterations(
|
||||
self,
|
||||
company_id: str,
|
||||
task: str,
|
||||
metric: str,
|
||||
variants: Optional[Sequence[str]] = None,
|
||||
) -> Sequence[Tuple[str, int, int]]:
|
||||
"""
|
||||
Return valid min and max iterations that the task reported images
|
||||
The min iteration is the lowest iteration that contains non-recycled image url
|
||||
"""
|
||||
must = [
|
||||
{"term": {"task": task}},
|
||||
{"term": {"metric": metric}},
|
||||
{"exists": {"field": "url"}},
|
||||
]
|
||||
if variants:
|
||||
must.append({"terms": {"variant": variants}})
|
||||
|
||||
es_req: dict = {
|
||||
"size": 0,
|
||||
"query": {"bool": {"must": must}},
|
||||
"aggs": {
|
||||
"variants": {
|
||||
# all variants that sent debug images
|
||||
"terms": {
|
||||
"field": "variant",
|
||||
"size": EventSettings.max_variants_count,
|
||||
"order": {"_key": "asc"},
|
||||
},
|
||||
"aggs": {
|
||||
"last_iter": {"max": {"field": "iter"}},
|
||||
"urls": {
|
||||
# group by urls and choose the minimal iteration
|
||||
# from all the maximal iterations per url
|
||||
"terms": {
|
||||
"field": "url",
|
||||
"order": {"max_iter": "asc"},
|
||||
"size": 1,
|
||||
},
|
||||
"aggs": {
|
||||
# find max iteration for each url
|
||||
"max_iter": {"max": {"field": "iter"}}
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
with translate_errors_context(), TimingContext(
|
||||
"es", "get_debug_image_iterations"
|
||||
):
|
||||
es_res = search_company_events(
|
||||
self.es, company_id=company_id, event_type=self.EVENT_TYPE, body=es_req
|
||||
)
|
||||
|
||||
def get_variant_data(variant_bucket: dict) -> Tuple[str, int, int]:
|
||||
variant = variant_bucket["key"]
|
||||
urls = nested_get(variant_bucket, ("urls", "buckets"))
|
||||
min_iter = int(urls[0]["max_iter"]["value"])
|
||||
max_iter = int(variant_bucket["last_iter"]["value"])
|
||||
return variant, min_iter, max_iter
|
||||
|
||||
return [
|
||||
get_variant_data(variant_bucket)
|
||||
for variant_bucket in nested_get(
|
||||
es_res, ("aggregations", "variants", "buckets")
|
||||
)
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,10 +1,15 @@
|
||||
import base64
|
||||
import zlib
|
||||
from enum import Enum
|
||||
from typing import Union, Sequence, Mapping
|
||||
from typing import Union, Sequence, Mapping, Tuple
|
||||
|
||||
from boltons.typeutils import classproperty
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.errors import translate_errors_context
|
||||
from apiserver.database.model.task.task import Task
|
||||
from apiserver.utilities.dicts import nested_get
|
||||
|
||||
|
||||
class EventType(Enum):
|
||||
@@ -16,10 +21,14 @@ class EventType(Enum):
|
||||
all = "*"
|
||||
|
||||
|
||||
SINGLE_SCALAR_ITERATION = -(2 ** 31)
|
||||
MetricVariants = Mapping[str, Sequence[str]]
|
||||
TaskCompanies = Mapping[str, Sequence[Task]]
|
||||
|
||||
|
||||
class EventSettings:
|
||||
_max_es_allowed_aggregation_buckets = 10000
|
||||
|
||||
@classproperty
|
||||
def max_workers(self):
|
||||
return config.get("services.events.events_retrieval.max_metrics_concurrency", 4)
|
||||
@@ -31,22 +40,31 @@ class EventSettings:
|
||||
)
|
||||
|
||||
@classproperty
|
||||
def max_metrics_count(self):
|
||||
return config.get("services.events.events_retrieval.max_metrics_count", 100)
|
||||
|
||||
@classproperty
|
||||
def max_variants_count(self):
|
||||
return config.get("services.events.events_retrieval.max_variants_count", 100)
|
||||
def max_es_buckets(self):
|
||||
percentage = (
|
||||
min(
|
||||
100,
|
||||
config.get(
|
||||
"services.events.events_retrieval.dynamic_metrics_count_threshold",
|
||||
80,
|
||||
),
|
||||
)
|
||||
/ 100
|
||||
)
|
||||
return int(self._max_es_allowed_aggregation_buckets * percentage)
|
||||
|
||||
|
||||
def get_index_name(company_id: Union[str, Sequence[str]], event_type: str):
    """
    Build the Elasticsearch index expression for the events of the given type.

    :param company_id: a single company id or a sequence of company ids.
        Empty/None ids contribute an empty company suffix.
    :param event_type: the event type name; it is lower-cased and its spaces
        are replaced with underscores
    :return: comma separated "events-<event_type>-<company>" index names,
        one per passed company id (company ids are lower-cased)
    """
    event_type = event_type.lower().replace(" ", "_")
    # a bare string (or a missing id) stands for exactly one company;
    # iterating a str directly would yield one index per character
    if company_id is None or isinstance(company_id, str):
        company_id = [company_id]

    return ",".join(
        f"events-{event_type}-{(c_id or '').lower()}" for c_id in company_id
    )
|
||||
|
||||
|
||||
def check_empty_data(es: Elasticsearch, company_id: str, event_type: EventType) -> bool:
    """
    Tell whether there is no events index for the company and event type.

    :param es: Elasticsearch client
    :param company_id: id of the company whose events index is checked
    :param event_type: the type of the events; its value is used in the index name
    :return: True if the index returned by get_index_name does not exist, False otherwise
    """
    es_index = get_index_name(company_id, event_type.value)
    # the index is passed by keyword, as in the newer of the two call forms in the diff
    return not es.indices.exists(index=es_index)
|
||||
|
||||
@@ -66,9 +84,7 @@ def delete_company_events(
|
||||
es: Elasticsearch, company_id: str, event_type: EventType, body: dict, **kwargs
|
||||
) -> dict:
|
||||
es_index = get_index_name(company_id, event_type.value)
|
||||
return es.delete_by_query(
|
||||
index=es_index, body=body, conflicts="proceed", **kwargs
|
||||
)
|
||||
return es.delete_by_query(index=es_index, body=body, conflicts="proceed", **kwargs)
|
||||
|
||||
|
||||
def count_company_events(
|
||||
@@ -78,6 +94,44 @@ def count_company_events(
|
||||
return es.count(index=es_index, body=body, **kwargs)
|
||||
|
||||
|
||||
def get_max_metric_and_variant_counts(
    es: Elasticsearch,
    company_id: Union[str, Sequence[str]],
    event_type: EventType,
    query: dict,
    **kwargs,
) -> Tuple[int, int]:
    """
    Return the (metrics, variants) sizes to use for the metric/variant terms
    aggregations over the events matching the passed query.

    When "services.events.events_retrieval.dynamic_metrics_count" is off (the default)
    the configured max_metrics_count and max_variants_count are returned as is.
    Otherwise a cardinality aggregation on the "metric" field is run for the query
    and the EventSettings.max_es_buckets budget is divided by the resulting count.

    :param es: Elasticsearch client
    :param company_id: company id (or ids) whose event indices are searched
    :param event_type: the type of the events to search
    :param query: ES query selecting the events
    :param kwargs: passed through to search_company_events
    :return: tuple of (metrics count, variants count). In the dynamic case both
        values are at least 1 and the metrics count never exceeds the bucket budget
    """
    dynamic = config.get(
        "services.events.events_retrieval.dynamic_metrics_count", False
    )
    max_metrics_count = config.get(
        "services.events.events_retrieval.max_metrics_count", 100
    )
    max_variants_count = config.get(
        "services.events.events_retrieval.max_variants_count", 100
    )
    if not dynamic:
        return max_metrics_count, max_variants_count

    es_req: dict = {
        "size": 0,
        "query": query,
        "aggs": {"metrics_count": {"cardinality": {"field": "metric"}}},
    }
    with translate_errors_context():
        es_res = search_company_events(
            es, company_id=company_id, event_type=event_type, body=es_req, **kwargs,
        )

    metrics_count = nested_get(
        es_res, ("aggregations", "metrics_count", "value"), max_metrics_count
    )
    if not metrics_count:
        return max_metrics_count, max_variants_count

    # Keep both sizes positive: with more metrics than buckets the plain division
    # yields 0 variants, and the results are used as terms aggregation sizes
    max_buckets = max(1, EventSettings.max_es_buckets)
    metrics_count = min(int(metrics_count), max_buckets)
    return metrics_count, max(1, max_buckets // metrics_count)
|
||||
|
||||
|
||||
def get_metric_variants_condition(metric_variants: MetricVariants,) -> Sequence:
|
||||
conditions = [
|
||||
{
|
||||
@@ -94,3 +148,19 @@ def get_metric_variants_condition(metric_variants: MetricVariants,) -> Sequence:
|
||||
]
|
||||
|
||||
return {"bool": {"should": conditions}}
|
||||
|
||||
|
||||
class PlotFields:
    """
    Names of the plot related fields of an event document.
    Each attribute value is the literal field name.
    """

    valid_plot = "valid_plot"
    plot_len = "plot_len"
    # plot text; filled by uncompress_plot from plot_data when it is missing
    plot_str = "plot_str"
    # base64 encoded, zlib compressed plot text (see uncompress_plot)
    plot_data = "plot_data"
    source_urls = "source_urls"
|
||||
|
||||
|
||||
def uncompress_plot(event: dict):
    """
    Restore the plot text of the event in place.

    The PlotFields.plot_data field is always removed from the event. If it held
    data and the event has no PlotFields.plot_str value then the data is
    base64-decoded, zlib-decompressed and stored as text under PlotFields.plot_str.

    :param event: the event dictionary, modified in place
    """
    plot_data = event.pop(PlotFields.plot_data, None)
    # nothing to restore, or the plain text is already there and takes precedence
    if not plot_data or event.get(PlotFields.plot_str) is not None:
        return

    event[PlotFields.plot_str] = zlib.decompress(
        base64.b64decode(plot_data)
    ).decode()
|
||||
|
||||
@@ -4,12 +4,11 @@ from collections import defaultdict
|
||||
from concurrent.futures.thread import ThreadPoolExecutor
|
||||
from functools import partial
|
||||
from operator import itemgetter
|
||||
from typing import Sequence, Tuple
|
||||
from typing import Sequence, Tuple, Mapping
|
||||
|
||||
from boltons.iterutils import bucketize
|
||||
from elasticsearch import Elasticsearch
|
||||
from mongoengine import Q
|
||||
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.bll.event.event_common import (
|
||||
EventType,
|
||||
EventSettings,
|
||||
@@ -17,13 +16,17 @@ from apiserver.bll.event.event_common import (
|
||||
check_empty_data,
|
||||
MetricVariants,
|
||||
get_metric_variants_condition,
|
||||
get_max_metric_and_variant_counts,
|
||||
SINGLE_SCALAR_ITERATION,
|
||||
TaskCompanies,
|
||||
)
|
||||
from apiserver.bll.event.scalar_key import ScalarKey, ScalarKeyEnum
|
||||
from apiserver.bll.query import Builder as QueryBuilder
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.errors import translate_errors_context
|
||||
from apiserver.database.model.model import Model
|
||||
from apiserver.database.model.task.task import Task
|
||||
from apiserver.timing_context import TimingContext
|
||||
from apiserver.tools import safe_get
|
||||
from apiserver.utilities.dicts import nested_get
|
||||
|
||||
log = config.logger(__file__)
|
||||
|
||||
@@ -42,6 +45,7 @@ class EventMetrics:
|
||||
samples: int,
|
||||
key: ScalarKeyEnum,
|
||||
metric_variants: MetricVariants = None,
|
||||
model_events: bool = False,
|
||||
) -> dict:
|
||||
"""
|
||||
Get scalar metric histogram per metric and variant
|
||||
@@ -59,6 +63,7 @@ class EventMetrics:
|
||||
samples=samples,
|
||||
key=ScalarKey.resolve(key),
|
||||
metric_variants=metric_variants,
|
||||
model_events=model_events,
|
||||
)
|
||||
|
||||
def _get_scalar_average_per_iter_core(
|
||||
@@ -70,6 +75,7 @@ class EventMetrics:
|
||||
key: ScalarKey,
|
||||
run_parallel: bool = True,
|
||||
metric_variants: MetricVariants = None,
|
||||
model_events: bool = False,
|
||||
) -> dict:
|
||||
intervals = self._get_task_metric_intervals(
|
||||
company_id=company_id,
|
||||
@@ -101,64 +107,75 @@ class EventMetrics:
|
||||
)
|
||||
|
||||
ret = defaultdict(dict)
|
||||
if not metrics:
|
||||
return ret
|
||||
|
||||
last_metrics = {}
|
||||
cls_ = Model if model_events else Task
|
||||
task = cls_.objects(id=task_id).only("last_metrics").first()
|
||||
if task and task.last_metrics:
|
||||
for m_data in task.last_metrics.values():
|
||||
for v_data in m_data.values():
|
||||
last_metrics[(v_data.metric, v_data.variant)] = v_data
|
||||
|
||||
for metric_key, metric_values in metrics:
|
||||
for variant_key, data in metric_values.items():
|
||||
last_metrics_data = last_metrics.get((metric_key, variant_key))
|
||||
if last_metrics_data and last_metrics_data.x_axis_label is not None:
|
||||
data["x_axis_label"] = last_metrics_data.x_axis_label
|
||||
ret[metric_key].update(metric_values)
|
||||
|
||||
return ret
|
||||
|
||||
def compare_scalar_metrics_average_per_iter(
|
||||
self,
|
||||
company_id,
|
||||
task_ids: Sequence[str],
|
||||
companies: TaskCompanies,
|
||||
samples,
|
||||
key: ScalarKeyEnum,
|
||||
allow_public=True,
|
||||
metric_variants: MetricVariants = None,
|
||||
model_events: bool = False,
|
||||
):
|
||||
"""
|
||||
Compare scalar metrics for different tasks per metric and variant
|
||||
The amount of points in each histogram should not exceed the requested samples
|
||||
"""
|
||||
task_name_by_id = {}
|
||||
with translate_errors_context():
|
||||
task_objs = Task.get_many(
|
||||
company=company_id,
|
||||
query=Q(id__in=task_ids),
|
||||
allow_public=allow_public,
|
||||
override_projection=("id", "name", "company", "company_origin"),
|
||||
return_dicts=False,
|
||||
)
|
||||
if len(task_objs) < len(task_ids):
|
||||
invalid = tuple(set(task_ids) - set(r.id for r in task_objs))
|
||||
raise errors.bad_request.InvalidTaskId(company=company_id, ids=invalid)
|
||||
task_name_by_id = {t.id: t.name for t in task_objs}
|
||||
|
||||
companies = {t.get_index_company() for t in task_objs}
|
||||
if len(companies) > 1:
|
||||
raise errors.bad_request.InvalidTaskId(
|
||||
"only tasks from the same company are supported"
|
||||
)
|
||||
|
||||
event_type = EventType.metrics_scalar
|
||||
company_id = next(iter(companies))
|
||||
if check_empty_data(self.es, company_id=company_id, event_type=event_type):
|
||||
companies = {
|
||||
company_id: tasks
|
||||
for company_id, tasks in companies.items()
|
||||
if not check_empty_data(
|
||||
self.es, company_id=company_id, event_type=event_type
|
||||
)
|
||||
}
|
||||
if not companies:
|
||||
return {}
|
||||
|
||||
get_scalar_average_per_iter = partial(
|
||||
self._get_scalar_average_per_iter_core,
|
||||
company_id=company_id,
|
||||
event_type=event_type,
|
||||
samples=samples,
|
||||
key=ScalarKey.resolve(key),
|
||||
metric_variants=metric_variants,
|
||||
run_parallel=False,
|
||||
model_events=model_events,
|
||||
)
|
||||
task_ids, company_ids = zip(
|
||||
*(
|
||||
(t.id, t.company)
|
||||
for t in itertools.chain.from_iterable(companies.values())
|
||||
)
|
||||
)
|
||||
with ThreadPoolExecutor(max_workers=EventSettings.max_workers) as pool:
|
||||
task_metrics = zip(
|
||||
task_ids, pool.map(get_scalar_average_per_iter, task_ids)
|
||||
task_ids, pool.map(get_scalar_average_per_iter, task_ids, company_ids)
|
||||
)
|
||||
|
||||
task_names = {
|
||||
t.id: t.name for t in itertools.chain.from_iterable(companies.values())
|
||||
}
|
||||
res = defaultdict(lambda: defaultdict(dict))
|
||||
for task_id, task_data in task_metrics:
|
||||
task_name = task_name_by_id[task_id]
|
||||
task_name = task_names[task_id]
|
||||
for metric_key, metric_data in task_data.items():
|
||||
for variant_key, variant_data in metric_data.items():
|
||||
variant_data["name"] = task_name
|
||||
@@ -166,6 +183,75 @@ class EventMetrics:
|
||||
|
||||
return res
|
||||
|
||||
def get_task_single_value_metrics(
    self,
    companies: TaskCompanies,
    metric_variants: MetricVariants = None,
) -> Mapping[str, Sequence[dict]]:
    """
    For the requested tasks return all the events delivered for the single iteration (-2**31)

    :param companies: mapping of company id to the tasks of that company
    :param metric_variants: optional metrics/variants to limit the returned events to
    :return: mapping of task id to the list of its events, each event reduced to
        the "metric", "variant", "value" and "timestamp" fields.
        Empty if none of the companies has a scalar events index
    """
    # keep only the companies that have a scalar events index, as task id lists
    companies = {
        company_id: [t.id for t in tasks]
        for company_id, tasks in companies.items()
        if not check_empty_data(
            self.es, company_id=company_id, event_type=EventType.metrics_scalar
        )
    }
    if not companies:
        return {}

    # one query per company, run concurrently; the results are flattened
    with ThreadPoolExecutor(max_workers=EventSettings.max_workers) as pool:
        task_events = list(
            itertools.chain.from_iterable(
                pool.map(
                    partial(
                        self._get_task_single_value_metrics,
                        metric_variants=metric_variants,
                    ),
                    companies.items(),
                )
            ),
        )

    def _get_value(event: dict):
        return {
            field: event.get(field)
            for field in ("metric", "variant", "value", "timestamp")
        }

    return {
        task: [_get_value(e) for e in events]
        for task, events in bucketize(task_events, itemgetter("task")).items()
    }
|
||||
|
||||
def _get_task_single_value_metrics(
    self, tasks: Tuple[str, Sequence[str]], metric_variants: MetricVariants = None
) -> Sequence[dict]:
    """
    Return the scalar events reported on the SINGLE_SCALAR_ITERATION iteration
    for the tasks of one company.

    :param tasks: tuple of (company id, ids of the tasks of this company)
    :param metric_variants: optional metrics/variants to limit the returned events to
    :return: the "_source" documents of the matching events, empty list if none
    """
    company_id, task_ids = tasks
    must = [
        {"terms": {"task": task_ids}},
        {"term": {"iter": SINGLE_SCALAR_ITERATION}},
    ]
    if metric_variants:
        must.append(get_metric_variants_condition(metric_variants))

    es_req = {
        # no paging is done: at most this many events are returned
        "size": 10000,
        "query": {"bool": {"must": must}},
    }
    with translate_errors_context():
        es_res = search_company_events(
            body=es_req,
            es=self.es,
            company_id=company_id,
            event_type=EventType.metrics_scalar,
        )
    if not es_res["hits"]["total"]["value"]:
        return []

    return [hit["_source"] for hit in es_res["hits"]["hits"]]
|
||||
|
||||
MetricInterval = Tuple[str, str, int, int]
|
||||
MetricIntervalGroup = Tuple[int, Sequence[Tuple[str, str]]]
|
||||
|
||||
@@ -219,11 +305,16 @@ class EventMetrics:
|
||||
Return the list of metric variant intervals as the following tuple:
|
||||
(metric, variant, interval, samples)
|
||||
"""
|
||||
must = [{"term": {"task": task_id}}]
|
||||
must = self._task_conditions(task_id)
|
||||
if metric_variants:
|
||||
must.append(get_metric_variants_condition(metric_variants))
|
||||
query = {"bool": {"must": must}}
|
||||
|
||||
search_args = dict(es=self.es, company_id=company_id, event_type=event_type)
|
||||
max_metrics, max_variants = get_max_metric_and_variant_counts(
|
||||
query=query,
|
||||
**search_args,
|
||||
)
|
||||
max_variants = int(max_variants // 2)
|
||||
es_req = {
|
||||
"size": 0,
|
||||
"query": query,
|
||||
@@ -231,14 +322,14 @@ class EventMetrics:
|
||||
"metrics": {
|
||||
"terms": {
|
||||
"field": "metric",
|
||||
"size": EventSettings.max_metrics_count,
|
||||
"size": max_metrics,
|
||||
"order": {"_key": "asc"},
|
||||
},
|
||||
"aggs": {
|
||||
"variants": {
|
||||
"terms": {
|
||||
"field": "variant",
|
||||
"size": EventSettings.max_variants_count,
|
||||
"size": max_variants,
|
||||
"order": {"_key": "asc"},
|
||||
},
|
||||
"aggs": {
|
||||
@@ -252,10 +343,7 @@ class EventMetrics:
|
||||
},
|
||||
}
|
||||
|
||||
with translate_errors_context(), TimingContext("es", "task_stats_get_interval"):
|
||||
es_res = search_company_events(
|
||||
self.es, company_id=company_id, event_type=event_type, body=es_req,
|
||||
)
|
||||
es_res = search_company_events(body=es_req, **search_args)
|
||||
|
||||
aggs_result = es_res.get("aggregations")
|
||||
if not aggs_result:
|
||||
@@ -276,12 +364,12 @@ class EventMetrics:
|
||||
total amount of intervals does not exceed the samples
|
||||
Return the interval and resulting amount of intervals
|
||||
"""
|
||||
count = safe_get(data, "count/value", default=0)
|
||||
count = nested_get(data, ("count", "value"), default=0)
|
||||
if count < samples:
|
||||
return metric, variant, 1, count
|
||||
|
||||
min_index = safe_get(data, "min_index/value", default=0)
|
||||
max_index = safe_get(data, "max_index/value", default=min_index)
|
||||
min_index = nested_get(data, ("min_index", "value"), default=0)
|
||||
max_index = nested_get(data, ("max_index", "value"), default=min_index)
|
||||
index_range = max_index - min_index + 1
|
||||
interval = max(1, math.ceil(float(index_range) / samples))
|
||||
max_samples = math.ceil(float(index_range) / interval)
|
||||
@@ -307,33 +395,41 @@ class EventMetrics:
|
||||
"""
|
||||
interval, metrics = metrics_interval
|
||||
aggregation = self._add_aggregation_average(key.get_aggregation(interval))
|
||||
aggs = {
|
||||
"metrics": {
|
||||
"terms": {
|
||||
"field": "metric",
|
||||
"size": EventSettings.max_metrics_count,
|
||||
"order": {"_key": "asc"},
|
||||
},
|
||||
"aggs": {
|
||||
"variants": {
|
||||
"terms": {
|
||||
"field": "variant",
|
||||
"size": EventSettings.max_variants_count,
|
||||
"order": {"_key": "asc"},
|
||||
},
|
||||
"aggs": aggregation,
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
aggs_result = self._query_aggregation_for_task_metrics(
|
||||
company_id=company_id,
|
||||
event_type=event_type,
|
||||
aggs=aggs,
|
||||
task_id=task_id,
|
||||
metrics=metrics,
|
||||
query = self._get_task_metrics_query(task_id=task_id, metrics=metrics)
|
||||
search_args = dict(es=self.es, company_id=company_id, event_type=event_type)
|
||||
max_metrics, max_variants = get_max_metric_and_variant_counts(
|
||||
query=query,
|
||||
**search_args,
|
||||
)
|
||||
max_variants = int(max_variants // 2)
|
||||
es_req = {
|
||||
"size": 0,
|
||||
"query": query,
|
||||
"aggs": {
|
||||
"metrics": {
|
||||
"terms": {
|
||||
"field": "metric",
|
||||
"size": max_metrics,
|
||||
"order": {"_key": "asc"},
|
||||
},
|
||||
"aggs": {
|
||||
"variants": {
|
||||
"terms": {
|
||||
"field": "variant",
|
||||
"size": max_variants,
|
||||
"order": {"_key": "asc"},
|
||||
},
|
||||
"aggs": aggregation,
|
||||
}
|
||||
},
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
with translate_errors_context():
|
||||
es_res = search_company_events(body=es_req, **search_args)
|
||||
|
||||
aggs_result = es_res.get("aggregations")
|
||||
if not aggs_result:
|
||||
return {}
|
||||
|
||||
@@ -360,19 +456,20 @@ class EventMetrics:
|
||||
for key, value in aggregation.items()
|
||||
}
|
||||
|
||||
def _query_aggregation_for_task_metrics(
|
||||
self,
|
||||
company_id: str,
|
||||
event_type: EventType,
|
||||
aggs: dict,
|
||||
@staticmethod
|
||||
def _task_conditions(task_id: str) -> list:
|
||||
return [
|
||||
{"term": {"task": task_id}},
|
||||
{"range": {"iter": {"gt": SINGLE_SCALAR_ITERATION}}},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def _get_task_metrics_query(
|
||||
cls,
|
||||
task_id: str,
|
||||
metrics: Sequence[Tuple[str, str]],
|
||||
) -> dict:
|
||||
"""
|
||||
Return the result of elastic search query for the given aggregation filtered
|
||||
by the given task_ids and metrics
|
||||
"""
|
||||
must = [{"term": {"task": task_id}}]
|
||||
):
|
||||
must = cls._task_conditions(task_id)
|
||||
if metrics:
|
||||
should = [
|
||||
{
|
||||
@@ -387,25 +484,98 @@ class EventMetrics:
|
||||
]
|
||||
must.append({"bool": {"should": should}})
|
||||
|
||||
es_req = {
|
||||
"size": 0,
|
||||
"query": {"bool": {"must": must}},
|
||||
"aggs": aggs,
|
||||
}
|
||||
return {"bool": {"must": must}}
|
||||
|
||||
with translate_errors_context(), TimingContext("es", "task_stats_scalar"):
|
||||
es_res = search_company_events(
|
||||
self.es, company_id=company_id, event_type=event_type, body=es_req,
|
||||
def get_multi_task_metrics(self, companies: TaskCompanies, event_type: EventType) -> Mapping[str, list]:
|
||||
"""
|
||||
For the requested tasks return reported metrics and variants
|
||||
"""
|
||||
tasks_ids = {
|
||||
company: [t.id for t in tasks]
|
||||
for company, tasks in companies.items()
|
||||
}
|
||||
with ThreadPoolExecutor(EventSettings.max_workers) as pool:
|
||||
companies_res: Sequence = list(
|
||||
pool.map(
|
||||
partial(
|
||||
self._get_multi_task_metrics,
|
||||
event_type=event_type,
|
||||
),
|
||||
tasks_ids.items(),
|
||||
)
|
||||
)
|
||||
|
||||
return es_res.get("aggregations")
|
||||
if len(companies_res) == 1:
|
||||
return companies_res[0]
|
||||
|
||||
def get_tasks_metrics(
|
||||
res = defaultdict(set)
|
||||
for c_res in companies_res:
|
||||
for m, vars_ in c_res.items():
|
||||
res[m].update(vars_)
|
||||
|
||||
return {
|
||||
k: list(v)
|
||||
for k, v in res.items()
|
||||
}
|
||||
|
||||
def _get_multi_task_metrics(
    self, company_tasks: Tuple[str, Sequence[str]], event_type: EventType
) -> Mapping[str, list]:
    """
    Return the metrics and variants reported by the tasks of one company.

    :param company_tasks: tuple of (company id, ids of the tasks of this company)
    :param event_type: the type of the events to look at
    :return: mapping of metric name to the list of its variant names.
        Empty if the company has no index for the event type
        or the search returned no aggregations
    """
    company_id, task_ids = company_tasks
    if check_empty_data(self.es, company_id, event_type):
        return {}

    search_args = dict(
        es=self.es,
        company_id=company_id,
        event_type=event_type,
    )
    query = QueryBuilder.terms("task", task_ids)
    max_metrics, max_variants = get_max_metric_and_variant_counts(
        query=query,
        **search_args,
    )
    es_req = {
        "size": 0,
        "query": query,
        "aggs": {
            "metrics": {
                "terms": {
                    "field": "metric",
                    "size": max_metrics,
                    "order": {"_key": "asc"},
                },
                "aggs": {
                    "variants": {
                        "terms": {
                            "field": "variant",
                            "size": max_variants,
                            "order": {"_key": "asc"},
                        },
                    }
                },
            }
        },
    }

    # same error translation as the other event searches in this class
    with translate_errors_context():
        es_res = search_company_events(
            body=es_req,
            **search_args,
        )

    aggs_result = es_res.get("aggregations")
    if not aggs_result:
        return {}

    return {
        mb["key"]: [vb["key"] for vb in mb["variants"]["buckets"]]
        for mb in aggs_result["metrics"]["buckets"]
    }
|
||||
|
||||
def get_task_metrics(
|
||||
self, company_id, task_ids: Sequence, event_type: EventType
|
||||
) -> Sequence:
|
||||
"""
|
||||
For the requested tasks return all the metrics that
|
||||
reported events of the requested types
|
||||
For the requested tasks return reported metrics per task
|
||||
"""
|
||||
if check_empty_data(self.es, company_id, event_type):
|
||||
return {}
|
||||
@@ -426,24 +596,23 @@ class EventMetrics:
|
||||
) -> Sequence:
|
||||
es_req = {
|
||||
"size": 0,
|
||||
"query": {"bool": {"must": [{"term": {"task": task_id}}]}},
|
||||
"query": {"bool": {"must": self._task_conditions(task_id)}},
|
||||
"aggs": {
|
||||
"metrics": {
|
||||
"terms": {
|
||||
"field": "metric",
|
||||
"size": EventSettings.max_metrics_count,
|
||||
"size": EventSettings.max_es_buckets,
|
||||
"order": {"_key": "asc"},
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
with translate_errors_context(), TimingContext("es", "_get_task_metrics"):
|
||||
es_res = search_company_events(
|
||||
self.es, company_id=company_id, event_type=event_type, body=es_req
|
||||
)
|
||||
es_res = search_company_events(
|
||||
self.es, company_id=company_id, event_type=event_type, body=es_req
|
||||
)
|
||||
|
||||
return [
|
||||
metric["key"]
|
||||
for metric in safe_get(es_res, "aggregations/metrics/buckets", default=[])
|
||||
for metric in nested_get(es_res, ("aggregations", "metrics", "buckets"), default=[])
|
||||
]
|
||||
|
||||
@@ -4,6 +4,7 @@ import attr
|
||||
import jsonmodels.models
|
||||
import jwt
|
||||
from elasticsearch import Elasticsearch
|
||||
from jwt.algorithms import get_default_algorithms
|
||||
|
||||
from apiserver.bll.event.event_common import (
|
||||
check_empty_data,
|
||||
@@ -16,7 +17,6 @@ from apiserver.bll.event.event_common import (
|
||||
from apiserver.bll.event.scalar_key import ScalarKeyEnum, ScalarKey
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.errors import translate_errors_context
|
||||
from apiserver.timing_context import TimingContext
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True)
|
||||
@@ -64,21 +64,20 @@ class EventsIterator:
|
||||
self,
|
||||
event_type: EventType,
|
||||
company_id: str,
|
||||
task_id: str,
|
||||
task_ids: Sequence[str],
|
||||
metric_variants: MetricVariants = None,
|
||||
) -> int:
|
||||
query, _ = self._get_initial_query_and_must(task_id, metric_variants)
|
||||
if check_empty_data(self.es, company_id, event_type):
|
||||
return 0
|
||||
|
||||
query, _ = self._get_initial_query_and_must(task_ids, metric_variants)
|
||||
es_req = {
|
||||
"query": query,
|
||||
}
|
||||
|
||||
with translate_errors_context(), TimingContext("es", "count_task_events"):
|
||||
with translate_errors_context():
|
||||
es_result = count_company_events(
|
||||
self.es,
|
||||
company_id=company_id,
|
||||
event_type=event_type,
|
||||
body=es_req,
|
||||
routing=task_id,
|
||||
self.es, company_id=company_id, event_type=event_type, body=es_req,
|
||||
)
|
||||
|
||||
return es_result["count"]
|
||||
@@ -101,7 +100,7 @@ class EventsIterator:
|
||||
For the last key-field value all the events are brought (even if the resulting size exceeds batch_size)
|
||||
so that events with this value will not be lost between the calls.
|
||||
"""
|
||||
query, must = self._get_initial_query_and_must(task_id, metric_variants)
|
||||
query, must = self._get_initial_query_and_must([task_id], metric_variants)
|
||||
|
||||
# retrieve the next batch of events
|
||||
es_req = {
|
||||
@@ -113,13 +112,9 @@ class EventsIterator:
|
||||
if from_key_value:
|
||||
es_req["search_after"] = [from_key_value]
|
||||
|
||||
with translate_errors_context(), TimingContext("es", "get_task_events"):
|
||||
with translate_errors_context():
|
||||
es_result = search_company_events(
|
||||
self.es,
|
||||
company_id=company_id,
|
||||
event_type=event_type,
|
||||
body=es_req,
|
||||
routing=task_id,
|
||||
self.es, company_id=company_id, event_type=event_type, body=es_req,
|
||||
)
|
||||
hits = es_result["hits"]["hits"]
|
||||
hits_total = es_result["hits"]["total"]["value"]
|
||||
@@ -139,11 +134,7 @@ class EventsIterator:
|
||||
},
|
||||
}
|
||||
es_result = search_company_events(
|
||||
self.es,
|
||||
company_id=company_id,
|
||||
event_type=event_type,
|
||||
body=es_req,
|
||||
routing=task_id,
|
||||
self.es, company_id=company_id, event_type=event_type, body=es_req,
|
||||
)
|
||||
last_second_hits = es_result["hits"]["hits"]
|
||||
if not last_second_hits or len(last_second_hits) < 2:
|
||||
@@ -167,14 +158,14 @@ class EventsIterator:
|
||||
|
||||
@staticmethod
|
||||
def _get_initial_query_and_must(
|
||||
task_id: str, metric_variants: MetricVariants = None
|
||||
task_ids: Sequence[str], metric_variants: MetricVariants = None
|
||||
) -> Tuple[dict, list]:
|
||||
if not metric_variants:
|
||||
must = [{"term": {"task": task_id}}]
|
||||
query = {"term": {"task": task_id}}
|
||||
query = {"terms": {"task": task_ids}}
|
||||
must = [query]
|
||||
else:
|
||||
must = [
|
||||
{"term": {"task": task_id}},
|
||||
{"terms": {"task": task_ids}},
|
||||
get_metric_variants_condition(metric_variants),
|
||||
]
|
||||
query = {"bool": {"must": must}}
|
||||
@@ -188,7 +179,7 @@ class Scroll(jsonmodels.models.Base):
|
||||
key=config.get(
|
||||
"services.events.events_retrieval.scroll_id_key", "1234567890"
|
||||
),
|
||||
).decode()
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_scroll_id(cls, scroll_id: str):
|
||||
@@ -199,6 +190,7 @@ class Scroll(jsonmodels.models.Base):
|
||||
key=config.get(
|
||||
"services.events.events_retrieval.scroll_id_key", "1234567890"
|
||||
),
|
||||
algorithms=get_default_algorithms(),
|
||||
)
|
||||
)
|
||||
except jwt.PyJWTError:
|
||||
|
||||
455
apiserver/bll/event/history_debug_image_iterator.py
Normal file
455
apiserver/bll/event/history_debug_image_iterator.py
Normal file
@@ -0,0 +1,455 @@
|
||||
import operator
|
||||
from operator import attrgetter
|
||||
from typing import Sequence, Tuple, Optional, Mapping
|
||||
|
||||
import attr
|
||||
from boltons.iterutils import first, bucketize
|
||||
from elasticsearch import Elasticsearch
|
||||
from jsonmodels.fields import StringField, IntField, BoolField, ListField
|
||||
from jsonmodels.models import Base
|
||||
from redis.client import StrictRedis
|
||||
|
||||
from apiserver.utilities.dicts import nested_get
|
||||
from .event_common import (
|
||||
EventType,
|
||||
EventSettings,
|
||||
check_empty_data,
|
||||
search_company_events,
|
||||
get_max_metric_and_variant_counts,
|
||||
)
|
||||
from apiserver.apimodels import JsonSerializableMixin
|
||||
from apiserver.bll.redis_cache_manager import RedisCacheManager
|
||||
from apiserver.apierrors import errors
|
||||
|
||||
|
||||
class VariantState(Base):
    """
    Per variant part of the debug image navigation state:
    the variant name, its metric and the iterations range kept for it
    """

    name: str = StringField(required=True)
    metric: str = StringField(default=None)
    min_iteration: int = IntField()
    max_iteration: int = IntField()
|
||||
|
||||
|
||||
class DebugImageSampleState(Base, JsonSerializableMixin):
    """
    Navigation state of the debug image samples of a task.
    Holds the current position (metric, variant, iteration)
    and the states of the variants that can be navigated
    """

    id: str = StringField(required=True)
    iteration: int = IntField()
    variant: str = StringField()
    task: str = StringField()
    metric: str = StringField()
    variant_states: Sequence[VariantState] = ListField([VariantState])
    warning: str = StringField()
    # when set, navigation considers only the variants of the current metric
    navigate_current_metric: bool = BoolField(default=True)
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True)
|
||||
class VariantSampleResult(object):
    """
    Result of a debug image sample lookup. All the fields default to None
    """

    # id of the navigation state that the result belongs to
    scroll_id: Optional[str] = None
    # the found event document, None if no sample was found
    event: Optional[dict] = None
    # iterations range of the variant of the found event
    min_iteration: Optional[int] = None
    max_iteration: Optional[int] = None
|
||||
|
||||
|
||||
class HistoryDebugImageIterator:
|
||||
event_type = EventType.metrics_image
|
||||
|
||||
def __init__(self, redis: StrictRedis, es: Elasticsearch):
    """
    :param redis: redis client backing the navigation states cache
    :param es: Elasticsearch client used for the events queries
    """
    self.es = es
    # states are stored as DebugImageSampleState and expire after
    # EventSettings.state_expiration_sec
    self.cache_manager = RedisCacheManager(
        state_class=DebugImageSampleState,
        redis=redis,
        expiration_interval=EventSettings.state_expiration_sec,
    )
|
||||
|
||||
def get_next_sample(
    self,
    company_id: str,
    task: str,
    state_id: str,
    navigate_earlier: bool,
    next_iteration: bool,
) -> VariantSampleResult:
    """
    Get the sample for next/prev variant on the current iteration
    If it does not exist then try getting the sample for the first/last variant
    from the next/prev iteration

    :param company_id: id of the company whose events are searched
    :param task: id of the task; should match the task stored in the state
    :param state_id: id of the navigation state stored in the cache
    :param navigate_earlier: navigate backwards if True, forward otherwise
    :param next_iteration: if True then the current iteration is skipped
        and the sample is looked for in another iteration only
    :return: the result with the found event and the iterations range of its variant.
        If no sample is found then only scroll_id is set
    :raise errors.bad_request.InvalidScrollId: if the state is not found
        or belongs to another task
    """
    res = VariantSampleResult(scroll_id=state_id)
    state = self.cache_manager.get_state(state_id)
    if not state or state.task != task:
        raise errors.bad_request.InvalidScrollId(scroll_id=state_id)

    if check_empty_data(self.es, company_id=company_id, event_type=self.event_type):
        return res

    if next_iteration:
        event = self._get_next_for_another_iteration(
            company_id=company_id, navigate_earlier=navigate_earlier, state=state
        )
    else:
        # try the current iteration first; the generator is lazy so another
        # iteration is queried only if the current one gave nothing
        # noinspection PyArgumentList
        event = first(
            f(company_id=company_id, navigate_earlier=navigate_earlier, state=state)
            for f in (
                self._get_next_for_current_iteration,
                self._get_next_for_another_iteration,
            )
        )
    if not event:
        return res

    self._fill_res_and_update_state(event=event, res=res, state=state)
    self.cache_manager.set_state(state=state)
    return res
|
||||
|
||||
@staticmethod
|
||||
def _fill_res_and_update_state(
    event: dict, res: VariantSampleResult, state: DebugImageSampleState
):
    """
    Move the state position to the passed event and put the event into the result.
    If the state holds a variant state for the metric and variant of the event
    then its iterations range is copied to the result.

    :param event: the found event; "variant", "metric" and "iter" keys are required
    :param res: the result to fill, modified in place
    :param state: the navigation state to update, modified in place
    """
    state.variant = event["variant"]
    state.metric = event["metric"]
    state.iteration = event["iter"]
    res.event = event
    var_state = first(
        vs
        for vs in state.variant_states
        if vs.name == state.variant and vs.metric == state.metric
    )
    if var_state:
        res.min_iteration = var_state.min_iteration
        res.max_iteration = var_state.max_iteration
|
||||
|
||||
@staticmethod
|
||||
def _get_metric_conditions(variants: Sequence[VariantState]) -> dict:
    """
    Build the ES condition matching the events of any of the passed variants.
    The variants are grouped by metric. An event matches if its metric is one of
    the groups and, for some variant of that group, its variant equals the variant
    name and its iteration is not below the min_iteration of that variant.

    :param variants: the variant states to match
    :return: ES bool/should query
    """
    metrics = bucketize(variants, key=attrgetter("metric"))

    def _get_variants_conditions(metric_variants: Sequence[VariantState]) -> dict:
        variants_conditions = [
            {
                "bool": {
                    "must": [
                        {"term": {"variant": v.name}},
                        {"range": {"iter": {"gte": v.min_iteration}}},
                    ]
                }
            }
            for v in metric_variants
        ]
        return {"bool": {"should": variants_conditions}}

    metrics_conditions = [
        {
            "bool": {
                "must": [
                    {"term": {"metric": metric}},
                    _get_variants_conditions(metric_variants),
                ]
            }
        }
        for metric, metric_variants in metrics.items()
    ]
    return {"bool": {"should": metrics_conditions}}
|
||||
|
||||
def _get_next_for_current_iteration(
    self, company_id: str, navigate_earlier: bool, state: DebugImageSampleState
) -> Optional[dict]:
    """
    Get the sample for next (if navigate_earlier is False) or previous variant
    sorted by (metric, name) for the same iteration
    Only variants for which the iteration falls into their valid range are considered
    Return None if no such variant or sample is found
    """
    if state.navigate_current_metric:
        variants = [
            var_state
            for var_state in state.variant_states
            if var_state.metric == state.metric
        ]
    else:
        variants = state.variant_states

    # keep the variants that come before/after the current one
    # and that already have samples at the current iteration
    cmp = operator.lt if navigate_earlier else operator.gt
    variants = [
        var_state
        for var_state in variants
        if cmp((var_state.metric, var_state.name), (state.metric, state.variant))
        and var_state.min_iteration <= state.iteration
    ]
    if not variants:
        return None

    must_conditions = [
        {"term": {"task": state.task}},
        {"term": {"iter": state.iteration}},
        self._get_metric_conditions(variants),
        {"exists": {"field": "url"}},
    ]
    order = "desc" if navigate_earlier else "asc"
    es_req = {
        # the closest variant in the navigation direction is enough
        "size": 1,
        "sort": [{"metric": order}, {"variant": order}, {"url": "desc"}],
        "query": {"bool": {"must": must_conditions}},
    }

    es_res = search_company_events(
        self.es,
        company_id=company_id,
        event_type=self.event_type,
        body=es_req,
    )

    hits = nested_get(es_res, ("hits", "hits"))
    if not hits:
        return None

    return hits[0]["_source"]
|
||||
|
||||
def _get_next_for_another_iteration(
    self, company_id: str, navigate_earlier: bool, state: DebugImageSampleState
) -> Optional[dict]:
    """
    Look for a sample in an iteration other than the one stored in the state.

    Going forward (navigate_earlier is False) the events with iter greater than
    the current one are sorted ascending by iter, metric and variant and the
    first one is taken. Going backward the events with smaller iter are sorted
    descending and only the variants whose min_iteration is below the current
    iteration are searched. When state.navigate_current_metric is set only the
    variants of the current metric are used.

    :return: the "_source" of the matching event, or None if there is no
        candidate variant or the search returned no hits
    """
    candidates = state.variant_states
    if state.navigate_current_metric:
        candidates = [vs for vs in candidates if vs.metric == state.metric]

    if navigate_earlier:
        range_operator, order = "lt", "desc"
        # variants that start at or after the current iteration have nothing earlier
        candidates = [vs for vs in candidates if vs.min_iteration < state.iteration]
    else:
        range_operator, order = "gt", "asc"

    if not candidates:
        return None

    es_req = {
        "size": 1,
        "sort": [{"iter": order}, {"metric": order}, {"variant": order}, {"url": "desc"}],
        "query": {
            "bool": {
                "must": [
                    {"term": {"task": state.task}},
                    self._get_metric_conditions(candidates),
                    {"range": {"iter": {range_operator: state.iteration}}},
                    {"exists": {"field": "url"}},
                ]
            }
        },
    }
    es_res = search_company_events(
        self.es,
        company_id=company_id,
        event_type=self.event_type,
        body=es_req,
    )

    hits = nested_get(es_res, ("hits", "hits"))
    return hits[0]["_source"] if hits else None
|
||||
|
||||
def get_sample_for_variant(
    self,
    company_id: str,
    task: str,
    metric: str,
    variant: str,
    iteration: Optional[int] = None,
    refresh: bool = False,
    state_id: str = None,
    navigate_current_metric: bool = True,
) -> VariantSampleResult:
    """
    Return the sample of the given task/metric/variant at the requested
    iteration, or the latest one before it. Without an iteration the latest
    sample is returned.

    A scroll state is fetched (or created) through the cache manager and its id
    is returned in the result as scroll_id. An existing state is rejected with
    InvalidScrollId when its task or navigation mode differ from the passed
    ones, or when it navigates a single metric that is not the passed one.
    With refresh=True the variant states of an existing state are recalculated.

    The result is returned unfilled (apart from scroll_id where available) when
    there is no data, the variant is unknown to the state or no event matches.
    """
    res = VariantSampleResult()
    if check_empty_data(self.es, company_id=company_id, event_type=self.event_type):
        return res

    def init_state(state_: DebugImageSampleState):
        # a brand new state: remember what we navigate and load the variant ranges
        state_.task = task
        state_.metric = metric
        state_.navigate_current_metric = navigate_current_metric
        self._reset_variant_states(company_id=company_id, state=state_)

    def validate_state(state_: DebugImageSampleState):
        mismatch = (
            state_.task != task
            or state_.navigate_current_metric != navigate_current_metric
            or (state_.navigate_current_metric and state_.metric != metric)
        )
        if mismatch:
            raise errors.bad_request.InvalidScrollId(
                "Task and metric stored in the state do not match the passed ones",
                scroll_id=state_.id,
            )
        # variant states without a metric get the passed metric assigned
        for stored_variant in state_.variant_states:
            if stored_variant.metric is None:
                stored_variant.metric = metric
        if refresh:
            self._reset_variant_states(company_id=company_id, state=state_)

    state: DebugImageSampleState
    with self.cache_manager.get_or_create_state(
        state_id=state_id, init_state=init_state, validate_state=validate_state,
    ) as state:
        res.scroll_id = state.id

        var_state = first(
            vs
            for vs in state.variant_states
            if vs.name == variant and vs.metric == metric
        )
        if not var_state:
            return res

        res.min_iteration = var_state.min_iteration
        res.max_iteration = var_state.max_iteration

        # never go below the variant minimal iteration;
        # cap from above only when an iteration was requested
        iter_range = {"lte": iteration} if iteration is not None else {}
        iter_range["gte"] = var_state.min_iteration

        es_req = {
            "size": 1,
            "sort": [{"iter": "desc"}, {"url": "desc"}],
            "query": {
                "bool": {
                    "must": [
                        {"term": {"task": task}},
                        {"term": {"metric": metric}},
                        {"term": {"variant": variant}},
                        {"exists": {"field": "url"}},
                        {"range": {"iter": iter_range}},
                    ]
                }
            },
        }

        es_res = search_company_events(
            self.es,
            company_id=company_id,
            event_type=self.event_type,
            body=es_req,
        )

        hits = nested_get(es_res, ("hits", "hits"))
        if hits:
            self._fill_res_and_update_state(
                event=hits[0]["_source"], res=res, state=state
            )
        return res
|
||||
|
||||
def _reset_variant_states(self, company_id: str, state: DebugImageSampleState):
    """
    Replace state.variant_states with freshly queried iteration ranges.

    The ranges are requested for the state metric only when
    state.navigate_current_metric is set, otherwise for all the task metrics.
    """
    metric_filter = state.metric if state.navigate_current_metric else None
    ranges_by_metric = self._get_metric_variant_iterations(
        company_id=company_id, task=state.task, metric=metric_filter,
    )

    refreshed = []
    for metric_name, variant_ranges in ranges_by_metric.items():
        for variant_name, first_iter, last_iter in variant_ranges:
            refreshed.append(
                VariantState(
                    metric=metric_name,
                    name=variant_name,
                    min_iteration=first_iter,
                    max_iteration=last_iter,
                )
            )
    state.variant_states = refreshed
|
||||
|
||||
def _get_metric_variant_iterations(
    self, company_id: str, task: str, metric: str,
) -> Mapping[str, Sequence[Tuple[str, int, int]]]:
    """
    Query, per metric and variant of the task, the iteration range of the
    events that have a url.

    :param metric: when not None only this metric is queried
    :return: mapping of metric name to a list of
        (variant name, min iteration, max iteration) tuples. The max iteration
        is the highest "iter" of the variant. The min iteration is the lowest
        value among the per-url highest iterations.
    """
    filters = [
        {"term": {"task": task}},
        {"exists": {"field": "url"}},
    ]
    if metric is not None:
        filters.append({"term": {"metric": metric}})
    query = {"bool": {"must": filters}}

    search_args = {
        "es": self.es,
        "company_id": company_id,
        "event_type": self.event_type,
    }
    max_metrics, max_variants = get_max_metric_and_variant_counts(
        query=query, **search_args
    )
    max_variants = int(max_variants // 2)

    # group by urls and choose the minimal iteration
    # from all the maximal iterations per url
    urls_agg = {
        "terms": {
            "field": "url",
            "order": {"max_iter": "asc"},
            "size": 1,
        },
        "aggs": {
            # find max iteration for each url
            "max_iter": {"max": {"field": "iter"}}
        },
    }
    variants_agg = {
        "terms": {
            "field": "variant",
            "size": max_variants,
            "order": {"_key": "asc"},
        },
        "aggs": {
            "last_iter": {"max": {"field": "iter"}},
            "urls": urls_agg,
        },
    }
    metrics_agg = {
        "terms": {
            "field": "metric",
            "size": max_metrics,
            "order": {"_key": "asc"},
        },
        "aggs": {"variants": variants_agg},
    }
    es_req: dict = {
        "size": 0,
        "query": query,
        "aggs": {"metrics": metrics_agg},
    }

    es_res = search_company_events(body=es_req, **search_args)

    iterations = {}
    for metric_bucket in nested_get(es_res, ("aggregations", "metrics", "buckets")):
        metric_name = metric_bucket["key"]
        variant_ranges = []
        for variant_bucket in nested_get(metric_bucket, ("variants", "buckets")):
            variant_name = variant_bucket["key"]
            url_buckets = nested_get(variant_bucket, ("urls", "buckets"))
            first_iter = int(url_buckets[0]["max_iter"]["value"])
            last_iter = int(variant_bucket["last_iter"]["value"])
            variant_ranges.append((variant_name, first_iter, last_iter))
        iterations[metric_name] = variant_ranges
    return iterations
|
||||
316
apiserver/bll/event/history_plots_iterator.py
Normal file
316
apiserver/bll/event/history_plots_iterator.py
Normal file
@@ -0,0 +1,316 @@
|
||||
from typing import Sequence, Tuple, Optional, Mapping
|
||||
|
||||
import attr
|
||||
from boltons.iterutils import first
|
||||
from elasticsearch import Elasticsearch
|
||||
from jsonmodels.fields import StringField, IntField, ListField, BoolField
|
||||
from jsonmodels.models import Base
|
||||
from redis.client import StrictRedis
|
||||
|
||||
from .event_common import (
|
||||
EventType,
|
||||
uncompress_plot,
|
||||
EventSettings,
|
||||
check_empty_data,
|
||||
search_company_events,
|
||||
)
|
||||
from apiserver.apimodels import JsonSerializableMixin
|
||||
from apiserver.utilities.dicts import nested_get
|
||||
from apiserver.bll.redis_cache_manager import RedisCacheManager
|
||||
from apiserver.apierrors import errors
|
||||
|
||||
|
||||
class MetricState(Base):
    """Iteration range stored in the scroll state for a single metric."""

    # metric name
    name: str = StringField(default=None)
    # lowest iteration of the metric
    min_iteration: int = IntField()
    # highest iteration of the metric
    max_iteration: int = IntField()
|
||||
|
||||
|
||||
class PlotsSampleState(Base, JsonSerializableMixin):
    """Scroll state of the plots history navigation, kept by the cache manager."""

    # state id, handed to the callers as the scroll id
    id: str = StringField(required=True)
    # iteration of the last returned sample
    iteration: int = IntField()
    # task whose plots are navigated
    task: str = StringField()
    # metric of the last returned sample
    metric: str = StringField()
    # iteration ranges of the metrics known to the state
    metric_states: Sequence[MetricState] = ListField([MetricState])
    warning: str = StringField()
    # True: navigate inside the current metric only; False: across all the task metrics
    navigate_current_metric: bool = BoolField(default=True)
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True)
class MetricSamplesResult(object):
    """Plot events of one metric sample together with the navigation data."""

    # id of the scroll state to pass to the following calls
    scroll_id: Optional[str] = None
    # a factory gives every result its own list; a literal [] default
    # would be one list object shared by all the instances
    events: list = attr.Factory(list)
    # iteration range of the returned metric, None when not known
    min_iteration: Optional[int] = None
    max_iteration: Optional[int] = None
|
||||
|
||||
|
||||
class HistoryPlotsIterator:
    """
    Navigation over the plot events of a task, one (iteration, metric) sample
    at a time.

    The current position is kept in a PlotsSampleState handled by a
    RedisCacheManager; the events are read with Elasticsearch queries.
    """

    event_type = EventType.metrics_plot

    def __init__(self, redis: StrictRedis, es: Elasticsearch):
        self.es = es
        self.cache_manager = RedisCacheManager(
            state_class=PlotsSampleState,
            redis=redis,
            expiration_interval=EventSettings.state_expiration_sec,
        )

    def get_next_sample(
        self,
        company_id: str,
        task: str,
        state_id: str,
        navigate_earlier: bool,
        next_iteration: bool,
    ) -> MetricSamplesResult:
        """
        Move from the position stored in the state to the neighbouring sample.

        When next_iteration is set, or the state navigates a single metric, the
        closest other iteration is taken. Otherwise the next/previous metric of
        the current iteration is tried first and the closest other iteration is
        the fallback.

        :raise errors.bad_request.InvalidScrollId: if the state is missing or
            belongs to another task
        :return: the found events; a result with only scroll_id set if there is
            no data or nothing was found
        """
        res = MetricSamplesResult(scroll_id=state_id)
        state = self.cache_manager.get_state(state_id)
        if not state or state.task != task:
            raise errors.bad_request.InvalidScrollId(scroll_id=state_id)

        if check_empty_data(self.es, company_id=company_id, event_type=self.event_type):
            return res

        range_operator, order = ("lt", "desc") if navigate_earlier else ("gt", "asc")

        must_conditions = [{"term": {"task": state.task}}]
        if state.navigate_current_metric:
            must_conditions.append({"term": {"metric": state.metric}})

        other_iteration = {"range": {"iter": {range_operator: state.iteration}}}
        if next_iteration or state.navigate_current_metric:
            must_conditions.append(other_iteration)
        else:
            # same iteration but a metric on the requested side of the current one
            other_metric_same_iteration = {
                "bool": {
                    "must": [
                        {"term": {"iter": state.iteration}},
                        {"range": {"metric": {range_operator: state.metric}}},
                    ]
                }
            }
            must_conditions.append(
                {"bool": {"should": [other_metric_same_iteration, other_iteration]}}
            )

        events = self._get_metric_events_for_condition(
            company_id=company_id,
            task=state.task,
            order=order,
            must_conditions=must_conditions,
        )
        if not events:
            return res

        self._fill_res_and_update_state(events=events, res=res, state=state)
        self.cache_manager.set_state(state=state)
        return res

    def get_samples_for_metric(
        self,
        company_id: str,
        task: str,
        metric: str,
        iteration: Optional[int] = None,
        refresh: bool = False,
        state_id: str = None,
        navigate_current_metric: bool = True,
    ) -> MetricSamplesResult:
        """
        Return the plot events of the metric at the requested iteration or at
        the latest one before it. Without an iteration the latest events are
        returned.

        A scroll state is fetched (or created) through the cache manager and
        its id is returned as scroll_id. An existing state is rejected with
        InvalidScrollId when its task or navigation mode differ from the passed
        ones, or when it navigates a single metric that is not the passed one.
        With refresh=True the metric states of an existing state are recalculated.
        """
        res = MetricSamplesResult()
        if check_empty_data(self.es, company_id=company_id, event_type=self.event_type):
            return res

        def init_state(state_: PlotsSampleState):
            # a brand new state: remember what we navigate and load the metric ranges
            state_.task = task
            state_.metric = metric
            state_.navigate_current_metric = navigate_current_metric
            self._reset_metric_states(company_id=company_id, state=state_)

        def validate_state(state_: PlotsSampleState):
            mismatch = (
                state_.task != task
                or state_.navigate_current_metric != navigate_current_metric
                or (state_.navigate_current_metric and state_.metric != metric)
            )
            if mismatch:
                raise errors.bad_request.InvalidScrollId(
                    "Task and metric stored in the state do not match the passed ones",
                    scroll_id=state_.id,
                )
            if refresh:
                self._reset_metric_states(company_id=company_id, state=state_)

        state: PlotsSampleState
        with self.cache_manager.get_or_create_state(
            state_id=state_id, init_state=init_state, validate_state=validate_state,
        ) as state:
            res.scroll_id = state.id

            metric_state = first(ms for ms in state.metric_states if ms.name == metric)
            if not metric_state:
                return res

            res.min_iteration = metric_state.min_iteration
            res.max_iteration = metric_state.max_iteration

            must_conditions = [
                {"term": {"task": task}},
                {"term": {"metric": metric}},
            ]
            if iteration is not None:
                must_conditions.append({"range": {"iter": {"lte": iteration}}})

            events = self._get_metric_events_for_condition(
                company_id=company_id,
                task=state.task,
                order="desc",
                must_conditions=must_conditions,
            )
            if events:
                self._fill_res_and_update_state(events=events, res=res, state=state)
            return res

    def _reset_metric_states(self, company_id: str, state: PlotsSampleState):
        """
        Replace state.metric_states with freshly queried iteration ranges.

        The ranges are requested for the state metric only when
        state.navigate_current_metric is set, otherwise for all the task metrics.
        """
        metric_filter = state.metric if state.navigate_current_metric else None
        ranges_by_metric = self._get_metric_iterations(
            company_id=company_id, task=state.task, metric=metric_filter,
        )
        refreshed = []
        for metric_name, (first_iter, last_iter) in ranges_by_metric.items():
            refreshed.append(
                MetricState(
                    name=metric_name, min_iteration=first_iter, max_iteration=last_iter
                )
            )
        state.metric_states = refreshed

    def _get_metric_iterations(
        self, company_id: str, task: str, metric: str,
    ) -> Mapping[str, Tuple[int, int]]:
        """
        Query the lowest and the highest iteration of the task events per metric.

        :param metric: when not None only this metric is queried
        :return: mapping of metric name to (min iteration, max iteration)
        """
        filters = [{"term": {"task": task}}]
        if metric is not None:
            filters.append({"term": {"metric": metric}})

        metrics_agg = {
            "terms": {
                "field": "metric",
                "size": 5000,
                "order": {"_key": "asc"},
            },
            "aggs": {
                "last_iter": {"max": {"field": "iter"}},
                "first_iter": {"min": {"field": "iter"}},
            },
        }
        es_req: dict = {
            "size": 0,
            "query": {"bool": {"must": filters}},
            "aggs": {"metrics": metrics_agg},
        }

        es_res = search_company_events(
            body=es_req,
            es=self.es,
            company_id=company_id,
            event_type=self.event_type,
        )

        iterations = {}
        for bucket in nested_get(es_res, ("aggregations", "metrics", "buckets")):
            metric_name = bucket["key"]
            first_iter = int(bucket["first_iter"]["value"])
            last_iter = int(bucket["last_iter"]["value"])
            iterations[metric_name] = (first_iter, last_iter)
        return iterations

    @staticmethod
    def _fill_res_and_update_state(
        events: Sequence[dict], res: MetricSamplesResult, state: PlotsSampleState
    ):
        """
        Uncompress the plots of the events, put the events into the result and
        move the state to the metric and iteration of the first event.
        The iteration range of that metric is copied to the result when the
        state has a matching metric state.
        """
        for event in events:
            uncompress_plot(event)

        head = events[0]
        state.metric = head["metric"]
        state.iteration = head["iter"]
        res.events = events

        metric_state = first(
            ms for ms in state.metric_states if ms.name == state.metric
        )
        if metric_state:
            res.min_iteration = metric_state.min_iteration
            res.max_iteration = metric_state.max_iteration

    def _get_metric_events_for_condition(
        self, company_id: str, task: str, order: str, must_conditions: Sequence
    ) -> Sequence:
        """
        Return the events of the first metric of the first iteration that match
        the conditions, where "first" is decided by the passed order ("asc" or
        "desc") applied to both the iteration and the metric keys.

        Up to 100 events sorted by variant are returned; an empty list if
        nothing matches. The task parameter is not used by the query itself.
        """
        events_agg = {
            "top_hits": {
                "sort": {"variant": {"order": "asc"}},
                "size": 100,
            }
        }
        metrics_agg = {
            "terms": {
                "field": "metric",
                "size": 1,
                "order": {"_key": order},
            },
            "aggs": {"events": events_agg},
        }
        iters_agg = {
            "terms": {"field": "iter", "size": 1, "order": {"_key": order}},
            "aggs": {"metrics": metrics_agg},
        }
        es_req = {
            "size": 0,
            "query": {"bool": {"must": must_conditions}},
            "aggs": {"iters": iters_agg},
        }
        es_res = search_company_events(
            self.es,
            company_id=company_id,
            event_type=self.event_type,
            body=es_req,
        )

        aggregations = es_res.get("aggregations")
        if not aggregations:
            return []

        # descend into the single iteration bucket and then its single metric bucket
        iter_buckets = aggregations["iters"]["buckets"]
        if not iter_buckets:
            return []
        metric_buckets = iter_buckets[0]["metrics"]["buckets"]
        if not metric_buckets:
            return []

        return [
            hit["_source"]
            for hit in nested_get(metric_buckets[0], ("events", "hits", "hits"))
        ]
|
||||
53
apiserver/bll/event/metric_debug_images_iterator.py
Normal file
53
apiserver/bll/event/metric_debug_images_iterator.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from typing import Sequence, Tuple, Callable
|
||||
|
||||
from elasticsearch import Elasticsearch
|
||||
from redis.client import StrictRedis
|
||||
|
||||
from apiserver.utilities.dicts import nested_get
|
||||
from .event_common import EventType
|
||||
from .metric_events_iterator import MetricEventsIterator, VariantState
|
||||
|
||||
|
||||
class MetricDebugImagesIterator(MetricEventsIterator):
    """MetricEventsIterator specialization for the EventType.metrics_image events."""

    def __init__(self, redis: StrictRedis, es: Elasticsearch):
        super().__init__(redis, es, EventType.metrics_image)

    def _get_extra_conditions(self) -> Sequence[dict]:
        """Only the events that have a url are considered."""
        url_is_present = {"exists": {"field": "url"}}
        return [url_is_present]

    def _get_variant_state_aggs(self) -> Tuple[dict, Callable[[dict, VariantState], None]]:
        """
        Return the aggregation to run under each variant bucket together with
        the function that fills a VariantState from the resulting bucket.
        """
        last_iterations_of_url = {
            "top_hits": {
                "sort": {"iter": {"order": "desc"}},
                "size": 2,  # need two last iterations so that we can take
                # the second one as invalid
                "_source": "iter",
            }
        }
        aggs = {
            "urls": {
                "terms": {
                    "field": "url",
                    "order": {"max_iter": "desc"},
                    "size": 1,  # we need only one url from the most recent iteration
                },
                "aggs": {
                    "max_iter": {"max": {"field": "iter"}},
                    "iters": last_iterations_of_url,
                },
            }
        }

        def fill_variant_state_data(variant_bucket: dict, state: VariantState):
            """
            When the top url was reported in more than one iteration store the
            second latest of them as the state last_invalid_iteration
            """
            latest_url_bucket = nested_get(variant_bucket, ("urls", "buckets"))[0]
            url_iterations = nested_get(latest_url_bucket, ("iters", "hits", "hits"))
            if len(url_iterations) > 1:
                state.last_invalid_iteration = nested_get(
                    url_iterations[1], ("_source", "iter")
                )

        return aggs, fill_variant_state_data

    def _process_event(self, event: dict) -> dict:
        """Debug image events are returned as they are."""
        return event

    def _get_same_variant_events_order(self) -> dict:
        """Sort specification for the events of the same variant: by url, descending."""
        return {"url": {"order": "desc"}}
|
||||
@@ -1,11 +1,11 @@
|
||||
import abc
|
||||
from concurrent.futures.thread import ThreadPoolExecutor
|
||||
from datetime import datetime
|
||||
from functools import partial
|
||||
from operator import itemgetter
|
||||
from typing import Sequence, Tuple, Optional, Mapping
|
||||
from typing import Sequence, Tuple, Optional, Mapping, Callable
|
||||
|
||||
import attr
|
||||
import dpath
|
||||
from boltons.iterutils import first
|
||||
from elasticsearch import Elasticsearch
|
||||
from jsonmodels.fields import StringField, ListField, IntField
|
||||
@@ -19,12 +19,14 @@ from apiserver.bll.event.event_common import (
|
||||
search_company_events,
|
||||
EventType,
|
||||
get_metric_variants_condition,
|
||||
get_max_metric_and_variant_counts,
|
||||
)
|
||||
from apiserver.bll.redis_cache_manager import RedisCacheManager
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.errors import translate_errors_context
|
||||
from apiserver.database.model.task.metrics import MetricEventStats
|
||||
from apiserver.database.model.task.task import Task
|
||||
from apiserver.timing_context import TimingContext
|
||||
from apiserver.utilities.dicts import nested_get
|
||||
|
||||
|
||||
class VariantState(Base):
|
||||
@@ -49,65 +51,78 @@ class TaskScrollState(Base):
|
||||
self.last_min_iter = self.last_max_iter = None
|
||||
|
||||
|
||||
class DebugImageEventsScrollState(Base, JsonSerializableMixin):
|
||||
class MetricEventsScrollState(Base, JsonSerializableMixin):
|
||||
id: str = StringField(required=True)
|
||||
tasks: Sequence[TaskScrollState] = ListField([TaskScrollState])
|
||||
warning: str = StringField()
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True)
|
||||
class DebugImagesResult(object):
|
||||
class MetricEventsResult(object):
|
||||
metric_events: Sequence[tuple] = []
|
||||
next_scroll_id: str = None
|
||||
|
||||
|
||||
class DebugImagesIterator:
|
||||
EVENT_TYPE = EventType.metrics_image
|
||||
|
||||
def __init__(self, redis: StrictRedis, es: Elasticsearch):
|
||||
class MetricEventsIterator:
|
||||
def __init__(self, redis: StrictRedis, es: Elasticsearch, event_type: EventType):
|
||||
self.es = es
|
||||
self.event_type = event_type
|
||||
self.cache_manager = RedisCacheManager(
|
||||
state_class=DebugImageEventsScrollState,
|
||||
state_class=MetricEventsScrollState,
|
||||
redis=redis,
|
||||
expiration_interval=EventSettings.state_expiration_sec,
|
||||
)
|
||||
|
||||
def get_task_events(
|
||||
self,
|
||||
company_id: str,
|
||||
companies: Mapping[str, str],
|
||||
task_metrics: Mapping[str, dict],
|
||||
iter_count: int,
|
||||
navigate_earlier: bool = True,
|
||||
refresh: bool = False,
|
||||
state_id: str = None,
|
||||
) -> DebugImagesResult:
|
||||
if check_empty_data(self.es, company_id, self.EVENT_TYPE):
|
||||
return DebugImagesResult()
|
||||
) -> MetricEventsResult:
|
||||
companies = {
|
||||
task_id: company_id
|
||||
for task_id, company_id in companies.items()
|
||||
if not check_empty_data(
|
||||
self.es, company_id=company_id, event_type=self.event_type
|
||||
)
|
||||
}
|
||||
if not companies:
|
||||
return MetricEventsResult()
|
||||
|
||||
def init_state(state_: DebugImageEventsScrollState):
|
||||
state_.tasks = self._init_task_states(company_id, task_metrics)
|
||||
def init_state(state_: MetricEventsScrollState):
|
||||
state_.tasks = self._init_task_states(companies, task_metrics)
|
||||
|
||||
def validate_state(state_: DebugImageEventsScrollState):
|
||||
def validate_state(state_: MetricEventsScrollState):
|
||||
"""
|
||||
Validate that the metrics stored in the state are the same
|
||||
as requested in the current call.
|
||||
Refresh the state if requested
|
||||
"""
|
||||
if refresh:
|
||||
self._reinit_outdated_task_states(company_id, state_, task_metrics)
|
||||
self._reinit_outdated_task_states(companies, state_, task_metrics)
|
||||
|
||||
with self.cache_manager.get_or_create_state(
|
||||
state_id=state_id, init_state=init_state, validate_state=validate_state
|
||||
) as state:
|
||||
res = DebugImagesResult(next_scroll_id=state.id)
|
||||
res = MetricEventsResult(next_scroll_id=state.id)
|
||||
specific_variants_requested = any(
|
||||
variants
|
||||
for t, metrics in task_metrics.items()
|
||||
if metrics
|
||||
for m, variants in metrics.items()
|
||||
)
|
||||
with ThreadPoolExecutor(EventSettings.max_workers) as pool:
|
||||
res.metric_events = list(
|
||||
pool.map(
|
||||
partial(
|
||||
self._get_task_metric_events,
|
||||
company_id=company_id,
|
||||
companies=companies,
|
||||
iter_count=iter_count,
|
||||
navigate_earlier=navigate_earlier,
|
||||
specific_variants_requested=specific_variants_requested,
|
||||
),
|
||||
state.tasks,
|
||||
)
|
||||
@@ -117,22 +132,20 @@ class DebugImagesIterator:
|
||||
|
||||
def _reinit_outdated_task_states(
|
||||
self,
|
||||
company_id,
|
||||
state: DebugImageEventsScrollState,
|
||||
companies: Mapping[str, str],
|
||||
state: MetricEventsScrollState,
|
||||
task_metrics: Mapping[str, dict],
|
||||
):
|
||||
"""
|
||||
Determine the metrics for which new debug image events were added
|
||||
Determine the metrics for which new event_type events were added
|
||||
since their states were initialized and re-init these states
|
||||
"""
|
||||
tasks = Task.objects(id__in=list(task_metrics), company=company_id).only(
|
||||
"id", "metric_stats"
|
||||
)
|
||||
tasks = Task.objects(id__in=list(task_metrics)).only("id", "metric_stats")
|
||||
|
||||
def get_last_update_times_for_task_metrics(
|
||||
task: Task,
|
||||
) -> Mapping[str, datetime]:
|
||||
"""For metrics that reported debug image events get mapping of the metric name to the last update times"""
|
||||
"""For metrics that reported event_type events get mapping of the metric name to the last update times"""
|
||||
metric_stats: Mapping[str, MetricEventStats] = task.metric_stats
|
||||
if not metric_stats:
|
||||
return {}
|
||||
@@ -140,10 +153,10 @@ class DebugImagesIterator:
|
||||
requested_metrics = task_metrics[task.id]
|
||||
return {
|
||||
stats.metric: stats.event_stats_by_type[
|
||||
self.EVENT_TYPE.value
|
||||
self.event_type.value
|
||||
].last_update
|
||||
for stats in metric_stats.values()
|
||||
if self.EVENT_TYPE.value in stats.event_stats_by_type
|
||||
if self.event_type.value in stats.event_stats_by_type
|
||||
and (not requested_metrics or stats.metric in requested_metrics)
|
||||
}
|
||||
|
||||
@@ -167,7 +180,7 @@ class DebugImagesIterator:
|
||||
if metrics_to_recalc:
|
||||
task_metrics_to_recalc[task] = metrics_to_recalc
|
||||
|
||||
updated_task_states = self._init_task_states(company_id, task_metrics_to_recalc)
|
||||
updated_task_states = self._init_task_states(companies, task_metrics_to_recalc)
|
||||
|
||||
def merge_with_updated_task_states(
|
||||
old_state: TaskScrollState, updates: Sequence[TaskScrollState]
|
||||
@@ -197,14 +210,14 @@ class DebugImagesIterator:
|
||||
]
|
||||
|
||||
def _init_task_states(
|
||||
self, company_id: str, task_metrics: Mapping[str, dict]
|
||||
self, companies: Mapping[str, str], task_metrics: Mapping[str, dict]
|
||||
) -> Sequence[TaskScrollState]:
|
||||
"""
|
||||
Returned initialized metric scroll stated for the requested task metrics
|
||||
"""
|
||||
with ThreadPoolExecutor(EventSettings.max_workers) as pool:
|
||||
task_metric_states = pool.map(
|
||||
partial(self._init_metric_states_for_task, company_id=company_id),
|
||||
partial(self._init_metric_states_for_task, companies=companies),
|
||||
task_metrics.items(),
|
||||
)
|
||||
|
||||
@@ -213,18 +226,38 @@ class DebugImagesIterator:
|
||||
for task, metric_states in zip(task_metrics, task_metric_states)
|
||||
]
|
||||
|
||||
@abc.abstractmethod
|
||||
def _get_extra_conditions(self) -> Sequence[dict]:
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def _get_variant_state_aggs(
|
||||
self,
|
||||
) -> Tuple[dict, Callable[[dict, VariantState], None]]:
|
||||
pass
|
||||
|
||||
def _init_metric_states_for_task(
|
||||
self, task_metrics: Tuple[str, dict], company_id: str
|
||||
self, task_metrics: Tuple[str, dict], companies: Mapping[str, str]
|
||||
) -> Sequence[MetricState]:
|
||||
"""
|
||||
Return metric scroll states for the task filled with the variant states
|
||||
for the variants that reported any debug images
|
||||
for the variants that reported any event_type events
|
||||
"""
|
||||
task, metrics = task_metrics
|
||||
must = [{"term": {"task": task}}, {"exists": {"field": "url"}}]
|
||||
company_id = companies[task]
|
||||
must = [{"term": {"task": task}}, *self._get_extra_conditions()]
|
||||
if metrics:
|
||||
must.append(get_metric_variants_condition(metrics))
|
||||
query = {"bool": {"must": must}}
|
||||
|
||||
search_args = dict(
|
||||
es=self.es, company_id=company_id, event_type=self.event_type
|
||||
)
|
||||
max_metrics, max_variants = get_max_metric_and_variant_counts(
|
||||
query=query, **search_args
|
||||
)
|
||||
max_variants = int(max_variants // 2)
|
||||
variant_state_aggs, fill_variant_state_data = self._get_variant_state_aggs()
|
||||
es_req: dict = {
|
||||
"size": 0,
|
||||
"query": query,
|
||||
@@ -232,7 +265,7 @@ class DebugImagesIterator:
|
||||
"metrics": {
|
||||
"terms": {
|
||||
"field": "metric",
|
||||
"size": EventSettings.max_metrics_count,
|
||||
"size": max_metrics,
|
||||
"order": {"_key": "asc"},
|
||||
},
|
||||
"aggs": {
|
||||
@@ -240,72 +273,62 @@ class DebugImagesIterator:
|
||||
"variants": {
|
||||
"terms": {
|
||||
"field": "variant",
|
||||
"size": EventSettings.max_variants_count,
|
||||
"size": max_variants,
|
||||
"order": {"_key": "asc"},
|
||||
},
|
||||
"aggs": {
|
||||
"urls": {
|
||||
"terms": {
|
||||
"field": "url",
|
||||
"order": {"max_iter": "desc"},
|
||||
"size": 1, # we need only one url from the most recent iteration
|
||||
},
|
||||
"aggs": {
|
||||
"max_iter": {"max": {"field": "iter"}},
|
||||
"iters": {
|
||||
"top_hits": {
|
||||
"sort": {"iter": {"order": "desc"}},
|
||||
"size": 2, # need two last iterations so that we can take
|
||||
# the second one as invalid
|
||||
"_source": "iter",
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
**(
|
||||
{"aggs": variant_state_aggs}
|
||||
if variant_state_aggs
|
||||
else {}
|
||||
),
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
with translate_errors_context(), TimingContext("es", "_init_metric_states"):
|
||||
es_res = search_company_events(
|
||||
self.es, company_id=company_id, event_type=self.EVENT_TYPE, body=es_req,
|
||||
)
|
||||
with translate_errors_context():
|
||||
es_res = search_company_events(body=es_req, **search_args)
|
||||
if "aggregations" not in es_res:
|
||||
return []
|
||||
|
||||
def init_variant_state(variant: dict):
|
||||
"""
|
||||
Return new variant state for the passed variant bucket
|
||||
If the image urls get recycled then fill the last_invalid_iteration field
|
||||
"""
|
||||
state = VariantState(variant=variant["key"])
|
||||
top_iter_url = dpath.get(variant, "urls/buckets")[0]
|
||||
iters = dpath.get(top_iter_url, "iters/hits/hits")
|
||||
if len(iters) > 1:
|
||||
state.last_invalid_iteration = dpath.get(iters[1], "_source/iter")
|
||||
if fill_variant_state_data:
|
||||
fill_variant_state_data(variant, state)
|
||||
|
||||
return state
|
||||
|
||||
return [
|
||||
MetricState(
|
||||
metric=metric["key"],
|
||||
timestamp=dpath.get(metric, "last_event_timestamp/value"),
|
||||
timestamp=nested_get(metric, ("last_event_timestamp", "value")),
|
||||
variants=[
|
||||
init_variant_state(variant)
|
||||
for variant in dpath.get(metric, "variants/buckets")
|
||||
for variant in nested_get(metric, ("variants", "buckets"))
|
||||
],
|
||||
)
|
||||
for metric in dpath.get(es_res, "aggregations/metrics/buckets")
|
||||
for metric in nested_get(es_res, ("aggregations", "metrics", "buckets"))
|
||||
]
|
||||
|
||||
@abc.abstractmethod
|
||||
def _process_event(self, event: dict) -> dict:
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def _get_same_variant_events_order(self) -> dict:
|
||||
pass
|
||||
|
||||
def _get_task_metric_events(
|
||||
self,
|
||||
task_state: TaskScrollState,
|
||||
company_id: str,
|
||||
companies: Mapping[str, str],
|
||||
iter_count: int,
|
||||
navigate_earlier: bool,
|
||||
specific_variants_requested: bool,
|
||||
) -> Tuple:
|
||||
"""
|
||||
Return task metric events grouped by iterations
|
||||
@@ -321,7 +344,7 @@ class DebugImagesIterator:
|
||||
must_conditions = [
|
||||
{"term": {"task": task_state.task}},
|
||||
{"terms": {"metric": [m.metric for m in task_state.metrics]}},
|
||||
{"exists": {"field": "url"}},
|
||||
*self._get_extra_conditions(),
|
||||
]
|
||||
|
||||
range_condition = None
|
||||
@@ -332,6 +355,8 @@ class DebugImagesIterator:
|
||||
if range_condition:
|
||||
must_conditions.append({"range": {"iter": range_condition}})
|
||||
|
||||
metrics_count = len(task_state.metrics)
|
||||
max_variants = int(EventSettings.max_es_buckets / (metrics_count * iter_count))
|
||||
es_req = {
|
||||
"size": 0,
|
||||
"query": {"bool": {"must": must_conditions}},
|
||||
@@ -346,20 +371,21 @@ class DebugImagesIterator:
|
||||
"metrics": {
|
||||
"terms": {
|
||||
"field": "metric",
|
||||
"size": EventSettings.max_metrics_count,
|
||||
"size": metrics_count,
|
||||
"order": {"_key": "asc"},
|
||||
},
|
||||
"aggs": {
|
||||
"variants": {
|
||||
"terms": {
|
||||
"field": "variant",
|
||||
"size": EventSettings.max_variants_count,
|
||||
"size": max_variants,
|
||||
"order": {"_key": "asc"},
|
||||
},
|
||||
"aggs": {
|
||||
"events": {
|
||||
"top_hits": {
|
||||
"sort": {"url": {"order": "desc"}}
|
||||
"sort": self._get_same_variant_events_order(),
|
||||
"size": 1,
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -370,9 +396,12 @@ class DebugImagesIterator:
|
||||
}
|
||||
},
|
||||
}
|
||||
with translate_errors_context(), TimingContext("es", "get_debug_image_events"):
|
||||
with translate_errors_context():
|
||||
es_res = search_company_events(
|
||||
self.es, company_id=company_id, event_type=self.EVENT_TYPE, body=es_req,
|
||||
self.es,
|
||||
company_id=companies[task_state.task],
|
||||
event_type=self.event_type,
|
||||
body=es_req,
|
||||
)
|
||||
if "aggregations" not in es_res:
|
||||
return task_state.task, []
|
||||
@@ -382,26 +411,34 @@ class DebugImagesIterator:
|
||||
for m in task_state.metrics
|
||||
for v in m.variants
|
||||
}
|
||||
allow_uninitialized = (
|
||||
False
|
||||
if specific_variants_requested
|
||||
else config.get(
|
||||
"services.events.events_retrieval.debug_images.allow_uninitialized_variants",
|
||||
False,
|
||||
)
|
||||
)
|
||||
|
||||
def is_valid_event(event: dict) -> bool:
|
||||
key = event.get("metric"), event.get("variant")
|
||||
if key not in invalid_iterations:
|
||||
return False
|
||||
return allow_uninitialized
|
||||
|
||||
max_invalid = invalid_iterations[key]
|
||||
return max_invalid is None or event.get("iter") > max_invalid
|
||||
|
||||
def get_iteration_events(it_: dict) -> Sequence:
|
||||
return [
|
||||
ev["_source"]
|
||||
for m in dpath.get(it_, "metrics/buckets")
|
||||
for v in dpath.get(m, "variants/buckets")
|
||||
for ev in dpath.get(v, "events/hits/hits")
|
||||
self._process_event(ev["_source"])
|
||||
for m in nested_get(it_, ("metrics", "buckets"))
|
||||
for v in nested_get(m, ("variants", "buckets"))
|
||||
for ev in nested_get(v, ("events", "hits", "hits"))
|
||||
if is_valid_event(ev["_source"])
|
||||
]
|
||||
|
||||
iterations = []
|
||||
for it in dpath.get(es_res, "aggregations/iters/buckets"):
|
||||
for it in nested_get(es_res, ("aggregations", "iters", "buckets")):
|
||||
events = get_iteration_events(it)
|
||||
if events:
|
||||
iterations.append({"iter": it["key"], "events": events})
|
||||
25
apiserver/bll/event/metric_plots_iterator.py
Normal file
25
apiserver/bll/event/metric_plots_iterator.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from typing import Sequence
|
||||
|
||||
from elasticsearch import Elasticsearch
|
||||
from redis.client import StrictRedis
|
||||
|
||||
from .event_common import EventType, uncompress_plot
|
||||
from .metric_events_iterator import MetricEventsIterator
|
||||
|
||||
|
||||
class MetricPlotsIterator(MetricEventsIterator):
|
||||
def __init__(self, redis: StrictRedis, es: Elasticsearch):
|
||||
super().__init__(redis, es, EventType.metrics_plot)
|
||||
|
||||
def _get_extra_conditions(self) -> Sequence[dict]:
|
||||
return []
|
||||
|
||||
def _get_variant_state_aggs(self):
|
||||
return None, None
|
||||
|
||||
def _process_event(self, event: dict) -> dict:
|
||||
uncompress_plot(event)
|
||||
return event
|
||||
|
||||
def _get_same_variant_events_order(self) -> dict:
|
||||
return {"timestamp": {"order": "desc"}}
|
||||
@@ -1,12 +1,15 @@
|
||||
from datetime import datetime
|
||||
from typing import Callable, Tuple
|
||||
from typing import Callable, Tuple, Sequence, Dict, Optional
|
||||
|
||||
from mongoengine import Q
|
||||
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.apimodels.models import ModelTaskPublishResponse
|
||||
from apiserver.bll.task.utils import deleted_prefix
|
||||
from apiserver.bll.task.utils import deleted_prefix, get_last_metric_updates
|
||||
from apiserver.database.model import EntityVisibility
|
||||
from apiserver.database.model.model import Model
|
||||
from apiserver.database.model.task.task import Task, TaskStatus
|
||||
from apiserver.service_repo.auth import Identity
|
||||
from .metadata import Metadata
|
||||
|
||||
|
||||
@@ -24,18 +27,43 @@ class ModelBLL:
|
||||
raise errors.bad_request.InvalidModelId(**query)
|
||||
return model
|
||||
|
||||
@staticmethod
|
||||
def assert_exists(
|
||||
company_id, model_ids, only=None, allow_public=False, return_models=True,
|
||||
) -> Optional[Sequence[Model]]:
|
||||
model_ids = [model_ids] if isinstance(model_ids, str) else model_ids
|
||||
ids = set(model_ids)
|
||||
query = Q(id__in=ids)
|
||||
|
||||
q = Model.get_many(
|
||||
company=company_id,
|
||||
query=query,
|
||||
allow_public=allow_public,
|
||||
return_dicts=False,
|
||||
)
|
||||
if only:
|
||||
q = q.only(*only)
|
||||
|
||||
if q.count() != len(ids):
|
||||
raise errors.bad_request.InvalidModelId(ids=model_ids)
|
||||
|
||||
if return_models:
|
||||
return list(q)
|
||||
|
||||
@classmethod
|
||||
def publish_model(
|
||||
cls,
|
||||
model_id: str,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
force_publish_task: bool = False,
|
||||
publish_task_func: Callable[[str, str, bool], dict] = None,
|
||||
publish_task_func: Callable[[str, str, Identity, bool], dict] = None,
|
||||
) -> Tuple[int, ModelTaskPublishResponse]:
|
||||
model = cls.get_company_model_by_id(company_id=company_id, model_id=model_id)
|
||||
if model.ready:
|
||||
raise errors.bad_request.ModelIsReady(company=company_id, model=model_id)
|
||||
|
||||
user_id = identity.user
|
||||
published_task = None
|
||||
if model.task and publish_task_func:
|
||||
task = (
|
||||
@@ -45,18 +73,25 @@ class ModelBLL:
|
||||
)
|
||||
if task and task.status != TaskStatus.published:
|
||||
task_publish_res = publish_task_func(
|
||||
model.task, company_id, force_publish_task
|
||||
model.task, company_id, identity, force_publish_task
|
||||
)
|
||||
published_task = ModelTaskPublishResponse(
|
||||
id=model.task, data=task_publish_res
|
||||
)
|
||||
|
||||
updated = model.update(upsert=False, ready=True, last_update=datetime.utcnow())
|
||||
now = datetime.utcnow()
|
||||
updated = model.update(
|
||||
upsert=False,
|
||||
ready=True,
|
||||
last_update=now,
|
||||
last_change=now,
|
||||
last_changed_by=user_id,
|
||||
)
|
||||
return updated, published_task
|
||||
|
||||
@classmethod
|
||||
def delete_model(
|
||||
cls, model_id: str, company_id: str, force: bool
|
||||
cls, model_id: str, company_id: str, user_id: str, force: bool
|
||||
) -> Tuple[int, Model]:
|
||||
model = cls.get_company_model_by_id(
|
||||
company_id=company_id,
|
||||
@@ -82,49 +117,121 @@ class ModelBLL:
|
||||
|
||||
if model.task:
|
||||
task = Task.objects(id=model.task).first()
|
||||
if task and task.status == TaskStatus.published:
|
||||
if not force:
|
||||
raise errors.bad_request.ModelCreatingTaskExists(
|
||||
"and published, use force=True to delete", task=model.task
|
||||
)
|
||||
if task.models.output and model_id in task.models.output:
|
||||
now = datetime.utcnow()
|
||||
if task:
|
||||
now = datetime.utcnow()
|
||||
if task.status == TaskStatus.published:
|
||||
if not force:
|
||||
raise errors.bad_request.ModelCreatingTaskExists(
|
||||
"and published, use force=True to delete", task=model.task
|
||||
)
|
||||
Task._get_collection().update_one(
|
||||
filter={"_id": model.task, "models.output.model": model_id},
|
||||
update={
|
||||
"$set": {
|
||||
"models.output.$[elem].model": deleted_model_id,
|
||||
"output.error": f"model deleted on {now.isoformat()}",
|
||||
"last_change": now,
|
||||
"last_changed_by": user_id,
|
||||
},
|
||||
"last_change": now,
|
||||
},
|
||||
array_filters=[{"elem.model": model_id}],
|
||||
upsert=False,
|
||||
)
|
||||
else:
|
||||
task.update(
|
||||
pull__models__output__model=model_id,
|
||||
set__last_change=now,
|
||||
set__last_changed_by=user_id,
|
||||
)
|
||||
|
||||
del_count = Model.objects(id=model_id, company=company_id).delete()
|
||||
return del_count, model
|
||||
|
||||
@classmethod
|
||||
def archive_model(cls, model_id: str, company_id: str):
|
||||
def archive_model(cls, model_id: str, company_id: str, user_id: str):
|
||||
cls.get_company_model_by_id(
|
||||
company_id=company_id, model_id=model_id, only_fields=("id",)
|
||||
)
|
||||
now = datetime.utcnow()
|
||||
archived = Model.objects(company=company_id, id=model_id).update(
|
||||
add_to_set__system_tags=EntityVisibility.archived.value,
|
||||
last_update=datetime.utcnow(),
|
||||
last_change=now,
|
||||
last_changed_by=user_id,
|
||||
)
|
||||
|
||||
return archived
|
||||
|
||||
@classmethod
|
||||
def unarchive_model(cls, model_id: str, company_id: str):
|
||||
def unarchive_model(cls, model_id: str, company_id: str, user_id: str):
|
||||
cls.get_company_model_by_id(
|
||||
company_id=company_id, model_id=model_id, only_fields=("id",)
|
||||
)
|
||||
now = datetime.utcnow()
|
||||
unarchived = Model.objects(company=company_id, id=model_id).update(
|
||||
pull__system_tags=EntityVisibility.archived.value,
|
||||
last_update=datetime.utcnow(),
|
||||
last_change=now,
|
||||
last_changed_by=user_id,
|
||||
)
|
||||
|
||||
return unarchived
|
||||
|
||||
@classmethod
|
||||
def get_model_stats(
|
||||
cls, company: str, model_ids: Sequence[str],
|
||||
) -> Dict[str, dict]:
|
||||
if not model_ids:
|
||||
return {}
|
||||
|
||||
result = Model.aggregate(
|
||||
[
|
||||
{
|
||||
"$match": {
|
||||
"company": {"$in": ["", company]},
|
||||
"_id": {"$in": model_ids},
|
||||
}
|
||||
},
|
||||
{
|
||||
"$addFields": {
|
||||
"labels_count": {"$size": {"$objectToArray": "$labels"}}
|
||||
}
|
||||
},
|
||||
{"$project": {"labels_count": 1}},
|
||||
]
|
||||
)
|
||||
return {r.pop("_id"): r for r in result}
|
||||
|
||||
@staticmethod
|
||||
def update_statistics(
|
||||
company_id: str,
|
||||
user_id: str,
|
||||
model_id: str,
|
||||
last_update: datetime = None,
|
||||
last_iteration_max: int = None,
|
||||
last_scalar_events: Dict[str, Dict[str, dict]] = None,
|
||||
):
|
||||
last_update = last_update or datetime.utcnow()
|
||||
updates = {
|
||||
"last_update": datetime.utcnow(),
|
||||
"last_change": last_update,
|
||||
"last_changed_by": user_id,
|
||||
}
|
||||
if last_iteration_max is not None:
|
||||
updates.update(max__last_iteration=last_iteration_max)
|
||||
|
||||
raw_updates = {}
|
||||
if last_scalar_events is not None:
|
||||
raw_updates = {}
|
||||
if last_scalar_events is not None:
|
||||
get_last_metric_updates(
|
||||
task_id=model_id,
|
||||
last_scalar_events=last_scalar_events,
|
||||
raw_updates=raw_updates,
|
||||
extra_updates=updates,
|
||||
model_events=True,
|
||||
)
|
||||
|
||||
ret = Model.objects(id=model_id).update_one(**updates)
|
||||
if ret and raw_updates:
|
||||
Model.objects(id=model_id).update_one(__raw__=[{"$set": raw_updates}])
|
||||
|
||||
return ret
|
||||
|
||||
@@ -5,13 +5,11 @@ from mongoengine import Document
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.apimodels.metadata import MetadataItem
|
||||
from apiserver.database.model.base import GetMixin
|
||||
from apiserver.service_repo import APICall
|
||||
from apiserver.utilities.parameter_key_escaper import (
|
||||
ParameterKeyEscaper,
|
||||
mongoengine_safe,
|
||||
)
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.timing_context import TimingContext
|
||||
|
||||
log = config.logger(__file__)
|
||||
|
||||
@@ -42,27 +40,25 @@ class Metadata:
|
||||
replace_metadata: bool,
|
||||
**more_updates,
|
||||
) -> int:
|
||||
with TimingContext("mongo", "edit_metadata"):
|
||||
update_cmds = dict()
|
||||
metadata = cls.metadata_from_api(items)
|
||||
if replace_metadata:
|
||||
update_cmds["set__metadata"] = metadata
|
||||
else:
|
||||
for key, value in metadata.items():
|
||||
update_cmds[f"set__metadata__{mongoengine_safe(key)}"] = value
|
||||
update_cmds = dict()
|
||||
metadata = cls.metadata_from_api(items)
|
||||
if replace_metadata:
|
||||
update_cmds["set__metadata"] = metadata
|
||||
else:
|
||||
for key, value in metadata.items():
|
||||
update_cmds[f"set__metadata__{mongoengine_safe(key)}"] = value
|
||||
|
||||
return obj.update(**update_cmds, **more_updates)
|
||||
return obj.update(**update_cmds, **more_updates)
|
||||
|
||||
@classmethod
|
||||
def delete_metadata(cls, obj: Document, keys: Sequence[str], **more_updates) -> int:
|
||||
with TimingContext("mongo", "delete_metadata"):
|
||||
return obj.update(
|
||||
**{
|
||||
f"unset__metadata__{ParameterKeyEscaper.escape(key)}": 1
|
||||
for key in set(keys)
|
||||
},
|
||||
**more_updates,
|
||||
)
|
||||
return obj.update(
|
||||
**{
|
||||
f"unset__metadata__{ParameterKeyEscaper.escape(key)}": 1
|
||||
for key in set(keys)
|
||||
},
|
||||
**more_updates,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _process_path(path: str):
|
||||
@@ -90,13 +86,13 @@ class Metadata:
|
||||
return paths
|
||||
|
||||
@classmethod
|
||||
def escape_query_parameters(cls, call: APICall) -> dict:
|
||||
if not call.data:
|
||||
return call.data
|
||||
def escape_query_parameters(cls, call_data: dict) -> dict:
|
||||
if not call_data:
|
||||
return call_data
|
||||
|
||||
keys = list(call.data)
|
||||
keys = list(call_data)
|
||||
call_data = {
|
||||
safe_key: call.data[key]
|
||||
safe_key: call_data[key]
|
||||
for key, safe_key in zip(keys, Metadata.escape_paths(keys))
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Sequence, Dict
|
||||
from typing import Sequence, Dict, Type
|
||||
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.bll.util import update_project_time
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.model.model import AttributedDocument
|
||||
from apiserver.database.model.model import Model
|
||||
from apiserver.database.model.task.task import Task
|
||||
from apiserver.redis_manager import redman
|
||||
@@ -22,6 +26,56 @@ class OrgBLL:
|
||||
self._task_tags = _TagsCache(Task, self.redis)
|
||||
self._model_tags = _TagsCache(Model, self.redis)
|
||||
|
||||
def edit_entity_tags(
|
||||
self,
|
||||
company_id,
|
||||
user_id: str,
|
||||
entity_cls: Type[AttributedDocument],
|
||||
entity_ids: Sequence[str],
|
||||
add_tags: Sequence[str],
|
||||
remove_tags: Sequence[str],
|
||||
) -> int:
|
||||
if entity_cls not in (Task, Model):
|
||||
raise errors.bad_request.ValidationError(
|
||||
"Tags editing can be called on tasks or models only"
|
||||
)
|
||||
if not entity_ids:
|
||||
raise errors.bad_request.ValidationError(
|
||||
"No entity ids provided for editing tags"
|
||||
)
|
||||
if not (add_tags or remove_tags):
|
||||
raise errors.bad_request.ValidationError(
|
||||
"Either add tags or remove tags should be provided"
|
||||
)
|
||||
|
||||
updated = 0
|
||||
last_changed = {
|
||||
"set__last_change": datetime.utcnow(),
|
||||
"set__last_changed_by": user_id,
|
||||
}
|
||||
if add_tags:
|
||||
updated += entity_cls.objects(company=company_id, id__in=entity_ids).update(
|
||||
add_to_set__tags=add_tags, **last_changed,
|
||||
)
|
||||
if remove_tags:
|
||||
updated += entity_cls.objects(company=company_id, id__in=entity_ids).update(
|
||||
pull_all__tags=remove_tags, **last_changed,
|
||||
)
|
||||
if not updated:
|
||||
return 0
|
||||
|
||||
projects = entity_cls.objects(company=company_id, id__in=entity_ids).distinct(
|
||||
"project"
|
||||
)
|
||||
update_project_time(project_ids=projects)
|
||||
self.update_tags(
|
||||
company_id,
|
||||
entity=Tags.Task if entity_cls is Task else Tags.Model,
|
||||
projects=projects,
|
||||
tags=add_tags or remove_tags
|
||||
)
|
||||
return updated
|
||||
|
||||
def get_tags(
|
||||
self,
|
||||
company_id: str,
|
||||
@@ -50,10 +104,10 @@ class OrgBLL:
|
||||
return ret
|
||||
|
||||
def update_tags(
|
||||
self, company_id: str, entity: Tags, project: str, tags=None, system_tags=None,
|
||||
self, company_id: str, entity: Tags, projects: Sequence[str], tags=None, system_tags=None,
|
||||
):
|
||||
tags_cache = self._get_tags_cache_for_entity(entity)
|
||||
tags_cache.update_tags(company_id, project, tags, system_tags)
|
||||
tags_cache.update_tags(company_id, projects, tags, system_tags)
|
||||
|
||||
def reset_tags(self, company_id: str, entity: Tags, projects: Sequence[str]):
|
||||
tags_cache = self._get_tags_cache_for_entity(entity)
|
||||
|
||||
@@ -42,6 +42,8 @@ class _TagsCache:
|
||||
query &= GetMixin.get_list_field_query(name, vals)
|
||||
if project:
|
||||
query &= Q(project__in=project_ids_with_children([project]))
|
||||
# else:
|
||||
# query &= Q(system_tags__nin=[EntityVisibility.hidden.value])
|
||||
|
||||
return self.db_cls.objects(query).distinct(field)
|
||||
|
||||
@@ -104,7 +106,7 @@ class _TagsCache:
|
||||
|
||||
return ret
|
||||
|
||||
def update_tags(self, company_id: str, project: str, tags=None, system_tags=None):
|
||||
def update_tags(self, company_id: str, projects: Sequence[str], tags=None, system_tags=None):
|
||||
"""
|
||||
Updates tags. If reset is set then both tags and system_tags
|
||||
are recalculated. Otherwise only those that are not 'None'
|
||||
@@ -120,7 +122,7 @@ class _TagsCache:
|
||||
if not fields:
|
||||
return
|
||||
|
||||
self._delete_redis_keys(company_id, projects=[project], fields=fields)
|
||||
self._delete_redis_keys(company_id, projects=projects, fields=fields)
|
||||
|
||||
def reset_tags(self, company_id: str, projects: Sequence[str]):
|
||||
self._delete_redis_keys(
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,20 +1,30 @@
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from typing import Tuple, Set, Sequence
|
||||
|
||||
import attr
|
||||
from mongoengine import Q
|
||||
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.bll.event import EventBLL
|
||||
from apiserver.bll.task.task_cleanup import (
|
||||
collect_debug_image_urls,
|
||||
collect_plot_image_urls,
|
||||
TaskUrls,
|
||||
schedule_for_delete,
|
||||
delete_task_events_and_collect_urls,
|
||||
)
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.model import EntityVisibility
|
||||
from apiserver.database.model.model import Model
|
||||
from apiserver.database.model.project import Project
|
||||
from apiserver.database.model.task.task import Task, ArtifactModes
|
||||
from apiserver.timing_context import TimingContext
|
||||
from apiserver.database.model.task.task import Task, ArtifactModes, TaskType, TaskStatus
|
||||
from .project_bll import (
|
||||
ProjectBLL,
|
||||
pipeline_tag,
|
||||
pipelines_project_name,
|
||||
dataset_tag,
|
||||
datasets_project_name,
|
||||
reports_tag,
|
||||
)
|
||||
from .sub_projects import _ids_with_children
|
||||
|
||||
log = config.logger(__file__)
|
||||
@@ -30,68 +40,171 @@ class DeleteProjectResult:
|
||||
urls: TaskUrls = None
|
||||
|
||||
|
||||
def _get_child_project_ids(
|
||||
project_id: str,
|
||||
) -> Tuple[Sequence[str], Sequence[str], Sequence[str]]:
|
||||
project_ids = _ids_with_children([project_id])
|
||||
pipeline_ids = list(
|
||||
Project.objects(
|
||||
id__in=project_ids,
|
||||
system_tags__in=[pipeline_tag],
|
||||
basename__ne=pipelines_project_name,
|
||||
).scalar("id")
|
||||
)
|
||||
dataset_ids = list(
|
||||
Project.objects(
|
||||
id__in=project_ids,
|
||||
system_tags__in=[dataset_tag],
|
||||
basename__ne=datasets_project_name,
|
||||
).scalar("id")
|
||||
)
|
||||
return project_ids, pipeline_ids, dataset_ids
|
||||
|
||||
|
||||
def validate_project_delete(company: str, project_id: str):
|
||||
project = Project.get_for_writing(
|
||||
company=company, id=project_id, _only=("id", "path")
|
||||
company=company, id=project_id, _only=("id", "path", "system_tags")
|
||||
)
|
||||
if not project:
|
||||
raise errors.bad_request.InvalidProjectId(id=project_id)
|
||||
|
||||
project_ids = _ids_with_children([project_id])
|
||||
project_ids, pipeline_ids, dataset_ids = _get_child_project_ids(project_id)
|
||||
ret = {}
|
||||
for cls in (Task, Model):
|
||||
ret[f"{cls.__name__.lower()}s"] = cls.objects(
|
||||
project__in=project_ids,
|
||||
).count()
|
||||
for cls in (Task, Model):
|
||||
ret[f"non_archived_{cls.__name__.lower()}s"] = cls.objects(
|
||||
project__in=project_ids,
|
||||
if pipeline_ids:
|
||||
pipelines_with_active_controllers = Task.objects(
|
||||
project__in=pipeline_ids,
|
||||
type=TaskType.controller,
|
||||
system_tags__nin=[EntityVisibility.archived.value],
|
||||
).distinct("project")
|
||||
ret["pipelines"] = len(pipelines_with_active_controllers)
|
||||
else:
|
||||
ret["pipelines"] = 0
|
||||
if dataset_ids:
|
||||
datasets_with_data = Task.objects(
|
||||
project__in=dataset_ids,
|
||||
system_tags__nin=[EntityVisibility.archived.value],
|
||||
).distinct("project")
|
||||
ret["datasets"] = len(datasets_with_data)
|
||||
else:
|
||||
ret["datasets"] = 0
|
||||
|
||||
project_ids = list(set(project_ids) - set(pipeline_ids) - set(dataset_ids))
|
||||
if project_ids:
|
||||
in_project_query = Q(project__in=project_ids)
|
||||
for cls in (Task, Model):
|
||||
query = (
|
||||
in_project_query & Q(system_tags__nin=[reports_tag])
|
||||
if cls is Task
|
||||
else in_project_query
|
||||
)
|
||||
ret[f"{cls.__name__.lower()}s"] = cls.objects(query).count()
|
||||
ret[f"non_archived_{cls.__name__.lower()}s"] = cls.objects(
|
||||
query & Q(system_tags__nin=[EntityVisibility.archived.value])
|
||||
).count()
|
||||
ret["reports"] = Task.objects(
|
||||
in_project_query & Q(system_tags__in=[reports_tag])
|
||||
).count()
|
||||
ret["non_archived_reports"] = Task.objects(
|
||||
in_project_query
|
||||
& Q(
|
||||
system_tags__in=[reports_tag],
|
||||
system_tags__nin=[EntityVisibility.archived.value],
|
||||
)
|
||||
).count()
|
||||
else:
|
||||
for cls in (Task, Model):
|
||||
ret[f"{cls.__name__.lower()}s"] = 0
|
||||
ret[f"non_archived_{cls.__name__.lower()}s"] = 0
|
||||
ret["reports"] = 0
|
||||
ret["non_archived_reports"] = 0
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
def delete_project(
|
||||
company: str, project_id: str, force: bool, delete_contents: bool
|
||||
company: str,
|
||||
user: str,
|
||||
project_id: str,
|
||||
force: bool,
|
||||
delete_contents: bool,
|
||||
delete_external_artifacts: bool,
|
||||
) -> Tuple[DeleteProjectResult, Set[str]]:
|
||||
project = Project.get_for_writing(
|
||||
company=company, id=project_id, _only=("id", "path")
|
||||
company=company, id=project_id, _only=("id", "path", "system_tags")
|
||||
)
|
||||
if not project:
|
||||
raise errors.bad_request.InvalidProjectId(id=project_id)
|
||||
|
||||
project_ids = _ids_with_children([project_id])
|
||||
delete_external_artifacts = delete_external_artifacts and config.get(
|
||||
"services.async_urls_delete.enabled", True
|
||||
)
|
||||
project_ids, pipeline_ids, dataset_ids = _get_child_project_ids(project_id)
|
||||
if not force:
|
||||
for cls, error in (
|
||||
(Task, errors.bad_request.ProjectHasTasks),
|
||||
(Model, errors.bad_request.ProjectHasModels),
|
||||
):
|
||||
non_archived = cls.objects(
|
||||
project__in=project_ids,
|
||||
if pipeline_ids:
|
||||
active_controllers = Task.objects(
|
||||
project__in=pipeline_ids,
|
||||
type=TaskType.controller,
|
||||
system_tags__nin=[EntityVisibility.archived.value],
|
||||
).only("id")
|
||||
if non_archived:
|
||||
raise error("use force=true to delete", id=project_id)
|
||||
if active_controllers:
|
||||
raise errors.bad_request.ProjectHasPipelines(
|
||||
"please archive all the controllers or use force=true",
|
||||
id=project_id,
|
||||
)
|
||||
if dataset_ids:
|
||||
datasets_with_data = Task.objects(
|
||||
project__in=dataset_ids,
|
||||
system_tags__nin=[EntityVisibility.archived.value],
|
||||
).only("id")
|
||||
if datasets_with_data:
|
||||
raise errors.bad_request.ProjectHasDatasets(
|
||||
"please delete all the dataset versions or use force=true",
|
||||
id=project_id,
|
||||
)
|
||||
|
||||
regular_projects = list(set(project_ids) - set(pipeline_ids) - set(dataset_ids))
|
||||
if regular_projects:
|
||||
for cls, error in (
|
||||
(Task, errors.bad_request.ProjectHasTasks),
|
||||
(Model, errors.bad_request.ProjectHasModels),
|
||||
):
|
||||
non_archived = cls.objects(
|
||||
project__in=regular_projects,
|
||||
system_tags__nin=[EntityVisibility.archived.value],
|
||||
).only("id")
|
||||
if non_archived:
|
||||
raise error("use force=true", id=project_id)
|
||||
|
||||
if not delete_contents:
|
||||
with TimingContext("mongo", "update_children"):
|
||||
for cls in (Model, Task):
|
||||
updated_count = cls.objects(project__in=project_ids).update(
|
||||
project=None
|
||||
)
|
||||
res = DeleteProjectResult(disassociated_tasks=updated_count)
|
||||
disassociated = defaultdict(int)
|
||||
for cls in ProjectBLL.child_classes:
|
||||
disassociated[cls] = cls.objects(project__in=project_ids).update(
|
||||
project=None
|
||||
)
|
||||
res = DeleteProjectResult(disassociated_tasks=disassociated[Task])
|
||||
else:
|
||||
deleted_models, model_urls = _delete_models(projects=project_ids)
|
||||
deleted_tasks, event_urls, artifact_urls = _delete_tasks(
|
||||
company=company, projects=project_ids
|
||||
deleted_models, model_event_urls, model_urls = _delete_models(
|
||||
company=company, user=user, projects=project_ids
|
||||
)
|
||||
deleted_tasks, task_event_urls, artifact_urls = _delete_tasks(
|
||||
company=company, user=user, projects=project_ids
|
||||
)
|
||||
event_urls = task_event_urls | model_event_urls
|
||||
if delete_external_artifacts:
|
||||
scheduled = schedule_for_delete(
|
||||
task_id=project_id,
|
||||
company=company,
|
||||
user=user,
|
||||
urls=event_urls | model_urls | artifact_urls,
|
||||
can_delete_folders=True,
|
||||
)
|
||||
for urls in (event_urls, model_urls, artifact_urls):
|
||||
urls.difference_update(scheduled)
|
||||
res = DeleteProjectResult(
|
||||
deleted_tasks=deleted_tasks,
|
||||
deleted_models=deleted_models,
|
||||
urls=TaskUrls(
|
||||
model_urls=list(model_urls),
|
||||
event_urls=list(event_urls),
|
||||
artifact_urls=list(artifact_urls),
|
||||
),
|
||||
)
|
||||
@@ -102,7 +215,9 @@ def delete_project(
|
||||
return res, affected
|
||||
|
||||
|
||||
def _delete_tasks(company: str, projects: Sequence[str]) -> Tuple[int, Set, Set]:
|
||||
def _delete_tasks(
|
||||
company: str, user: str, projects: Sequence[str]
|
||||
) -> Tuple[int, Set, Set]:
|
||||
"""
|
||||
Delete only the task themselves and their non published version.
|
||||
Child models under the same project are deleted separately.
|
||||
@@ -113,15 +228,21 @@ def _delete_tasks(company: str, projects: Sequence[str]) -> Tuple[int, Set, Set]
|
||||
if not tasks:
|
||||
return 0, set(), set()
|
||||
|
||||
task_ids = {t.id for t in tasks}
|
||||
with TimingContext("mongo", "delete_tasks_update_children"):
|
||||
Task.objects(parent__in=task_ids, project__nin=projects).update(parent=None)
|
||||
Model.objects(task__in=task_ids, project__nin=projects).update(task=None)
|
||||
task_ids = list({t.id for t in tasks})
|
||||
now = datetime.utcnow()
|
||||
Task.objects(parent__in=task_ids, project__nin=projects).update(
|
||||
parent=None,
|
||||
last_change=now,
|
||||
last_changed_by=user,
|
||||
)
|
||||
Model.objects(task__in=task_ids, project__nin=projects).update(
|
||||
task=None,
|
||||
last_change=now,
|
||||
last_changed_by=user,
|
||||
)
|
||||
|
||||
event_urls, artifact_urls = set(), set()
|
||||
artifact_urls = set()
|
||||
for task in tasks:
|
||||
event_urls.update(collect_debug_image_urls(company, task.id))
|
||||
event_urls.update(collect_plot_image_urls(company, task.id))
|
||||
if task.execution and task.execution.artifacts:
|
||||
artifact_urls.update(
|
||||
{
|
||||
@@ -131,46 +252,73 @@ def _delete_tasks(company: str, projects: Sequence[str]) -> Tuple[int, Set, Set]
|
||||
}
|
||||
)
|
||||
|
||||
event_bll.delete_multi_task_events(company, list(task_ids))
|
||||
event_urls = delete_task_events_and_collect_urls(
|
||||
company=company, task_ids=task_ids, wait_for_delete=False
|
||||
)
|
||||
deleted = tasks.delete()
|
||||
|
||||
return deleted, event_urls, artifact_urls
|
||||
|
||||
|
||||
def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]:
|
||||
def _delete_models(
|
||||
company: str, user: str, projects: Sequence[str]
|
||||
) -> Tuple[int, Set[str], Set[str]]:
|
||||
"""
|
||||
Delete project models and update the tasks from other projects
|
||||
that reference them to reference None.
|
||||
"""
|
||||
with TimingContext("mongo", "delete_models"):
|
||||
models = Model.objects(project__in=projects).only("task", "id", "uri")
|
||||
if not models:
|
||||
return 0, set()
|
||||
models = Model.objects(project__in=projects).only("task", "id", "uri")
|
||||
if not models:
|
||||
return 0, set(), set()
|
||||
|
||||
model_ids = list({m.id for m in models})
|
||||
model_ids = list({m.id for m in models})
|
||||
deleted = "__DELETED__"
|
||||
Task._get_collection().update_many(
|
||||
filter={
|
||||
"project": {"$nin": projects},
|
||||
"models.input.model": {"$in": model_ids},
|
||||
},
|
||||
update={"$set": {"models.input.$[elem].model": deleted}},
|
||||
array_filters=[{"elem.model": {"$in": model_ids}}],
|
||||
upsert=False,
|
||||
)
|
||||
|
||||
model_tasks = list({m.task for m in models if m.task})
|
||||
if model_tasks:
|
||||
now = datetime.utcnow()
|
||||
# update published tasks
|
||||
Task._get_collection().update_many(
|
||||
filter={
|
||||
"_id": {"$in": model_tasks},
|
||||
"project": {"$nin": projects},
|
||||
"models.input.model": {"$in": model_ids},
|
||||
"models.output.model": {"$in": model_ids},
|
||||
"status": TaskStatus.published,
|
||||
},
|
||||
update={
|
||||
"$set": {
|
||||
"models.output.$[elem].model": deleted,
|
||||
"last_change": now,
|
||||
"last_changed_by": user,
|
||||
}
|
||||
},
|
||||
update={"$set": {"models.input.$[elem].model": None}},
|
||||
array_filters=[{"elem.model": {"$in": model_ids}}],
|
||||
upsert=False,
|
||||
)
|
||||
# update unpublished tasks
|
||||
Task.objects(
|
||||
id__in=model_tasks,
|
||||
project__nin=projects,
|
||||
status__ne=TaskStatus.published,
|
||||
).update(
|
||||
pull__models__output__model__in=model_ids,
|
||||
set__last_change=now,
|
||||
set__last_changed_by=user,
|
||||
)
|
||||
|
||||
model_tasks = list({m.task for m in models if m.task})
|
||||
if model_tasks:
|
||||
Task._get_collection().update_many(
|
||||
filter={
|
||||
"_id": {"$in": model_tasks},
|
||||
"project": {"$nin": projects},
|
||||
"models.output.model": {"$in": model_ids},
|
||||
},
|
||||
update={"$set": {"models.output.$[elem].model": None}},
|
||||
array_filters=[{"elem.model": {"$in": model_ids}}],
|
||||
upsert=False,
|
||||
)
|
||||
model_urls = {m.uri for m in models if m.uri}
|
||||
event_urls = delete_task_events_and_collect_urls(
|
||||
company=company, task_ids=model_ids, model=True, wait_for_delete=False
|
||||
)
|
||||
deleted = models.delete()
|
||||
|
||||
urls = {m.uri for m in models if m.uri}
|
||||
deleted = models.delete()
|
||||
return deleted, urls
|
||||
return deleted, event_urls, model_urls
|
||||
|
||||
@@ -47,7 +47,7 @@ class ProjectQueries:
|
||||
@staticmethod
|
||||
def _get_company_constraint(company_id: str, allow_public: bool = True) -> dict:
|
||||
if allow_public:
|
||||
return {"company": {"$in": [None, "", company_id]}}
|
||||
return {"company": {"$in": ["", company_id]}}
|
||||
|
||||
return {"company": company_id}
|
||||
|
||||
@@ -140,7 +140,12 @@ class ProjectQueries:
|
||||
name: str,
|
||||
include_subprojects: bool,
|
||||
allow_public: bool = True,
|
||||
pattern: str = None,
|
||||
page: int = 0,
|
||||
page_size: int = 500,
|
||||
) -> ParamValues:
|
||||
page = max(0, page)
|
||||
page_size = max(1, page_size)
|
||||
company_constraint = self._get_company_constraint(company_id, allow_public)
|
||||
project_constraint = self._get_project_constraint(
|
||||
project_ids, include_subprojects
|
||||
@@ -160,7 +165,20 @@ class ProjectQueries:
|
||||
if not last_updated_task:
|
||||
return 0, []
|
||||
|
||||
redis_key = f"hyperparam_values_{company_id}_{'_'.join(project_ids)}_{section}_{name}_{allow_public}"
|
||||
redis_key = "_".join(
|
||||
str(part)
|
||||
for part in (
|
||||
"hyperparam_values",
|
||||
company_id,
|
||||
"_".join(project_ids),
|
||||
section,
|
||||
name,
|
||||
allow_public,
|
||||
pattern,
|
||||
page,
|
||||
page_size,
|
||||
)
|
||||
)
|
||||
last_update = last_updated_task.last_update or datetime.utcnow()
|
||||
cached_res = self._get_cached_param_values(
|
||||
key=redis_key,
|
||||
@@ -172,19 +190,27 @@ class ProjectQueries:
|
||||
if cached_res:
|
||||
return cached_res
|
||||
|
||||
max_values = config.get("services.tasks.hyperparam_values.max_count", 100)
|
||||
pipeline = [
|
||||
{
|
||||
"$match": {
|
||||
**company_constraint,
|
||||
**project_constraint,
|
||||
key_path: {"$exists": True},
|
||||
match_condition = {
|
||||
**company_constraint,
|
||||
**project_constraint,
|
||||
key_path: {"$exists": True},
|
||||
}
|
||||
if pattern:
|
||||
match_condition["$expr"] = {
|
||||
"$regexMatch": {
|
||||
"input": f"${key_path}.value",
|
||||
"regex": pattern,
|
||||
"options": "i",
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
pipeline = [
|
||||
{"$match": match_condition},
|
||||
{"$project": {"value": f"${key_path}.value"}},
|
||||
{"$group": {"_id": "$value"}},
|
||||
{"$sort": {"_id": 1}},
|
||||
{"$limit": max_values},
|
||||
{"$skip": page * page_size},
|
||||
{"$limit": page_size},
|
||||
{
|
||||
"$group": {
|
||||
"_id": 1,
|
||||
@@ -209,13 +235,19 @@ class ProjectQueries:
|
||||
|
||||
@classmethod
|
||||
def get_unique_metric_variants(
|
||||
cls, company_id, project_ids: Sequence[str], include_subprojects: bool
|
||||
cls,
|
||||
company_id,
|
||||
project_ids: Sequence[str],
|
||||
include_subprojects: bool,
|
||||
ids: Sequence[str],
|
||||
model_metrics: bool = False,
|
||||
):
|
||||
pipeline = [
|
||||
{
|
||||
"$match": {
|
||||
**cls._get_company_constraint(company_id),
|
||||
**cls._get_project_constraint(project_ids, include_subprojects),
|
||||
**({"_id": {"$in": ids}} if ids else {}),
|
||||
}
|
||||
},
|
||||
{"$project": {"metrics": {"$objectToArray": "$last_metrics"}}},
|
||||
@@ -246,7 +278,8 @@ class ProjectQueries:
|
||||
{"$sort": OrderedDict({"_id.metric": 1, "_id.variant": 1})},
|
||||
]
|
||||
|
||||
result = Task.aggregate(pipeline)
|
||||
entity_cls = Model if model_metrics else Task
|
||||
result = entity_cls.aggregate(pipeline)
|
||||
return [r["metrics"][0] for r in result]
|
||||
|
||||
@classmethod
|
||||
@@ -306,7 +339,11 @@ class ProjectQueries:
|
||||
key: str,
|
||||
include_subprojects: bool,
|
||||
allow_public: bool = True,
|
||||
page: int = 0,
|
||||
page_size: int = 500,
|
||||
) -> ParamValues:
|
||||
page = max(0, page)
|
||||
page_size = max(1, page_size)
|
||||
company_constraint = self._get_company_constraint(company_id, allow_public)
|
||||
project_constraint = self._get_project_constraint(
|
||||
project_ids, include_subprojects
|
||||
@@ -326,7 +363,7 @@ class ProjectQueries:
|
||||
if not last_updated_model:
|
||||
return 0, []
|
||||
|
||||
redis_key = f"modelmetadata_values_{company_id}_{'_'.join(project_ids)}_{key}_{allow_public}"
|
||||
redis_key = f"modelmetadata_values_{company_id}_{'_'.join(project_ids)}_{key}_{allow_public}_{page}_{page_size}"
|
||||
last_update = last_updated_model.last_update or datetime.utcnow()
|
||||
cached_res = self._get_cached_param_values(
|
||||
key=redis_key, last_update=last_update
|
||||
@@ -334,7 +371,6 @@ class ProjectQueries:
|
||||
if cached_res:
|
||||
return cached_res
|
||||
|
||||
max_values = config.get("services.models.metadata_values.max_count", 100)
|
||||
pipeline = [
|
||||
{
|
||||
"$match": {
|
||||
@@ -346,7 +382,8 @@ class ProjectQueries:
|
||||
{"$project": {"value": f"${key_path}.value"}},
|
||||
{"$group": {"_id": "$value"}},
|
||||
{"$sort": {"_id": 1}},
|
||||
{"$limit": max_values},
|
||||
{"$skip": page * page_size},
|
||||
{"$limit": page_size},
|
||||
{
|
||||
"$group": {
|
||||
"_id": 1,
|
||||
|
||||
@@ -2,8 +2,11 @@ import itertools
|
||||
from datetime import datetime
|
||||
from typing import Tuple, Optional, Sequence, Mapping
|
||||
|
||||
from boltons.iterutils import first
|
||||
|
||||
from apiserver import database
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.database.model import EntityVisibility
|
||||
from apiserver.database.model.project import Project
|
||||
|
||||
name_separator = "/"
|
||||
@@ -13,14 +16,16 @@ def _get_project_depth(project_name: str) -> int:
|
||||
return len(list(filter(None, project_name.split(name_separator))))
|
||||
|
||||
|
||||
def _validate_project_name(project_name: str) -> Tuple[str, str]:
|
||||
def _validate_project_name(project_name: str, raise_if_empty=True) -> Tuple[str, str]:
|
||||
"""
|
||||
Remove redundant '/' characters. Ensure that the project name is not empty
|
||||
Return the cleaned up project name and location
|
||||
"""
|
||||
name_parts = list(filter(None, project_name.split(name_separator)))
|
||||
name_parts = [p.strip() for p in project_name.split(name_separator) if p]
|
||||
if not name_parts:
|
||||
raise errors.bad_request.InvalidProjectName(name=project_name)
|
||||
if raise_if_empty:
|
||||
raise errors.bad_request.InvalidProjectName(name=project_name)
|
||||
return "", ""
|
||||
|
||||
return name_separator.join(name_parts), name_separator.join(name_parts[:-1])
|
||||
|
||||
@@ -33,7 +38,7 @@ def _ensure_project(
|
||||
If needed auto-create the project and all the missing projects in the path to it
|
||||
Return the project
|
||||
"""
|
||||
name = name.strip(name_separator)
|
||||
name, location = _validate_project_name(name, raise_if_empty=False)
|
||||
if not name:
|
||||
return None
|
||||
|
||||
@@ -42,7 +47,6 @@ def _ensure_project(
|
||||
return project
|
||||
|
||||
now = datetime.utcnow()
|
||||
name, location = _validate_project_name(name)
|
||||
project = Project(
|
||||
id=database.utils.id(),
|
||||
user=user,
|
||||
@@ -50,6 +54,7 @@ def _ensure_project(
|
||||
created=now,
|
||||
last_update=now,
|
||||
name=name,
|
||||
basename=name.split("/")[-1],
|
||||
**(creation_params or dict(description="")),
|
||||
)
|
||||
parent = _ensure_project(company, user, location, creation_params=creation_params)
|
||||
@@ -93,19 +98,42 @@ def _get_writable_project_from_name(
|
||||
"""
|
||||
Return a project from name. If the project not found then return None
|
||||
"""
|
||||
qs = Project.objects(company=company, name=name)
|
||||
qs = Project.objects(company__in=[company, ""], name=name)
|
||||
if _only:
|
||||
if "company" not in _only:
|
||||
_only = ["company", *_only]
|
||||
qs = qs.only(*_only)
|
||||
return qs.first()
|
||||
projects = list(qs)
|
||||
|
||||
if not projects:
|
||||
return
|
||||
|
||||
project = first(p for p in projects if p.company == company)
|
||||
if not project:
|
||||
raise errors.bad_request.PublicProjectExists(name=name)
|
||||
|
||||
return project
|
||||
|
||||
|
||||
ProjectsChildren = Mapping[str, Sequence[Project]]
|
||||
|
||||
|
||||
def _get_sub_projects(
|
||||
project_ids: Sequence[str], _only: Sequence[str] = ("id", "path")
|
||||
) -> Mapping[str, Sequence[Project]]:
|
||||
project_ids: Sequence[str],
|
||||
_only: Sequence[str] = ("id", "path"),
|
||||
search_hidden=True,
|
||||
allowed_ids: Sequence[str] = None,
|
||||
) -> ProjectsChildren:
|
||||
"""
|
||||
Return the list of child projects of all the levels for the parent project ids
|
||||
"""
|
||||
qs = Project.objects(path__in=project_ids)
|
||||
query = dict(path__in=project_ids)
|
||||
if not search_hidden:
|
||||
query["system_tags__nin"] = [EntityVisibility.hidden.value]
|
||||
if allowed_ids:
|
||||
query["id__in"] = allowed_ids
|
||||
|
||||
qs = Project.objects(**query)
|
||||
if _only:
|
||||
_only = set(_only) | {"path"}
|
||||
qs = qs.only(*_only)
|
||||
@@ -129,8 +157,8 @@ def _ids_with_children(project_ids: Sequence[str]) -> Sequence[str]:
|
||||
"""
|
||||
Return project ids with the ids of all the subprojects
|
||||
"""
|
||||
subprojects = Project.objects(path__in=project_ids).only("id")
|
||||
return list({*project_ids, *(child.id for child in subprojects)})
|
||||
children_ids = Project.objects(path__in=project_ids).scalar("id")
|
||||
return list({*project_ids, *children_ids})
|
||||
|
||||
|
||||
def _update_subproject_names(
|
||||
@@ -145,13 +173,17 @@ def _update_subproject_names(
|
||||
Optionally update the paths
|
||||
"""
|
||||
updated = 0
|
||||
now = datetime.utcnow()
|
||||
for child in children:
|
||||
child_suffix = name_separator.join(
|
||||
child.name.split(name_separator)[len(old_name.split(name_separator)) :]
|
||||
child.name.split(name_separator)[len(old_name.split(name_separator)):]
|
||||
)
|
||||
updates = {"name": name_separator.join((project.name, child_suffix))}
|
||||
updates = {
|
||||
"name": name_separator.join((project.name, child_suffix)),
|
||||
"last_update": now,
|
||||
}
|
||||
if update_path:
|
||||
updates["path"] = project.path + child.path[len(old_path) :]
|
||||
updates["path"] = project.path + child.path[len(old_path):]
|
||||
updated += child.update(upsert=False, **updates)
|
||||
|
||||
return updated
|
||||
@@ -166,6 +198,7 @@ def _reposition_project_with_children(
|
||||
project.name = name_separator.join(
|
||||
filter(None, (new_location, project.name.split(name_separator)[-1]))
|
||||
)
|
||||
project.last_update = datetime.utcnow()
|
||||
_save_under_parent(project, parent=parent)
|
||||
|
||||
moved = 1 + _update_subproject_names(
|
||||
|
||||
@@ -9,20 +9,35 @@ RANGE_IGNORE_VALUE = -1
|
||||
|
||||
class Builder:
|
||||
@staticmethod
|
||||
def dates_range(from_date: Union[int, float], to_date: Union[int, float]) -> dict:
|
||||
def dates_range(
|
||||
from_date: Optional[Union[int, float]] = None,
|
||||
to_date: Optional[Union[int, float]] = None,
|
||||
) -> dict:
|
||||
assert (
|
||||
from_date or to_date
|
||||
), "range condition requires that at least one of from_date or to_date specified"
|
||||
conditions = {}
|
||||
if from_date:
|
||||
conditions["gte"] = int(from_date)
|
||||
if to_date:
|
||||
conditions["lte"] = int(to_date)
|
||||
return {
|
||||
"range": {
|
||||
"timestamp": {
|
||||
"gte": int(from_date),
|
||||
"lte": int(to_date),
|
||||
**conditions,
|
||||
"format": "epoch_second",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def terms(field: str, values: Iterable[str]) -> dict:
|
||||
def terms(field: str, values: Iterable) -> dict:
|
||||
if isinstance(values, str):
|
||||
assert not isinstance(values, str), "apparently 'term' should be used here"
|
||||
return {"terms": {field: list(values)}}
|
||||
@staticmethod
|
||||
def term(field: str, value) -> dict:
|
||||
return {"term": {field: value}}
|
||||
|
||||
@staticmethod
|
||||
def normalize_range(
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from typing import Callable, Sequence, Optional, Tuple
|
||||
from typing import Sequence, Optional, Tuple, Union, Iterable
|
||||
|
||||
from elasticsearch import Elasticsearch
|
||||
from mongoengine import Q
|
||||
|
||||
from apiserver import database
|
||||
from apiserver.database.model.task.task import Task, TaskStatus
|
||||
from apiserver.es_factory import es_factory
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.bll.queue.queue_metrics import QueueMetrics
|
||||
@@ -14,6 +16,8 @@ from apiserver.database.errors import translate_errors_context
|
||||
from apiserver.database.model.queue import Queue, Entry
|
||||
|
||||
log = config.logger(__file__)
|
||||
MOVE_FIRST = "first"
|
||||
MOVE_LAST = "last"
|
||||
|
||||
|
||||
class QueueBLL(object):
|
||||
@@ -30,6 +34,7 @@ class QueueBLL(object):
|
||||
def create(
|
||||
company_id: str,
|
||||
name: str,
|
||||
display_name: str = None,
|
||||
tags: Optional[Sequence[str]] = None,
|
||||
system_tags: Optional[Sequence[str]] = None,
|
||||
metadata: Optional[dict] = None,
|
||||
@@ -42,6 +47,7 @@ class QueueBLL(object):
|
||||
company=company_id,
|
||||
created=now,
|
||||
name=name,
|
||||
display_name=display_name,
|
||||
tags=tags or [],
|
||||
system_tags=system_tags or [],
|
||||
metadata=metadata,
|
||||
@@ -50,8 +56,25 @@ class QueueBLL(object):
|
||||
queue.save()
|
||||
return queue
|
||||
|
||||
def get_by_name(
|
||||
self, company_id: str, queue_name: str, only: Optional[Sequence[str]] = None,
|
||||
) -> Queue:
|
||||
qs = Queue.objects(name=queue_name, company=company_id)
|
||||
if only:
|
||||
qs = qs.only(*only)
|
||||
|
||||
return qs.first()
|
||||
|
||||
@staticmethod
|
||||
def _get_task_entries_projection(max_task_entries: int) -> dict:
|
||||
return dict(slice__entries=max_task_entries)
|
||||
|
||||
def get_by_id(
|
||||
self, company_id: str, queue_id: str, only: Optional[Sequence[str]] = None
|
||||
self,
|
||||
company_id: str,
|
||||
queue_id: str,
|
||||
only: Optional[Sequence[str]] = None,
|
||||
max_task_entries: int = None,
|
||||
) -> Queue:
|
||||
"""
|
||||
Get queue by id
|
||||
@@ -62,6 +85,8 @@ class QueueBLL(object):
|
||||
qs = Queue.objects(**query)
|
||||
if only:
|
||||
qs = qs.only(*only)
|
||||
if max_task_entries:
|
||||
qs = qs.fields(**self._get_task_entries_projection(max_task_entries))
|
||||
queue = qs.first()
|
||||
if not queue:
|
||||
raise errors.bad_request.InvalidQueueId(**query)
|
||||
@@ -112,24 +137,85 @@ class QueueBLL(object):
|
||||
self.get_by_id(company_id=company_id, queue_id=queue_id, only=("id",))
|
||||
return Queue.safe_update(company_id, queue_id, update_fields)
|
||||
|
||||
def delete(self, company_id: str, queue_id: str, force: bool) -> None:
|
||||
def _update_task_status_on_removal_from_queue(
|
||||
self,
|
||||
company_id: str,
|
||||
user_id: str,
|
||||
task_ids: Iterable[str],
|
||||
queue_id: str,
|
||||
reason: str
|
||||
) -> Sequence[str]:
|
||||
from apiserver.bll.task import ChangeStatusRequest
|
||||
tasks = []
|
||||
for task_id in task_ids:
|
||||
try:
|
||||
task = Task.get(
|
||||
company=company_id,
|
||||
id=task_id,
|
||||
execution__queue=queue_id,
|
||||
_only=[
|
||||
"id",
|
||||
"company",
|
||||
"status",
|
||||
"enqueue_status",
|
||||
"project",
|
||||
],
|
||||
)
|
||||
if not task:
|
||||
continue
|
||||
|
||||
tasks.append(task.id)
|
||||
ChangeStatusRequest(
|
||||
task=task,
|
||||
new_status=task.enqueue_status or TaskStatus.created,
|
||||
status_reason=reason,
|
||||
status_message="",
|
||||
user_id=user_id,
|
||||
force=True,
|
||||
).execute(
|
||||
enqueue_status=None,
|
||||
unset__execution__queue=1,
|
||||
)
|
||||
except Exception as ex:
|
||||
log.error(
|
||||
f"Failed updating task {task_id} status on removal from queue: {queue_id}, {str(ex)}"
|
||||
)
|
||||
|
||||
return tasks
|
||||
|
||||
def delete(self, company_id: str, user_id: str, queue_id: str, force: bool) -> Sequence[str]:
|
||||
"""
|
||||
Delete the queue
|
||||
:raise errors.bad_request.InvalidQueueId: if the queue is not found
|
||||
:raise errors.bad_request.QueueNotEmpty: if the queue is not empty and 'force' not set
|
||||
"""
|
||||
with translate_errors_context():
|
||||
queue = self.get_by_id(company_id=company_id, queue_id=queue_id)
|
||||
if queue.entries and not force:
|
||||
raise errors.bad_request.QueueNotEmpty(
|
||||
"use force=true to delete", id=queue_id
|
||||
)
|
||||
queue = self.get_by_id(company_id=company_id, queue_id=queue_id)
|
||||
if not queue.entries:
|
||||
queue.delete()
|
||||
return []
|
||||
|
||||
if not force:
|
||||
raise errors.bad_request.QueueNotEmpty(
|
||||
"use force=true to delete", id=queue_id
|
||||
)
|
||||
|
||||
tasks = self._update_task_status_on_removal_from_queue(
|
||||
company_id=company_id,
|
||||
user_id=user_id,
|
||||
task_ids={item.task for item in queue.entries},
|
||||
queue_id=queue_id,
|
||||
reason=f"Queue {queue_id} was deleted",
|
||||
)
|
||||
|
||||
queue.delete()
|
||||
return tasks
|
||||
|
||||
def get_all(
|
||||
self,
|
||||
company_id: str,
|
||||
query_dict: dict,
|
||||
query: Q = None,
|
||||
max_task_entries: int = None,
|
||||
ret_params: dict = None,
|
||||
) -> Sequence[dict]:
|
||||
"""Get all the queues according to the query"""
|
||||
@@ -138,13 +224,26 @@ class QueueBLL(object):
|
||||
company=company_id,
|
||||
parameters=query_dict,
|
||||
query_dict=query_dict,
|
||||
query=query,
|
||||
projection_fields=self._get_task_entries_projection(max_task_entries)
|
||||
if max_task_entries
|
||||
else None,
|
||||
ret_params=ret_params,
|
||||
)
|
||||
|
||||
def check_for_workers(self, company_id: str, queue_id: str) -> bool:
|
||||
for worker in self.worker_bll.get_all(company_id):
|
||||
if queue_id in worker.queues:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def get_queue_infos(
|
||||
self,
|
||||
company_id: str,
|
||||
query_dict: dict,
|
||||
query: Q = None,
|
||||
max_task_entries: int = None,
|
||||
ret_params: dict = None,
|
||||
) -> Sequence[dict]:
|
||||
"""
|
||||
@@ -155,7 +254,11 @@ class QueueBLL(object):
|
||||
res = Queue.get_many_with_join(
|
||||
company=company_id,
|
||||
query_dict=query_dict,
|
||||
query=query,
|
||||
override_projection=projection,
|
||||
projection_fields=self._get_task_entries_projection(max_task_entries)
|
||||
if max_task_entries
|
||||
else None,
|
||||
ret_params=ret_params,
|
||||
)
|
||||
|
||||
@@ -169,6 +272,7 @@ class QueueBLL(object):
|
||||
{
|
||||
"name": w.id,
|
||||
"ip": w.ip,
|
||||
"key": w.key,
|
||||
"task": w.task.to_struct() if w.task else None,
|
||||
}
|
||||
for w in queue_workers.get(item["id"], [])
|
||||
@@ -203,16 +307,22 @@ class QueueBLL(object):
|
||||
|
||||
return res
|
||||
|
||||
def get_next_task(self, company_id: str, queue_id: str) -> Optional[Entry]:
|
||||
def get_next_task(
|
||||
self, company_id: str, queue_id: str, task_id: str = None
|
||||
) -> Optional[Entry]:
|
||||
"""
|
||||
Atomically pop and return the first task from the queue (or None)
|
||||
:raise errors.bad_request.InvalidQueueId: if the queue does not exist
|
||||
"""
|
||||
with translate_errors_context():
|
||||
query = dict(id=queue_id, company=company_id)
|
||||
queue = Queue.objects(**query).modify(pop__entries=-1, upsert=False)
|
||||
queue = Queue.objects(
|
||||
**query, **({"entries__0__task": task_id} if task_id else {})
|
||||
).modify(pop__entries=-1, upsert=False)
|
||||
if not queue:
|
||||
raise errors.bad_request.InvalidQueueId(**query)
|
||||
if not task_id or not Queue.objects(**query).first():
|
||||
raise errors.bad_request.InvalidQueueId(**query)
|
||||
return
|
||||
|
||||
self.metrics.log_queue_metrics_to_es(company_id, queues=[queue])
|
||||
|
||||
@@ -226,7 +336,36 @@ class QueueBLL(object):
|
||||
|
||||
return queue.entries[0]
|
||||
|
||||
def remove_task(self, company_id: str, queue_id: str, task_id: str) -> int:
|
||||
def clear_queue(
|
||||
self,
|
||||
company_id: str,
|
||||
user_id: str,
|
||||
queue_id: str,
|
||||
):
|
||||
queue = Queue.objects(company=company_id, id=queue_id).first()
|
||||
if not queue:
|
||||
raise errors.bad_request.InvalidQueueId(
|
||||
queue=queue_id
|
||||
)
|
||||
|
||||
if not queue.entries:
|
||||
return []
|
||||
|
||||
tasks = self._update_task_status_on_removal_from_queue(
|
||||
company_id=company_id,
|
||||
user_id=user_id,
|
||||
task_ids={item.task for item in queue.entries},
|
||||
queue_id=queue_id,
|
||||
reason=f"Queue {queue_id} was cleared",
|
||||
)
|
||||
|
||||
queue.update(entries=[])
|
||||
queue.reload()
|
||||
self.metrics.log_queue_metrics_to_es(company_id=company_id, queues=[queue])
|
||||
|
||||
return tasks
|
||||
|
||||
def remove_task(self, company_id: str, user_id: str, queue_id: str, task_id: str, update_task_status: bool = False) -> int:
|
||||
"""
|
||||
Removes the task from the queue and returns the number of removed items
|
||||
:raise errors.bad_request.InvalidQueueOrTaskNotQueued: if the task is not found in the queue
|
||||
@@ -241,6 +380,14 @@ class QueueBLL(object):
|
||||
res = Queue.objects(entries__task=task_id, **query).update_one(
|
||||
pull_all__entries=entries_to_remove, last_update=datetime.utcnow()
|
||||
)
|
||||
if res and update_task_status:
|
||||
self._update_task_status_on_removal_from_queue(
|
||||
company_id=company_id,
|
||||
user_id=user_id,
|
||||
task_ids=[task_id],
|
||||
queue_id=queue_id,
|
||||
reason=f"Task was removed from the queue {queue_id}",
|
||||
)
|
||||
|
||||
queue.reload()
|
||||
self.metrics.log_queue_metrics_to_es(company_id=company_id, queues=[queue])
|
||||
@@ -248,43 +395,147 @@ class QueueBLL(object):
|
||||
return len(entries_to_remove) if res else 0
|
||||
|
||||
def reposition_task(
|
||||
self,
|
||||
company_id: str,
|
||||
queue_id: str,
|
||||
task_id: str,
|
||||
pos_func: Callable[[int], int],
|
||||
self, company_id: str, queue_id: str, task_id: str, move_count: Union[int, str],
|
||||
) -> int:
|
||||
"""
|
||||
Moves the task in the queue to the position calculated by pos_func
|
||||
Returns the updated task position in the queue
|
||||
"""
|
||||
with translate_errors_context():
|
||||
queue = self.get_queue_with_task(
|
||||
|
||||
def get_queue_and_task_position():
|
||||
q = self.get_queue_with_task(
|
||||
company_id=company_id, queue_id=queue_id, task_id=task_id
|
||||
)
|
||||
return q, next(i for i, e in enumerate(q.entries) if e.task == task_id)
|
||||
|
||||
position = next(i for i, e in enumerate(queue.entries) if e.task == task_id)
|
||||
new_position = pos_func(position)
|
||||
with translate_errors_context():
|
||||
queue, position = get_queue_and_task_position()
|
||||
if move_count == MOVE_FIRST:
|
||||
new_position = 0
|
||||
elif move_count == MOVE_LAST:
|
||||
new_position = len(queue.entries) - 1
|
||||
else:
|
||||
new_position = position + move_count
|
||||
if new_position == position:
|
||||
return new_position
|
||||
|
||||
if new_position != position:
|
||||
entry = queue.entries[position]
|
||||
query = dict(id=queue_id, company=company_id)
|
||||
updated = Queue.objects(entries__task=task_id, **query).update_one(
|
||||
pull__entries=entry, last_update=datetime.utcnow()
|
||||
)
|
||||
if not updated:
|
||||
raise errors.bad_request.RemovedDuringReposition(
|
||||
task=task_id, **query
|
||||
)
|
||||
inst = {"$push": {"entries": {"$each": [entry.to_proper_dict()]}}}
|
||||
if new_position >= 0:
|
||||
inst["$push"]["entries"]["$position"] = new_position
|
||||
res = Queue.objects(entries__task__ne=task_id, **query).update_one(
|
||||
__raw__=inst
|
||||
)
|
||||
if not res:
|
||||
raise errors.bad_request.FailedAddingDuringReposition(
|
||||
task=task_id, **query
|
||||
)
|
||||
without_entry = {
|
||||
"$filter": {
|
||||
"input": "$entries",
|
||||
"as": "entry",
|
||||
"cond": {"$ne": ["$$entry.task", task_id]},
|
||||
}
|
||||
}
|
||||
task_entry = {
|
||||
"$filter": {
|
||||
"input": "$entries",
|
||||
"as": "entry",
|
||||
"cond": {"$eq": ["$$entry.task", task_id]},
|
||||
}
|
||||
}
|
||||
if move_count == MOVE_FIRST:
|
||||
operations = [
|
||||
{
|
||||
"$set": {
|
||||
"entries": {"$concatArrays": [task_entry, without_entry]}
|
||||
}
|
||||
}
|
||||
]
|
||||
elif move_count == MOVE_LAST:
|
||||
operations = [
|
||||
{
|
||||
"$set": {
|
||||
"entries": {"$concatArrays": [without_entry, task_entry]}
|
||||
}
|
||||
}
|
||||
]
|
||||
else:
|
||||
operations = [
|
||||
{
|
||||
"$set": {
|
||||
"new_pos": {
|
||||
"$add": [
|
||||
{"$indexOfArray": ["$entries.task", task_id]},
|
||||
move_count,
|
||||
]
|
||||
},
|
||||
"without_entry": without_entry,
|
||||
"task_entry": task_entry,
|
||||
}
|
||||
},
|
||||
{
|
||||
"$set": {
|
||||
"entries": {
|
||||
"$switch": {
|
||||
"branches": [
|
||||
{
|
||||
"case": {"$lte": ["$new_pos", 0]},
|
||||
"then": {
|
||||
"$concatArrays": [
|
||||
"$task_entry",
|
||||
"$without_entry",
|
||||
]
|
||||
},
|
||||
},
|
||||
{
|
||||
"case": {
|
||||
"$gte": [
|
||||
"$new_pos",
|
||||
{"$size": "$without_entry"},
|
||||
]
|
||||
},
|
||||
"then": {
|
||||
"$concatArrays": [
|
||||
"$without_entry",
|
||||
"$task_entry",
|
||||
]
|
||||
},
|
||||
},
|
||||
],
|
||||
"default": {
|
||||
"$concatArrays": [
|
||||
{"$slice": ["$without_entry", "$new_pos"]},
|
||||
"$task_entry",
|
||||
{
|
||||
"$slice": [
|
||||
"$without_entry",
|
||||
"$new_pos",
|
||||
{"$size": "$without_entry"},
|
||||
]
|
||||
},
|
||||
]
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{"$unset": ["new_pos", "without_entry", "task_entry"]},
|
||||
]
|
||||
|
||||
return new_position
|
||||
updated = Queue.objects(
|
||||
id=queue_id, company=company_id, entries__task=task_id
|
||||
).update_one(__raw__=operations)
|
||||
|
||||
if not updated:
|
||||
raise errors.bad_request.FailedAddingDuringReposition(task=task_id)
|
||||
|
||||
return get_queue_and_task_position()[1]
|
||||
|
||||
def count_entries(self, company: str, queue_id: str) -> Optional[int]:
|
||||
res = next(
|
||||
Queue.aggregate(
|
||||
[
|
||||
{
|
||||
"$match": {
|
||||
"company": {"$in": ["", company]},
|
||||
"_id": queue_id,
|
||||
}
|
||||
},
|
||||
{"$project": {"count": {"$size": "$entries"}}},
|
||||
]
|
||||
),
|
||||
None,
|
||||
)
|
||||
if res is None:
|
||||
raise errors.bad_request.InvalidQueueId(queue_id=queue_id)
|
||||
return int(res.get("count"))
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from time import sleep
|
||||
from typing import Sequence
|
||||
|
||||
import elasticsearch.helpers
|
||||
from boltons.typeutils import classproperty
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
from apiserver.es_factory import es_factory
|
||||
@@ -11,25 +13,30 @@ from apiserver.bll.query import Builder as QueryBuilder
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.errors import translate_errors_context
|
||||
from apiserver.database.model.queue import Queue, Entry
|
||||
from apiserver.timing_context import TimingContext
|
||||
from apiserver.redis_manager import redman
|
||||
from apiserver.utilities.threads_manager import ThreadsManager
|
||||
|
||||
log = config.logger(__file__)
|
||||
_conf = config.get("services.queues")
|
||||
_queue_metrics_key_pattern = "queue_metrics_{queue}"
|
||||
redis = redman.connection("apiserver")
|
||||
|
||||
|
||||
class EsKeys:
|
||||
WAITING_TIME_FIELD = "average_waiting_time"
|
||||
QUEUE_LENGTH_FIELD = "queue_length"
|
||||
TIMESTAMP_FIELD = "timestamp"
|
||||
QUEUE_FIELD = "queue"
|
||||
|
||||
|
||||
class QueueMetrics:
|
||||
class EsKeys:
|
||||
WAITING_TIME_FIELD = "average_waiting_time"
|
||||
QUEUE_LENGTH_FIELD = "queue_length"
|
||||
TIMESTAMP_FIELD = "timestamp"
|
||||
QUEUE_FIELD = "queue"
|
||||
|
||||
def __init__(self, es: Elasticsearch):
|
||||
self.es = es
|
||||
|
||||
@staticmethod
|
||||
def _queue_metrics_prefix_for_company(company_id: str) -> str:
|
||||
"""Returns the es index prefix for the company"""
|
||||
return f"queue_metrics_{company_id}_"
|
||||
return f"queue_metrics_{company_id.lower()}_"
|
||||
|
||||
@staticmethod
|
||||
def _get_es_index_suffix():
|
||||
@@ -49,7 +56,7 @@ class QueueMetrics:
|
||||
total_waiting_in_secs = sum((now - e.added).total_seconds() for e in entries)
|
||||
return total_waiting_in_secs / len(entries)
|
||||
|
||||
def log_queue_metrics_to_es(self, company_id: str, queues: Sequence[Queue]) -> bool:
|
||||
def log_queue_metrics_to_es(self, company_id: str, queues: Sequence[Queue]) -> int:
|
||||
"""
|
||||
Calculate and write queue statistics (avg waiting time and queue length) to Elastic
|
||||
:return: True if the write to es was successful, false otherwise
|
||||
@@ -63,23 +70,22 @@ class QueueMetrics:
|
||||
|
||||
def make_doc(queue: Queue) -> dict:
|
||||
entries = [e for e in queue.entries if e.added]
|
||||
return dict(
|
||||
_index=es_index,
|
||||
_source={
|
||||
self.EsKeys.TIMESTAMP_FIELD: timestamp,
|
||||
self.EsKeys.QUEUE_FIELD: queue.id,
|
||||
self.EsKeys.WAITING_TIME_FIELD: self._calc_avg_waiting_time(
|
||||
entries
|
||||
),
|
||||
self.EsKeys.QUEUE_LENGTH_FIELD: len(entries),
|
||||
},
|
||||
)
|
||||
return {
|
||||
EsKeys.TIMESTAMP_FIELD: timestamp,
|
||||
EsKeys.QUEUE_FIELD: queue.id,
|
||||
EsKeys.WAITING_TIME_FIELD: self._calc_avg_waiting_time(entries),
|
||||
EsKeys.QUEUE_LENGTH_FIELD: len(entries),
|
||||
}
|
||||
|
||||
actions = list(map(make_doc, queues))
|
||||
logged = 0
|
||||
for q in queues:
|
||||
queue_doc = make_doc(q)
|
||||
self.es.index(index=es_index, document=queue_doc)
|
||||
redis_key = _queue_metrics_key_pattern.format(queue=q.id)
|
||||
redis.set(redis_key, json.dumps(queue_doc))
|
||||
logged += 1
|
||||
|
||||
es_res = elasticsearch.helpers.bulk(self.es, actions)
|
||||
added, errors = es_res[:2]
|
||||
return (added == len(actions)) and not errors
|
||||
return logged
|
||||
|
||||
def _log_current_metrics(self, company_id: str, queue_ids=Sequence[str]):
|
||||
query = dict(company=company_id)
|
||||
@@ -90,8 +96,7 @@ class QueueMetrics:
|
||||
|
||||
def _search_company_metrics(self, company_id: str, es_req: dict) -> dict:
|
||||
return self.es.search(
|
||||
index=f"{self._queue_metrics_prefix_for_company(company_id)}*",
|
||||
body=es_req,
|
||||
index=f"{self._queue_metrics_prefix_for_company(company_id)}*", body=es_req,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@@ -105,13 +110,13 @@ class QueueMetrics:
|
||||
return {
|
||||
"dates": {
|
||||
"date_histogram": {
|
||||
"field": cls.EsKeys.TIMESTAMP_FIELD,
|
||||
"field": EsKeys.TIMESTAMP_FIELD,
|
||||
"fixed_interval": f"{interval}s",
|
||||
"min_doc_count": 1,
|
||||
},
|
||||
"aggs": {
|
||||
"queues": {
|
||||
"terms": {"field": cls.EsKeys.QUEUE_FIELD},
|
||||
"terms": {"field": EsKeys.QUEUE_FIELD},
|
||||
"aggs": cls._get_top_waiting_agg(),
|
||||
}
|
||||
},
|
||||
@@ -128,13 +133,13 @@ class QueueMetrics:
|
||||
"top_avg_waiting": {
|
||||
"top_hits": {
|
||||
"sort": [
|
||||
{cls.EsKeys.WAITING_TIME_FIELD: {"order": "desc"}},
|
||||
{cls.EsKeys.QUEUE_LENGTH_FIELD: {"order": "desc"}},
|
||||
{EsKeys.WAITING_TIME_FIELD: {"order": "desc"}},
|
||||
{EsKeys.QUEUE_LENGTH_FIELD: {"order": "desc"}},
|
||||
],
|
||||
"_source": {
|
||||
"includes": [
|
||||
cls.EsKeys.WAITING_TIME_FIELD,
|
||||
cls.EsKeys.QUEUE_LENGTH_FIELD,
|
||||
EsKeys.WAITING_TIME_FIELD,
|
||||
EsKeys.QUEUE_LENGTH_FIELD,
|
||||
]
|
||||
},
|
||||
"size": 1,
|
||||
@@ -149,6 +154,7 @@ class QueueMetrics:
|
||||
to_date: float,
|
||||
interval: int,
|
||||
queue_ids: Sequence[str],
|
||||
refresh: bool = False,
|
||||
) -> dict:
|
||||
"""
|
||||
Get the company queue metrics in the specified time range.
|
||||
@@ -158,7 +164,8 @@ class QueueMetrics:
|
||||
In case no queue ids are specified the avg across all the
|
||||
company queues is calculated for each metric
|
||||
"""
|
||||
# self._log_current_metrics(company, queue_ids=queue_ids)
|
||||
if refresh:
|
||||
self._log_current_metrics(company_id, queue_ids=queue_ids)
|
||||
|
||||
if from_date >= to_date:
|
||||
raise bad_request.FieldsValueError("from_date must be less than to_date")
|
||||
@@ -174,7 +181,7 @@ class QueueMetrics:
|
||||
"aggs": self._get_dates_agg(interval),
|
||||
}
|
||||
|
||||
with translate_errors_context(), TimingContext("es", "get_queue_metrics"):
|
||||
with translate_errors_context():
|
||||
res = self._search_company_metrics(company_id, es_req)
|
||||
|
||||
if "aggregations" not in res:
|
||||
@@ -256,7 +263,52 @@ class QueueMetrics:
|
||||
continue
|
||||
res = queue_data["top_avg_waiting"]["hits"]["hits"][0]["_source"]
|
||||
queue_metrics[queue_data["key"]] = {
|
||||
"queue_length": res[cls.EsKeys.QUEUE_LENGTH_FIELD],
|
||||
"avg_waiting_time": res[cls.EsKeys.WAITING_TIME_FIELD],
|
||||
"queue_length": res[EsKeys.QUEUE_LENGTH_FIELD],
|
||||
"avg_waiting_time": res[EsKeys.WAITING_TIME_FIELD],
|
||||
}
|
||||
return queue_metrics
|
||||
|
||||
|
||||
class MetricsRefresher:
    """
    Background watchdog that re-logs queue metrics for queues whose last
    cached metrics document is missing or older than the refresh interval.
    """

    threads = ThreadsManager()

    @classproperty
    def watch_interval_sec(self):
        """Refresh interval in seconds (default 300). A falsy value disables the watchdog."""
        return _conf.get("metrics_refresh_interval_sec", 300)

    @classmethod
    @threads.register("queue_metrics_refresh_watchdog", daemon=True)
    def start(cls, queue_metrics: QueueMetrics = None):
        """
        Run the refresh loop. Does not return unless the watchdog is disabled.

        :param queue_metrics: the metrics object used for logging to ES.
            When omitted, the one owned by a new QueueBLL instance is used.
        """
        if not cls.watch_interval_sec:
            return

        if not queue_metrics:
            # imported here rather than at module level
            # NOTE(review): presumably to avoid a circular import with queue_bll - confirm
            from .queue_bll import QueueBLL

            queue_metrics = QueueBLL().metrics

        sleep(10)
        while True:
            try:
                for queue in Queue.objects():
                    timestamp = es_factory.get_timestamp_millis()
                    doc_time = 0
                    try:
                        redis_key = _queue_metrics_key_pattern.format(queue=queue.id)
                        data = redis.get(redis_key)
                        if data:
                            queue_doc = json.loads(data)
                            # a cached doc without a timestamp is treated as "never logged"
                            # instead of failing on int(None)
                            doc_time = int(queue_doc.get(EsKeys.TIMESTAMP_FIELD) or 0)
                    except Exception as ex:
                        log.exception(
                            f"Error reading queue metrics data for queue {queue.id}: {str(ex)}"
                        )

                    is_stale = (
                        not doc_time
                        or (timestamp - doc_time) > cls.watch_interval_sec * 1000
                    )
                    if not is_stale:
                        continue

                    try:
                        queue_metrics.log_queue_metrics_to_es(queue.company, [queue])
                    except Exception as ex:
                        # isolate the failure: one problematic queue must not prevent
                        # the queues that follow it from being refreshed in this pass
                        log.exception(
                            f"Failed collecting queue metrics for queue {queue.id}: {str(ex)}"
                        )
            except Exception as ex:
                log.exception(f"Failed collecting queue metrics: {str(ex)}")
            sleep(60)
|
||||
|
||||
@@ -4,7 +4,6 @@ from typing import Optional, TypeVar, Generic, Type, Callable
|
||||
from redis import StrictRedis
|
||||
|
||||
from apiserver import database
|
||||
from apiserver.timing_context import TimingContext
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
@@ -31,20 +30,17 @@ class RedisCacheManager(Generic[T]):
|
||||
|
||||
def set_state(self, state: T) -> None:
|
||||
redis_key = self._get_redis_key(state.id)
|
||||
with TimingContext("redis", "cache_set_state"):
|
||||
self.redis.set(redis_key, state.to_json())
|
||||
self.redis.expire(redis_key, self.expiration_interval)
|
||||
self.redis.set(redis_key, state.to_json())
|
||||
self.redis.expire(redis_key, self.expiration_interval)
|
||||
|
||||
def get_state(self, state_id) -> Optional[T]:
|
||||
redis_key = self._get_redis_key(state_id)
|
||||
with TimingContext("redis", "cache_get_state"):
|
||||
response = self.redis.get(redis_key)
|
||||
response = self.redis.get(redis_key)
|
||||
if response:
|
||||
return self.state_class.from_json(response)
|
||||
|
||||
def delete_state(self, state_id) -> None:
|
||||
with TimingContext("redis", "cache_delete_state"):
|
||||
self.redis.delete(self._get_redis_key(state_id))
|
||||
self.redis.delete(self._get_redis_key(state_id))
|
||||
|
||||
def _get_redis_key(self, state_id):
|
||||
return f"{self.state_class}/{state_id}"
|
||||
|
||||
376
apiserver/bll/serving/__init__.py
Normal file
376
apiserver/bll/serving/__init__.py
Normal file
@@ -0,0 +1,376 @@
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from enum import Enum, auto
|
||||
from operator import attrgetter
|
||||
from time import time
|
||||
from typing import Optional, Sequence, Union
|
||||
|
||||
import attr
|
||||
from boltons.iterutils import chunked_iter, bucketize
|
||||
from pyhocon import ConfigTree
|
||||
|
||||
from apiserver.apimodels.serving import (
|
||||
ServingContainerEntry,
|
||||
RegisterRequest,
|
||||
StatusReportRequest,
|
||||
)
|
||||
from apiserver.apimodels.workers import MachineStats
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.redis_manager import redman
|
||||
from .stats import ServingStats
|
||||
|
||||
|
||||
log = config.logger(__file__)
|
||||
|
||||
|
||||
class ServingBLL:
    """
    Keeps track of model serving containers in redis and summarises them per endpoint.

    Each container is stored as a json document under its own key, expiring after
    the container register timeout. In addition, a sorted set per (company, endpoint url)
    holds the keys of the containers serving that url, scored by their expiration time.
    """

    def __init__(self, redis=None):
        self.conf = config.get("services.serving", ConfigTree())
        self.redis = redis or redman.connection("workers")

    @staticmethod
    def _get_url_key(company: str, url: str):
        """Build the redis key of the sorted set holding the containers of the url"""
        return f"serving_url_{company}_{url}"

    @staticmethod
    def _get_container_key(company: str, container_id: str) -> str:
        """Build redis key from company and container_id"""
        return f"serving_container_{company}_{container_id}"

    @staticmethod
    def _parse_entry(data) -> Optional[ServingContainerEntry]:
        """
        Parse a single value returned from redis mget.
        Returns None for a missing value (mget yields None for keys that
        no longer exist) or for a value that cannot be parsed.
        """
        if data is None:
            return None
        try:
            return ServingContainerEntry.from_json(data)
        except Exception as ex:
            log.error(f"Failed parsing container entry {str(ex)}")
            return None

    @staticmethod
    def _get_machine_stats_data(machine_stats: MachineStats) -> dict:
        """
        Return the cpu and gpu counts derived from the reported usage values:
        the amount of items for a list/tuple value, 1 for any other
        non-empty value and 0 when nothing was reported.
        """
        ret = {"cpu_count": 0, "gpu_count": 0}
        if not machine_stats:
            return ret

        for value, field in (
            (machine_stats.cpu_usage, "cpu_count"),
            (machine_stats.gpu_usage, "gpu_count"),
        ):
            if value is None:
                continue
            ret[field] = len(value) if isinstance(value, (list, tuple)) else 1

        return ret

    def _save_serving_container_entry(self, entry: ServingContainerEntry):
        """Store the entry under its key and refresh its membership in the url set"""
        self.redis.setex(
            entry.key, timedelta(seconds=entry.register_timeout), entry.to_json()
        )

        url_key = self._get_url_key(entry.company_id, entry.endpoint_url)
        expiration = int(time()) + entry.register_timeout
        container_item = {entry.key: expiration}
        self.redis.zadd(url_key, container_item)
        # make sure that url set will not get stuck in redis
        # indefinitely in case no more containers report to it
        self.redis.expire(url_key, max(3600, entry.register_timeout))

    def _get_serving_container_entry(
        self, company_id: str, container_id: str
    ) -> Optional[ServingContainerEntry]:
        """
        Get a container entry for the provided container ID.
        Returns None if the entry does not exist or cannot be parsed
        """
        key = self._get_container_key(company_id, container_id)
        data = self.redis.get(key)
        if not data:
            return None

        try:
            return ServingContainerEntry.from_json(data)
        except Exception as e:
            msg = "Failed parsing container entry"
            log.exception(f"{msg}: {str(e)}")
            return None

    def register_serving_container(
        self,
        company_id: str,
        request: RegisterRequest,
        ip: str = "",
    ) -> ServingContainerEntry:
        """
        Register a serving container
        :return: the stored container entry
        """
        now = datetime.now(timezone.utc)
        key = self._get_container_key(company_id, request.container_id)
        entry = ServingContainerEntry(
            **request.to_struct(),
            key=key,
            company_id=company_id,
            ip=ip,
            register_time=now,
            register_timeout=request.timeout,
            last_activity_time=now,
        )
        self._save_serving_container_entry(entry)
        return entry

    def unregister_serving_container(
        self,
        company_id: str,
        container_id: str,
    ) -> None:
        """
        Unregister a serving container
        :raise bad_request.ContainerNotRegistered: if the container was not found
            and the container_auto_unregister setting is turned off
        """
        entry = self._get_serving_container_entry(company_id, container_id)
        if entry:
            url_key = self._get_url_key(entry.company_id, entry.endpoint_url)
            self.redis.zrem(url_key, entry.key)

        key = self._get_container_key(company_id, container_id)
        res = self.redis.delete(key)
        if res:
            return

        if not self.conf.get("container_auto_unregister", True):
            raise errors.bad_request.ContainerNotRegistered(container=container_id)

    def container_status_report(
        self,
        company_id: str,
        report: StatusReportRequest,
        ip: str = "",
    ) -> None:
        """
        Serving container status report
        Stores the reported state (registering the container on the fly if allowed)
        and logs the reported statistics to ES
        :raise bad_request.ContainerNotRegistered: if the container is not registered
            and the container_auto_register setting is turned off
        """
        container_id = report.container_id
        now = datetime.now(timezone.utc)
        entry = self._get_serving_container_entry(company_id, container_id)
        if entry:
            # keep the registration details of the already known container
            ip = ip or entry.ip
            register_time = entry.register_time
            register_timeout = entry.register_timeout
        else:
            if not self.conf.get("container_auto_register", True):
                raise errors.bad_request.ContainerNotRegistered(container=container_id)
            register_time = now
            register_timeout = int(
                self.conf.get("default_container_timeout_sec", 10 * 60)
            )

        key = self._get_container_key(company_id, container_id)
        entry = ServingContainerEntry(
            **report.to_struct(),
            key=key,
            company_id=company_id,
            ip=ip,
            register_time=register_time,
            register_timeout=register_timeout,
            last_activity_time=now,
        )
        self._save_serving_container_entry(entry)
        ServingStats.log_stats_to_es(entry)

    def _get_all(
        self,
        company_id: str,
    ) -> Sequence[ServingContainerEntry]:
        """Get the entries of all the company containers that are currently stored in redis"""
        container_keys = list(
            self.redis.scan_iter(self._get_container_key(company_id, "*"))
        )
        entries = []
        for keys_chunk in chunked_iter(container_keys, 1000):
            # a key may expire between the scan and the mget,
            # such keys are returned as None and skipped by _parse_entry
            for data in self.redis.mget(keys_chunk) or []:
                entry = self._parse_entry(data)
                if entry is not None:
                    entries.append(entry)

        return entries

    @attr.s(auto_attribs=True)
    class Counter:
        """Accumulates the values of a single entry field according to the aggregation type"""

        class AggType(Enum):
            avg = auto()
            max = auto()
            total = auto()
            count = auto()

        name: str
        field: str
        agg_type: AggType
        float_precision: int = None

        _max: Union[int, float, datetime] = attr.field(init=False, default=None)
        _total: Union[int, float] = attr.field(init=False, default=0)
        _count: int = attr.field(init=False, default=0)

        def add(self, entry: ServingContainerEntry):
            """Account for the entry field value. Entries with no value are ignored"""
            value = getattr(entry, self.field, None)
            if value is None:
                return

            self._count += 1
            if self.agg_type == self.AggType.max:
                self._max = value if self._max is None else max(self._max, value)
            elif self.agg_type in (self.AggType.avg, self.AggType.total):
                # a pure count does not need (and may not support) summing the values
                self._total += value

        def __call__(self):
            """
            Return the aggregated value.
            For avg returns None if no values were added, otherwise the average
            rounded to float_precision digits (to an integer if not set)
            """
            if self.agg_type == self.AggType.count:
                return self._count

            if self.agg_type == self.AggType.max:
                return self._max

            if self.agg_type == self.AggType.total:
                return self._total

            if not self._count:
                return None
            avg = self._total / self._count
            return (
                round(avg, self.float_precision) if self.float_precision else round(avg)
            )

    def _get_summary(self, entries: Sequence[ServingContainerEntry]) -> dict:
        """
        Summarise the containers of a single endpoint url.
        The descriptive fields are taken from the first entry
        so the passed entries should not be empty
        """
        counters = [
            self.Counter(
                name="uptime_sec",
                field="uptime_sec",
                agg_type=self.Counter.AggType.max,
            ),
            self.Counter(
                name="requests",
                field="requests_num",
                agg_type=self.Counter.AggType.total,
            ),
            self.Counter(
                name="requests_min",
                field="requests_min",
                agg_type=self.Counter.AggType.avg,
                float_precision=2,
            ),
            self.Counter(
                name="latency_ms",
                field="latency_ms",
                agg_type=self.Counter.AggType.avg,
            ),
            self.Counter(
                name="last_update",
                field="last_activity_time",
                agg_type=self.Counter.AggType.max,
            ),
        ]
        for entry in entries:
            for counter in counters:
                counter.add(entry)

        first_entry = entries[0]
        return {
            "endpoint": first_entry.endpoint_name,
            "model": first_entry.model_name,
            "url": first_entry.endpoint_url,
            "instances": len(entries),
            **{counter.name: counter() for counter in counters},
        }

    def get_endpoints(self, company_id: str):
        """
        Group instances by urls and return a summary for each url
        Do not return data for "loading" instances that have no url
        """
        entries = self._get_all(company_id)
        by_url = bucketize(entries, key=attrgetter("endpoint_url"))
        by_url.pop(None, None)
        return [self._get_summary(url_entries) for url_entries in by_url.values()]

    def _get_endpoint_entries(
        self, company_id, endpoint_url: Union[str, None]
    ) -> Sequence[ServingContainerEntry]:
        """
        Get the live container entries registered for the url.
        As a side effect cleans the url set from the expired containers and from the
        containers that are no longer stored or no longer report this url
        """
        url_key = self._get_url_key(company_id, endpoint_url)
        timestamp = int(time())
        # the set items are scored by their expiration time
        self.redis.zremrangebyscore(url_key, min=0, max=timestamp)
        container_keys = {key.decode() for key in self.redis.zrange(url_key, 0, -1)}
        if not container_keys:
            return []

        entries = []
        found_keys = set()
        for data in self.redis.mget(list(container_keys)) or []:
            entry = self._parse_entry(data)
            if entry is not None and entry.endpoint_url == endpoint_url:
                entries.append(entry)
                found_keys.add(entry.key)

        missing_keys = container_keys - found_keys
        if missing_keys:
            self.redis.zrem(url_key, *missing_keys)

        return entries

    def get_loading_instances(self, company_id: str):
        """Return the details of the company containers that did not report an endpoint url"""
        entries = self._get_endpoint_entries(company_id, None)
        now = datetime.now(timezone.utc)
        return [
            {
                "id": entry.container_id,
                "endpoint": entry.endpoint_name,
                "url": entry.endpoint_url,
                "model": entry.model_name,
                "model_source": entry.model_source,
                "model_version": entry.model_version,
                "preprocess_artifact": entry.preprocess_artifact,
                "input_type": entry.input_type,
                "input_size": entry.input_size,
                "uptime_sec": entry.uptime_sec,
                "age_sec": int((now - entry.register_time).total_seconds()),
                "last_update": entry.last_activity_time,
            }
            for entry in entries
        ]

    def get_endpoint_details(self, company_id, endpoint_url: str) -> dict:
        """
        Return the endpoint details together with the details of each of its instances
        :raise bad_request.NoContainersForUrl: if no live containers found for the url
        """
        entries = self._get_endpoint_entries(company_id, endpoint_url)
        if not entries:
            raise errors.bad_request.NoContainersForUrl(url=endpoint_url)

        def max_known(values):
            # containers that registered but did not report yet have no values
            return max((v for v in values if v is not None), default=None)

        first_entry = entries[0]
        return {
            "endpoint": first_entry.endpoint_name,
            "model": first_entry.model_name,
            "url": first_entry.endpoint_url,
            "preprocess_artifact": first_entry.preprocess_artifact,
            "input_type": first_entry.input_type,
            "input_size": first_entry.input_size,
            "model_source": first_entry.model_source,
            "model_version": first_entry.model_version,
            "uptime_sec": max_known(e.uptime_sec for e in entries),
            "last_update": max_known(e.last_activity_time for e in entries),
            "instances": [
                {
                    "id": entry.container_id,
                    "uptime_sec": entry.uptime_sec,
                    "requests": entry.requests_num,
                    "requests_min": entry.requests_min,
                    "latency_ms": entry.latency_ms,
                    "last_update": entry.last_activity_time,
                    "reference": [ref.to_struct() for ref in entry.reference]
                    if isinstance(entry.reference, list)
                    else entry.reference,
                    **self._get_machine_stats_data(entry.machine_stats),
                }
                for entry in entries
            ],
        }
|
||||
340
apiserver/bll/serving/stats.py
Normal file
340
apiserver/bll/serving/stats.py
Normal file
@@ -0,0 +1,340 @@
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
|
||||
from typing import Tuple, Optional, Sequence
|
||||
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
from apiserver.apimodels.serving import (
|
||||
ServingContainerEntry,
|
||||
GetEndpointMetricsHistoryRequest,
|
||||
MetricType,
|
||||
)
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.utilities.dicts import nested_get
|
||||
from apiserver.bll.query import Builder as QueryBuilder
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.es_factory import es_factory
|
||||
|
||||
|
||||
class _AggregationType(Enum):
|
||||
avg = "avg"
|
||||
sum = "sum"
|
||||
|
||||
|
||||
class ServingStats:
    """Logs the serving containers statistics to ES and builds the endpoint metrics charts"""

    min_chart_interval = config.get("services.serving.min_chart_interval_sec", 40)
    es: Elasticsearch = es_factory.connect("workers")

    @classmethod
    def _serving_stats_prefix(cls, company_id: str) -> str:
        """Returns the es index prefix for the company"""
        return f"serving_stats_{company_id.lower()}_"

    @staticmethod
    def _get_es_index_suffix():
        """Get the index name suffix for storing current month data"""
        return datetime.now(timezone.utc).strftime("%Y-%m")

    @staticmethod
    def _get_average_value(value) -> Tuple[Optional[float], Optional[int]]:
        """
        Return the (average, count) pair for the passed value.
        A list/tuple is averaged over its items, any other value counts as a single item.
        (None, None) is returned for None or for an empty sequence
        """
        if value is None:
            return None, None

        if isinstance(value, (list, tuple)):
            count = len(value)
            if not count:
                return None, None
            return sum(value) / count, count

        return value, 1

    @classmethod
    def log_stats_to_es(
        cls,
        entry: ServingContainerEntry,
    ) -> int:
        """
        Actually writing the serving container statistics to Elastic
        The document is written to the company index of the current month
        :return: The amount of logged documents
        """
        company_id = entry.company_id
        es_index = f"{cls._serving_stats_prefix(company_id)}{cls._get_es_index_suffix()}"

        entry_data = entry.to_struct()
        doc = {
            "timestamp": es_factory.get_timestamp_millis(),
            **{
                field: entry_data.get(field)
                for field in (
                    "container_id",
                    "company_id",
                    "endpoint_url",
                    "requests_num",
                    "requests_min",
                    "uptime_sec",
                    "latency_ms",
                )
            },
        }

        stats = entry_data.get("machine_stats")
        if stats:
            # per device usage lists are stored as their average and the devices count
            for category in ("cpu", "gpu"):
                usage, num = cls._get_average_value(stats.get(f"{category}_usage"))
                doc.update({f"{category}_usage": usage, f"{category}_num": num})

            for category in ("memory", "gpu_memory"):
                free, _ = cls._get_average_value(stats.get(f"{category}_free"))
                used, _ = cls._get_average_value(stats.get(f"{category}_used"))
                doc.update(
                    {
                        f"{category}_free": free,
                        f"{category}_used": used,
                        f"{category}_total": round((free or 0) + (used or 0), 3),
                    }
                )

            doc.update(
                {
                    field: stats.get(field)
                    for field in ("disk_free_home", "network_rx", "network_tx")
                }
            )

        cls.es.index(index=es_index, document=doc)

        return 1

    @staticmethod
    def round_series(values: Sequence, koeff) -> list:
        """Multiply each value by koeff and round to 2 digits. Falsy values turn into 0"""
        return [round(v * koeff, 2) if v else 0 for v in values]

    _mb_to_gb = 1 / 1024
    # metric type -> (es field, chart title, aggregation across instances, values multiplier)
    agg_fields = {
        MetricType.requests: (
            "requests_num",
            "Number of Requests",
            _AggregationType.sum,
            None,
        ),
        MetricType.requests_min: (
            "requests_min",
            "Requests per Minute",
            _AggregationType.sum,
            None,
        ),
        MetricType.latency_ms: (
            "latency_ms",
            "Average Latency (ms)",
            _AggregationType.avg,
            None,
        ),
        MetricType.cpu_count: ("cpu_num", "CPU Count", _AggregationType.sum, None),
        MetricType.gpu_count: ("gpu_num", "GPU Count", _AggregationType.sum, None),
        MetricType.cpu_util: (
            "cpu_usage",
            "Average CPU Load (%)",
            _AggregationType.avg,
            None,
        ),
        MetricType.gpu_util: (
            "gpu_usage",
            "Average GPU Utilization (%)",
            _AggregationType.avg,
            None,
        ),
        MetricType.ram_total: (
            "memory_total",
            "RAM Total (GB)",
            _AggregationType.sum,
            _mb_to_gb,
        ),
        MetricType.ram_used: (
            "memory_used",
            "RAM Used (GB)",
            _AggregationType.sum,
            _mb_to_gb,
        ),
        MetricType.ram_free: (
            "memory_free",
            "RAM Free (GB)",
            _AggregationType.sum,
            _mb_to_gb,
        ),
        MetricType.gpu_ram_total: (
            "gpu_memory_total",
            "GPU RAM Total (GB)",
            _AggregationType.sum,
            _mb_to_gb,
        ),
        MetricType.gpu_ram_used: (
            "gpu_memory_used",
            "GPU RAM Used (GB)",
            _AggregationType.sum,
            _mb_to_gb,
        ),
        MetricType.gpu_ram_free: (
            "gpu_memory_free",
            "GPU RAM Free (GB)",
            _AggregationType.sum,
            _mb_to_gb,
        ),
        MetricType.network_rx: (
            "network_rx",
            "Network Throughput RX (MBps)",
            _AggregationType.sum,
            None,
        ),
        MetricType.network_tx: (
            "network_tx",
            "Network Throughput TX (MBps)",
            _AggregationType.sum,
            None,
        ),
    }

    @classmethod
    def get_endpoint_metrics(
        cls,
        company_id: str,
        metrics_request: GetEndpointMetricsHistoryRequest,
    ) -> dict:
        """
        Build the history chart of the requested metric for the endpoint url.
        :return: a dictionary with the interval that was actually used ("computed_interval"),
            the endpoint wide series ("total") and, if instance_charts was requested,
            the series of each container ("instances")
        :raise bad_request.FieldsValueError: if from_date is not before to_date
        :raise NotImplementedError: if the metric type has no chart definition
        """
        from_date = metrics_request.from_date
        to_date = metrics_request.to_date
        if from_date >= to_date:
            raise errors.bad_request.FieldsValueError(
                "from_date must be less than to_date"
            )

        metric_type = metrics_request.metric_type
        agg_data = cls.agg_fields.get(metric_type)
        if not agg_data:
            # NotImplemented is a comparison sentinel and cannot be raised
            raise NotImplementedError(f"Charts for {metric_type} not implemented")

        agg_field, title, agg_type, multiplier = agg_data
        if agg_type == _AggregationType.sum:
            instance_sum_type = "sum_bucket"
        else:
            instance_sum_type = "avg_bucket"

        interval = max(metrics_request.interval, cls.min_chart_interval)
        endpoint_url = metrics_request.endpoint_url
        hist_ret = {
            "computed_interval": interval,
            "total": {
                "title": title,
                "dates": [],
                "values": [],
            },
            "instances": {},
        }
        must_conditions = [
            QueryBuilder.term("company_id", company_id),
            QueryBuilder.term("endpoint_url", endpoint_url),
            QueryBuilder.dates_range(from_date, to_date),
        ]
        query = {"bool": {"must": must_conditions}}
        es_index = f"{cls._serving_stats_prefix(company_id)}*"

        # first find the containers that reported for the url in the requested range
        res = cls.es.search(
            index=es_index,
            size=0,
            query=query,
            aggs={"instances": {"terms": {"field": "container_id"}}},
        )
        instance_buckets = nested_get(res, ("aggregations", "instances", "buckets"))
        if not instance_buckets:
            return hist_ret

        instance_keys = {ib["key"] for ib in instance_buckets}
        must_conditions.append(QueryBuilder.terms("container_id", instance_keys))
        query = {"bool": {"must": must_conditions}}

        # within each time bucket take one sample per container and then
        # combine the containers samples into the endpoint total
        sample_func = "avg" if metric_type != MetricType.requests else "max"
        aggs = {
            "instances": {
                "terms": {
                    "field": "container_id",
                    "size": max(len(instance_keys), 10),
                },
                "aggs": {
                    "sample": {sample_func: {"field": agg_field}},
                },
            },
            "total_instances": {
                instance_sum_type: {
                    "gap_policy": "insert_zeros",
                    "buckets_path": "instances>sample",
                }
            },
        }
        hist_params = {}
        if metric_type == MetricType.requests:
            hist_params["min_doc_count"] = 1
        else:
            hist_params["extended_bounds"] = {
                "min": int(from_date) * 1000,
                "max": int(to_date) * 1000,
            }
        aggs = {
            "dates": {
                "date_histogram": {
                    "field": "timestamp",
                    "fixed_interval": f"{interval}s",
                    **hist_params,
                },
                "aggs": aggs,
            }
        }

        filter_path = None
        if not metrics_request.instance_charts:
            # the per instance buckets are not needed. The bucket key should be
            # kept in the response since it is read below as the point date
            filter_path = [
                "aggregations.dates.buckets.key",
                "aggregations.dates.buckets.total_instances",
            ]

        data = cls.es.search(
            index=es_index,
            size=0,
            query=query,
            aggs=aggs,
            filter_path=filter_path,
        )
        agg_res = data.get("aggregations")
        if not agg_res:
            return hist_ret

        dates_ = []
        total = []
        instances = defaultdict(list)
        # remove last interval if it's incomplete. Allow 10% tolerance
        last_valid_timestamp = (to_date - 0.9 * interval) * 1000
        for point in agg_res["dates"]["buckets"]:
            date_ = point["key"]
            if date_ > last_valid_timestamp:
                break
            dates_.append(date_)
            total.append(nested_get(point, ("total_instances", "value"), 0))
            if metrics_request.instance_charts:
                found_keys = set()
                for instance in nested_get(point, ("instances", "buckets"), []):
                    instances[instance["key"]].append(
                        nested_get(instance, ("sample", "value"), 0)
                    )
                    found_keys.add(instance["key"])
                # keep all the instance series aligned with the dates
                for missing_key in instance_keys - found_keys:
                    instances[missing_key].append(0)

        koeff = multiplier if multiplier else 1.0
        hist_ret["total"]["dates"] = dates_
        hist_ret["total"]["values"] = cls.round_series(total, koeff)
        hist_ret["instances"] = {
            key: {
                "title": key,
                "dates": dates_,
                "values": cls.round_series(values, koeff),
            }
            for key, values in sorted(instances.items(), key=lambda p: p[0])
        }

        return hist_ret
|
||||
@@ -1,6 +1,6 @@
|
||||
from datetime import datetime
|
||||
import operator
|
||||
from threading import Thread, Lock
|
||||
from threading import Lock
|
||||
from time import sleep
|
||||
|
||||
import attr
|
||||
@@ -9,76 +9,83 @@ import psutil
|
||||
from apiserver.utilities.threads_manager import ThreadsManager
|
||||
|
||||
|
||||
class ResourceMonitor(Thread):
|
||||
@attr.s(auto_attribs=True)
|
||||
class Sample:
|
||||
cpu_usage: float = 0.0
|
||||
mem_used_gb: float = 0
|
||||
mem_free_gb: float = 0
|
||||
stat_threads = ThreadsManager("Statistics")
|
||||
|
||||
@classmethod
|
||||
def _apply(cls, op, *samples):
|
||||
return cls(
|
||||
**{
|
||||
field: op(*(getattr(sample, field) for sample in samples))
|
||||
for field in attr.fields_dict(cls)
|
||||
}
|
||||
)
|
||||
|
||||
def min(self, sample):
|
||||
return self._apply(min, self, sample)
|
||||
|
||||
def max(self, sample):
|
||||
return self._apply(max, self, sample)
|
||||
|
||||
def avg(self, sample, count):
|
||||
res = self._apply(lambda x: x * count, self)
|
||||
res = self._apply(operator.add, res, sample)
|
||||
res = self._apply(lambda x: x / (count + 1), res)
|
||||
return res
|
||||
|
||||
def __init__(self, sample_interval_sec=5):
|
||||
super(ResourceMonitor, self).__init__(daemon=True)
|
||||
self.sample_interval_sec = sample_interval_sec
|
||||
self._lock = Lock()
|
||||
self._clear()
|
||||
|
||||
def _clear(self):
|
||||
sample = self._get_sample()
|
||||
self._avg = sample
|
||||
self._min = sample
|
||||
self._max = sample
|
||||
self._clear_time = datetime.utcnow()
|
||||
self._count = 1
|
||||
@attr.s(auto_attribs=True)
|
||||
class Sample:
|
||||
cpu_usage: float = 0.0
|
||||
mem_used_gb: float = 0
|
||||
mem_free_gb: float = 0
|
||||
|
||||
@classmethod
|
||||
def _get_sample(cls) -> Sample:
|
||||
return cls.Sample(
|
||||
def _apply(cls, op, *samples):
|
||||
return cls(
|
||||
**{
|
||||
field: op(*(getattr(sample, field) for sample in samples))
|
||||
for field in attr.fields_dict(cls)
|
||||
}
|
||||
)
|
||||
|
||||
def min(self, sample):
|
||||
return self._apply(min, self, sample)
|
||||
|
||||
def max(self, sample):
|
||||
return self._apply(max, self, sample)
|
||||
|
||||
def avg(self, sample, count):
|
||||
res = self._apply(lambda x: x * count, self)
|
||||
res = self._apply(operator.add, res, sample)
|
||||
res = self._apply(lambda x: x / (count + 1), res)
|
||||
return res
|
||||
|
||||
@classmethod
|
||||
def get_current_sample(cls) -> "Sample":
|
||||
return cls(
|
||||
cpu_usage=psutil.cpu_percent(),
|
||||
mem_used_gb=psutil.virtual_memory().used / (1024 ** 3),
|
||||
mem_free_gb=psutil.virtual_memory().free / (1024 ** 3),
|
||||
)
|
||||
|
||||
def run(self):
|
||||
while not ThreadsManager.terminating:
|
||||
sleep(self.sample_interval_sec)
|
||||
|
||||
sample = self._get_sample()
|
||||
class ResourceMonitor:
|
||||
class Accumulator:
|
||||
def __init__(self):
|
||||
sample = Sample.get_current_sample()
|
||||
self.avg = sample
|
||||
self.min = sample
|
||||
self.max = sample
|
||||
self.time = datetime.utcnow()
|
||||
self.count = 1
|
||||
|
||||
with self._lock:
|
||||
self._min = self._min.min(sample)
|
||||
self._max = self._max.max(sample)
|
||||
self._avg = self._avg.avg(sample, self._count)
|
||||
self._count += 1
|
||||
def add_sample(self, sample: Sample):
|
||||
self.min = self.min.min(sample)
|
||||
self.max = self.max.max(sample)
|
||||
self.avg = self.avg.avg(sample, self.count)
|
||||
self.count += 1
|
||||
|
||||
def get_stats(self) -> dict:
|
||||
sample_interval_sec = 5
|
||||
_lock = Lock()
|
||||
accumulator = Accumulator()
|
||||
|
||||
@classmethod
|
||||
@stat_threads.register("resource_monitor", daemon=True)
|
||||
def start(cls):
|
||||
while True:
|
||||
sleep(cls.sample_interval_sec)
|
||||
sample = Sample.get_current_sample()
|
||||
with cls._lock:
|
||||
cls.accumulator.add_sample(sample)
|
||||
|
||||
@classmethod
|
||||
def get_stats(cls) -> dict:
|
||||
""" Returns current resource statistics and clears internal resource statistics """
|
||||
with self._lock:
|
||||
min_ = attr.asdict(self._min)
|
||||
max_ = attr.asdict(self._max)
|
||||
avg = attr.asdict(self._avg)
|
||||
interval = datetime.utcnow() - self._clear_time
|
||||
self._clear()
|
||||
with cls._lock:
|
||||
min_ = attr.asdict(cls.accumulator.min)
|
||||
max_ = attr.asdict(cls.accumulator.max)
|
||||
avg = attr.asdict(cls.accumulator.avg)
|
||||
interval = datetime.utcnow() - cls.accumulator.time
|
||||
cls.accumulator = cls.Accumulator()
|
||||
|
||||
return {
|
||||
"interval_sec": interval.total_seconds(),
|
||||
|
||||
@@ -8,8 +8,7 @@ from typing import Sequence, Optional
|
||||
|
||||
import dpath
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter
|
||||
from requests.packages.urllib3.util.retry import Retry
|
||||
from requests.adapters import HTTPAdapter, Retry
|
||||
|
||||
from apiserver.bll.query import Builder as QueryBuilder
|
||||
from apiserver.bll.util import get_server_uuid
|
||||
@@ -19,11 +18,10 @@ from apiserver.config.info import get_deployment_type
|
||||
from apiserver.database.model import Company, User
|
||||
from apiserver.database.model.queue import Queue
|
||||
from apiserver.database.model.task.task import Task
|
||||
from apiserver.tools import safe_get
|
||||
from apiserver.utilities.dicts import nested_get
|
||||
from apiserver.utilities.json import dumps
|
||||
from apiserver.utilities.threads_manager import ThreadsManager
|
||||
from apiserver.version import __version__ as current_version
|
||||
from .resource_monitor import ResourceMonitor
|
||||
from .resource_monitor import ResourceMonitor, stat_threads
|
||||
|
||||
log = config.logger(__file__)
|
||||
|
||||
@@ -31,17 +29,19 @@ worker_bll = WorkerBLL()
|
||||
|
||||
|
||||
class StatisticsReporter:
|
||||
threads = ThreadsManager("Statistics", resource_monitor=ResourceMonitor)
|
||||
send_queue = queue.Queue()
|
||||
supported = config.get("apiserver.statistics.supported", True)
|
||||
|
||||
@classmethod
|
||||
def start(cls):
|
||||
if not cls.supported:
|
||||
return
|
||||
ResourceMonitor.start()
|
||||
cls.start_sender()
|
||||
cls.start_reporter()
|
||||
|
||||
@classmethod
|
||||
@threads.register("reporter", daemon=True)
|
||||
@stat_threads.register("reporter", daemon=True)
|
||||
def start_reporter(cls):
|
||||
"""
|
||||
Periodically send statistics reports for companies who have opted in.
|
||||
@@ -54,7 +54,7 @@ class StatisticsReporter:
|
||||
hours=config.get("apiserver.statistics.report_interval_hours", 24)
|
||||
)
|
||||
sleep(report_interval.total_seconds())
|
||||
while not ThreadsManager.terminating:
|
||||
while True:
|
||||
try:
|
||||
for company in Company.objects(
|
||||
defaults__stats_option__enabled=True
|
||||
@@ -68,7 +68,7 @@ class StatisticsReporter:
|
||||
sleep(report_interval.total_seconds())
|
||||
|
||||
@classmethod
|
||||
@threads.register("sender", daemon=True)
|
||||
@stat_threads.register("sender", daemon=True)
|
||||
def start_sender(cls):
|
||||
if not cls.supported:
|
||||
return
|
||||
@@ -85,7 +85,7 @@ class StatisticsReporter:
|
||||
|
||||
WarningFilter.attach()
|
||||
|
||||
while not ThreadsManager.terminating:
|
||||
while True:
|
||||
try:
|
||||
report = cls.send_queue.get()
|
||||
|
||||
@@ -111,7 +111,7 @@ class StatisticsReporter:
|
||||
"uuid": get_server_uuid(),
|
||||
"queues": {"count": Queue.objects(company=company_id).count()},
|
||||
"users": {"count": User.objects(company=company_id).count()},
|
||||
"resources": cls.threads.resource_monitor.get_stats(),
|
||||
"resources": ResourceMonitor.get_stats(),
|
||||
"experiments": next(
|
||||
iter(cls._get_experiments_stats(company_id).values()), {}
|
||||
),
|
||||
@@ -162,7 +162,7 @@ class StatisticsReporter:
|
||||
def _get_cardinality_fields(categories: Sequence[dict]) -> dict:
|
||||
names = {"cpu": "num_cores"}
|
||||
return {
|
||||
names[c["key"]]: safe_get(c, "count/value")
|
||||
names[c["key"]]: nested_get(c, ("count", "value"))
|
||||
for c in categories
|
||||
if c["key"] in names
|
||||
}
|
||||
@@ -175,21 +175,21 @@ class StatisticsReporter:
|
||||
}
|
||||
return {
|
||||
names[m["key"]]: {
|
||||
"min": safe_get(m, "min/value"),
|
||||
"max": safe_get(m, "max/value"),
|
||||
"avg": safe_get(m, "avg/value"),
|
||||
"min": nested_get(m, ("min", "value")),
|
||||
"max": nested_get(m, ("max", "value")),
|
||||
"avg": nested_get(m, ("avg", "value")),
|
||||
}
|
||||
for m in metrics
|
||||
if m["key"] in names
|
||||
}
|
||||
|
||||
buckets = safe_get(res, "aggregations/workers/buckets", default=[])
|
||||
buckets = nested_get(res, ("aggregations", "workers", "buckets"), default=[])
|
||||
return {
|
||||
b["key"]: {
|
||||
key: {
|
||||
"interval_sec": agent_resource_threshold_sec,
|
||||
**_get_cardinality_fields(safe_get(b, "categories/buckets", [])),
|
||||
**_get_metric_fields(safe_get(b, "metrics/buckets", [])),
|
||||
**_get_cardinality_fields(nested_get(b, ("categories", "buckets"), [])),
|
||||
**_get_metric_fields(nested_get(b, ("metrics", "buckets"), [])),
|
||||
}
|
||||
}
|
||||
for b in buckets
|
||||
@@ -227,7 +227,7 @@ class StatisticsReporter:
|
||||
},
|
||||
}
|
||||
res = cls._run_worker_stats_query(company_id, es_req)
|
||||
buckets = safe_get(res, "aggregations/workers/buckets", default=[])
|
||||
buckets = nested_get(res, ("aggregations", "workers", "buckets"), default=[])
|
||||
return {
|
||||
b["key"]: {"last_activity_time": b["last_activity_time"]["value"]}
|
||||
for b in buckets
|
||||
@@ -254,6 +254,14 @@ class StatisticsReporter:
|
||||
**({"last_worker": {"$in": workers}} if workers else {}),
|
||||
}
|
||||
},
|
||||
{
|
||||
"$project": {
|
||||
"last_worker": 1,
|
||||
"last_update": 1,
|
||||
"started": 1,
|
||||
"last_iteration": 1,
|
||||
}
|
||||
},
|
||||
{
|
||||
"$group": {
|
||||
"_id": "$last_worker" if workers else None,
|
||||
|
||||
273
apiserver/bll/storage/__init__.py
Normal file
273
apiserver/bll/storage/__init__.py
Normal file
@@ -0,0 +1,273 @@
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from copy import copy
|
||||
from datetime import datetime
|
||||
from typing import Optional, Sequence
|
||||
|
||||
import attr
|
||||
from boltons.cacheutils import cachedproperty
|
||||
from clearml.backend_config.bucket_config import (
|
||||
S3BucketConfigurations,
|
||||
AzureContainerConfigurations,
|
||||
GSBucketConfigurations,
|
||||
AzureContainerConfig,
|
||||
GSBucketConfig,
|
||||
S3BucketConfig,
|
||||
)
|
||||
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.apimodels.storage import SetSettingsRequest
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.model.storage_settings import (
|
||||
StorageSettings,
|
||||
GoogleBucketSettings,
|
||||
AWSSettings,
|
||||
AzureStorageSettings,
|
||||
GoogleStorageSettings,
|
||||
)
|
||||
from apiserver.database.utils import id as db_id
|
||||
|
||||
log = config.logger(__file__)
|
||||
|
||||
|
||||
class StorageBLL:
    """
    Business logic for per-company cloud storage credentials (AWS S3, Google
    Storage and Azure).

    Settings stored for a company in ``StorageSettings`` take precedence; when a
    company has no stored settings for a provider, the server-wide defaults from
    the ``services.storage_credentials`` configuration section are used.
    """

    default_aws_configs: S3BucketConfigurations = None
    conf = config.get("services.storage_credentials")

    # Maps JSON credentials content to the temp file it was last written to, so
    # repeated requests for the same credentials reuse one file instead of
    # leaving a new undeleted temp file behind on every call.
    _json_files: dict = {}

    @cachedproperty
    def _default_aws_configs(self) -> S3BucketConfigurations:
        """Server-wide S3 configuration built from the ``aws.s3`` config section."""
        return S3BucketConfigurations.from_config(self.conf.get("aws.s3"))

    @cachedproperty
    def _default_azure_configs(self) -> AzureContainerConfigurations:
        """Server-wide Azure configuration built from the ``azure.storage`` config section."""
        return AzureContainerConfigurations.from_config(self.conf.get("azure.storage"))

    @cachedproperty
    def _default_gs_configs(self) -> GSBucketConfigurations:
        """Server-wide Google Storage configuration built from the ``google.storage`` config section."""
        return GSBucketConfigurations.from_config(self.conf.get("google.storage"))

    def get_azure_settings_for_company(
        self,
        company_id: str,
        db_settings: Optional[StorageSettings] = None,
        query_db: bool = True,
    ) -> AzureContainerConfigurations:
        """
        Return the Azure container configurations that apply to a company.

        :param company_id: company whose settings are requested
        :param db_settings: already loaded settings document, if the caller has one
        :param query_db: when no document was passed, load it from the database
        :return: the company's containers, or a copy of the server defaults when
            the company has no Azure settings
        """
        if not db_settings and query_db:
            db_settings = (
                StorageSettings.objects(company=company_id).only("azure").first()
            )

        if not db_settings or not db_settings.azure:
            return copy(self._default_azure_configs)

        azure = db_settings.azure
        return AzureContainerConfigurations(
            container_configs=[
                AzureContainerConfig(**entry.to_proper_dict())
                for entry in (azure.containers or [])
            ]
        )

    def get_gs_settings_for_company(
        self,
        company_id: str,
        db_settings: Optional[StorageSettings] = None,
        query_db: bool = True,
        json_string: bool = False,
    ) -> GSBucketConfigurations:
        """
        Return the Google Storage bucket configurations that apply to a company.

        :param company_id: company whose settings are requested
        :param db_settings: already loaded settings document, if the caller has one
        :param query_db: when no document was passed, load it from the database
        :param json_string: controls how credentials are represented.
            When True, default credentials are passed through
            ``_assure_json_string`` and stored credentials are returned as saved.
            When False, stored credentials are passed through
            ``_assure_json_file`` and the defaults are returned as configured.
        :return: the company's configuration, or the server defaults when the
            company has no Google settings
        """
        if not db_settings and query_db:
            db_settings = (
                StorageSettings.objects(company=company_id).only("google").first()
            )

        if not db_settings or not db_settings.google:
            defaults = self._default_gs_configs
            if not json_string:
                return copy(defaults)

            # An empty/None bucket collection is passed on unchanged
            buckets = defaults._buckets
            if buckets:
                buckets = [
                    attr.evolve(
                        b,
                        credentials_json=self._assure_json_string(b.credentials_json),
                    )
                    for b in buckets
                ]

            return GSBucketConfigurations(
                buckets=buckets,
                default_project=defaults._default_project,
                default_credentials=self._assure_json_string(
                    defaults._default_credentials
                ),
            )

        def get_bucket_config(bc: GoogleBucketSettings) -> GSBucketConfig:
            data = bc.to_proper_dict()
            if not json_string and bc.credentials_json:
                data["credentials_json"] = self._assure_json_file(bc.credentials_json)
            return GSBucketConfig(**data)

        google = db_settings.google
        buckets_configs = [get_bucket_config(b) for b in (google.buckets or [])]
        return GSBucketConfigurations(
            buckets=buckets_configs,
            default_project=google.project,
            default_credentials=google.credentials_json
            if json_string
            else self._assure_json_file(google.credentials_json),
        )

    def get_aws_settings_for_company(
        self,
        company_id: str,
        db_settings: Optional[StorageSettings] = None,
        query_db: bool = True,
    ) -> S3BucketConfigurations:
        """
        Return the S3 bucket configurations that apply to a company.

        :param company_id: company whose settings are requested
        :param db_settings: already loaded settings document, if the caller has one
        :param query_db: when no document was passed, load it from the database
        :return: the company's configuration, or a copy of the server defaults
            when the company has no AWS settings
        """
        if not db_settings and query_db:
            db_settings = (
                StorageSettings.objects(company=company_id).only("aws").first()
            )
        if not db_settings or not db_settings.aws:
            return copy(self._default_aws_configs)

        aws = db_settings.aws
        buckets_configs = S3BucketConfig.from_list(
            [b.to_proper_dict() for b in (aws.buckets or [])]
        )
        return S3BucketConfigurations(
            buckets=buckets_configs,
            default_key=aws.key,
            default_secret=aws.secret,
            default_region=aws.region,
            default_use_credentials_chain=aws.use_credentials_chain,
            default_token=aws.token,
            default_extra_args={},
        )

    def _assure_json_file(self, name_or_content: str) -> str:
        """
        Return a file path for credentials given either as a path or as JSON text.

        Empty values, values ending in ``.json``, existing paths and text that
        does not parse as JSON are returned unchanged. Valid JSON text is written
        to a ``.json`` temp file whose path is returned; the file for a given
        content is reused for as long as it still exists.
        """
        if not name_or_content:
            return name_or_content

        if name_or_content.endswith(".json") or os.path.exists(name_or_content):
            return name_or_content

        try:
            json.loads(name_or_content)
        except Exception:
            return name_or_content

        cached = self._json_files.get(name_or_content)
        if cached and os.path.exists(cached):
            return cached

        # delete=False: the path is handed to the caller and must outlive this call
        with tempfile.NamedTemporaryFile(
            mode="wt", delete=False, suffix=".json"
        ) as tmp:
            tmp.write(name_or_content)

        # Concurrent callers may at worst each write their own file here
        self._json_files[name_or_content] = tmp.name
        return tmp.name

    def _assure_json_string(self, name_or_content: str) -> Optional[str]:
        """
        Return JSON text for credentials given either as JSON text or as a path.

        Empty values and text that parses as JSON are returned unchanged.
        Anything else is treated as a file path and the file content is returned
        (without validating it); None is returned if the file cannot be read.
        """
        if not name_or_content:
            return name_or_content

        try:
            json.loads(name_or_content)
            return name_or_content
        except Exception:
            pass

        try:
            with open(name_or_content) as fp:
                return fp.read()
        except Exception:
            return None

    def get_company_settings(self, company_id: str) -> dict:
        """
        Return the effective storage settings of a company as a plain dict with
        the keys ``aws``, ``google``, ``azure`` and ``last_update``.

        The settings document is loaded once and shared by all three providers;
        ``last_update`` is None when the company has no settings document.
        """
        db_settings = StorageSettings.objects(company=company_id).first()
        aws = self.get_aws_settings_for_company(company_id, db_settings, query_db=False)
        aws_dict = {
            "key": aws._default_key,
            "secret": aws._default_secret,
            "token": aws._default_token,
            "region": aws._default_region,
            "use_credentials_chain": aws._default_use_credentials_chain,
            "buckets": [attr.asdict(b) for b in aws._buckets],
        }

        gs = self.get_gs_settings_for_company(
            company_id, db_settings, query_db=False, json_string=True
        )
        gs_dict = {
            "project": gs._default_project,
            "credentials_json": gs._default_credentials,
            "buckets": [attr.asdict(b) for b in gs._buckets],
        }

        # query_db=False as for the other providers: the document was already
        # looked up above, so a missing one must not trigger a second query
        azure = self.get_azure_settings_for_company(
            company_id, db_settings, query_db=False
        )
        azure_dict = {
            "containers": [attr.asdict(ac) for ac in azure._container_configs],
        }

        return {
            "aws": aws_dict,
            "google": gs_dict,
            "azure": azure_dict,
            "last_update": db_settings.last_update if db_settings else None,
        }

    def set_company_settings(
        self, company_id: str, settings: SetSettingsRequest
    ) -> int:
        """
        Store the provider settings passed in the request for a company,
        creating the company's settings document if it does not exist yet.

        Only request fields that are also fields of the matching database model
        are stored. Providers that are missing from the request are left as is.

        :return: the value returned by the database update call
        :raise errors.bad_request.ValidationError: if the Google credentials are
            not valid JSON or if the request contains no provider settings
        """
        providers = (
            ("aws", AWSSettings),
            ("azure", AzureStorageSettings),
            ("google", GoogleStorageSettings),
        )
        update_dict = {}
        for key, model in providers:
            provider_settings = getattr(settings, key)
            if not provider_settings:
                continue
            allowed = model.get_fields()
            update_dict[key] = {
                k: v for k, v in provider_settings.to_struct().items() if k in allowed
            }

        cred_json = update_dict.get("google", {}).get("credentials_json")
        if cred_json:
            try:
                json.loads(cred_json)
            except Exception as ex:
                raise errors.bad_request.ValidationError(
                    f"Invalid json credentials: {str(ex)}"
                ) from ex

        if not update_dict:
            raise errors.bad_request.ValidationError("No settings were provided")

        existing = StorageSettings.objects(company=company_id).only("id").first()
        settings_id = existing.id if existing else db_id()
        return StorageSettings.objects(id=settings_id).update(
            upsert=True,
            id=settings_id,
            company=company_id,
            last_update=datetime.utcnow(),
            **update_dict,
        )

    def reset_company_settings(self, company_id: str, keys: Sequence[str]) -> int:
        """
        Unset the given top-level settings fields of a company and refresh the
        ``last_update`` timestamp.

        :return: the value returned by the database update call
        """
        return StorageSettings.objects(company=company_id).update(
            last_update=datetime.utcnow(), **{f"unset__{k}": 1 for k in keys}
        )
|
||||
@@ -1,6 +1,5 @@
|
||||
from .task_bll import TaskBLL
|
||||
from .utils import (
|
||||
ChangeStatusRequest,
|
||||
update_project_time,
|
||||
validate_status_change,
|
||||
)
|
||||
|
||||
@@ -5,7 +5,7 @@ from apiserver.apimodels.tasks import Artifact as ApiArtifact, ArtifactId
|
||||
from apiserver.bll.task.utils import get_task_for_update, update_task
|
||||
from apiserver.database.model.task.task import DEFAULT_ARTIFACT_MODE, Artifact
|
||||
from apiserver.database.utils import hash_field_name
|
||||
from apiserver.timing_context import TimingContext
|
||||
from apiserver.service_repo.auth import Identity
|
||||
from apiserver.utilities.dicts import nested_get, nested_set
|
||||
from apiserver.utilities.parameter_key_escaper import mongoengine_safe
|
||||
|
||||
@@ -49,49 +49,45 @@ class Artifacts:
|
||||
def add_or_update_artifacts(
|
||||
cls,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
task_id: str,
|
||||
artifacts: Sequence[ApiArtifact],
|
||||
force: bool,
|
||||
) -> int:
|
||||
with TimingContext("mongo", "update_artifacts"):
|
||||
task = get_task_for_update(
|
||||
company_id=company_id,
|
||||
task_id=task_id,
|
||||
force=force,
|
||||
)
|
||||
task = get_task_for_update(
|
||||
company_id=company_id, task_id=task_id, force=force, identity=identity
|
||||
)
|
||||
|
||||
artifacts = {
|
||||
get_artifact_id(a): Artifact(**a)
|
||||
for a in (api_artifact.to_struct() for api_artifact in artifacts)
|
||||
}
|
||||
artifacts = {
|
||||
get_artifact_id(a): Artifact(**a)
|
||||
for a in (api_artifact.to_struct() for api_artifact in artifacts)
|
||||
}
|
||||
|
||||
update_cmds = {
|
||||
f"set__execution__artifacts__{mongoengine_safe(name)}": value
|
||||
for name, value in artifacts.items()
|
||||
}
|
||||
return update_task(task, update_cmds=update_cmds)
|
||||
update_cmds = {
|
||||
f"set__execution__artifacts__{mongoengine_safe(name)}": value
|
||||
for name, value in artifacts.items()
|
||||
}
|
||||
return update_task(task, user_id=identity.user, update_cmds=update_cmds)
|
||||
|
||||
@classmethod
|
||||
def delete_artifacts(
|
||||
cls,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
task_id: str,
|
||||
artifact_ids: Sequence[ArtifactId],
|
||||
force: bool,
|
||||
) -> int:
|
||||
with TimingContext("mongo", "delete_artifacts"):
|
||||
task = get_task_for_update(
|
||||
company_id=company_id,
|
||||
task_id=task_id,
|
||||
force=force,
|
||||
)
|
||||
task = get_task_for_update(
|
||||
company_id=company_id, task_id=task_id, force=force, identity=identity
|
||||
)
|
||||
|
||||
artifact_ids = [
|
||||
get_artifact_id(a)
|
||||
for a in (artifact_id.to_struct() for artifact_id in artifact_ids)
|
||||
]
|
||||
delete_cmds = {
|
||||
f"unset__execution__artifacts__{id_}": 1 for id_ in set(artifact_ids)
|
||||
}
|
||||
artifact_ids = [
|
||||
get_artifact_id(a)
|
||||
for a in (artifact_id.to_struct() for artifact_id in artifact_ids)
|
||||
]
|
||||
delete_cmds = {
|
||||
f"unset__execution__artifacts__{id_}": 1 for id_ in set(artifact_ids)
|
||||
}
|
||||
|
||||
return update_task(task, update_cmds=delete_cmds)
|
||||
return update_task(task, user_id=identity.user, update_cmds=delete_cmds)
|
||||
|
||||
@@ -15,7 +15,7 @@ from apiserver.bll.task import TaskBLL
|
||||
from apiserver.bll.task.utils import get_task_for_update, update_task
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.model.task.task import ParamsItem, Task, ConfigurationItem
|
||||
from apiserver.timing_context import TimingContext
|
||||
from apiserver.service_repo.auth import Identity
|
||||
from apiserver.utilities.parameter_key_escaper import (
|
||||
ParameterKeyEscaper,
|
||||
mongoengine_safe,
|
||||
@@ -32,7 +32,10 @@ class HyperParams:
|
||||
def get_params(cls, company_id: str, task_ids: Sequence[str]) -> Dict[str, dict]:
|
||||
only = ("id", "hyperparams")
|
||||
tasks = task_bll.assert_exists(
|
||||
company_id=company_id, task_ids=task_ids, only=only, allow_public=True,
|
||||
company_id=company_id,
|
||||
task_ids=task_ids,
|
||||
only=only,
|
||||
allow_public=True,
|
||||
)
|
||||
|
||||
return {
|
||||
@@ -64,78 +67,84 @@ class HyperParams:
|
||||
def delete_params(
|
||||
cls,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
task_id: str,
|
||||
hyperparams: Sequence[HyperParamKey],
|
||||
force: bool,
|
||||
) -> int:
|
||||
with TimingContext("mongo", "delete_hyperparams"):
|
||||
properties_only = cls._normalize_params(hyperparams)
|
||||
task = get_task_for_update(
|
||||
company_id=company_id,
|
||||
task_id=task_id,
|
||||
allow_all_statuses=properties_only,
|
||||
force=force,
|
||||
)
|
||||
properties_only = cls._normalize_params(hyperparams)
|
||||
task = get_task_for_update(
|
||||
company_id=company_id,
|
||||
task_id=task_id,
|
||||
allow_all_statuses=properties_only,
|
||||
force=force,
|
||||
identity=identity,
|
||||
)
|
||||
|
||||
with_param, without_param = iterutils.partition(
|
||||
hyperparams, key=lambda p: bool(p.name)
|
||||
)
|
||||
sections_to_delete = {p.section for p in without_param}
|
||||
delete_cmds = {
|
||||
f"unset__hyperparams__{ParameterKeyEscaper.escape(section)}": 1
|
||||
for section in sections_to_delete
|
||||
}
|
||||
with_param, without_param = iterutils.partition(
|
||||
hyperparams, key=lambda p: bool(p.name)
|
||||
)
|
||||
sections_to_delete = {p.section for p in without_param}
|
||||
delete_cmds = {
|
||||
f"unset__hyperparams__{ParameterKeyEscaper.escape(section)}": 1
|
||||
for section in sections_to_delete
|
||||
}
|
||||
|
||||
for item in with_param:
|
||||
section = ParameterKeyEscaper.escape(item.section)
|
||||
if item.section in sections_to_delete:
|
||||
raise errors.bad_request.FieldsConflict(
|
||||
"Cannot delete section field if the whole section was scheduled for deletion"
|
||||
)
|
||||
name = ParameterKeyEscaper.escape(item.name)
|
||||
delete_cmds[f"unset__hyperparams__{section}__{name}"] = 1
|
||||
for item in with_param:
|
||||
section = ParameterKeyEscaper.escape(item.section)
|
||||
if item.section in sections_to_delete:
|
||||
raise errors.bad_request.FieldsConflict(
|
||||
"Cannot delete section field if the whole section was scheduled for deletion"
|
||||
)
|
||||
name = ParameterKeyEscaper.escape(item.name)
|
||||
delete_cmds[f"unset__hyperparams__{section}__{name}"] = 1
|
||||
|
||||
return update_task(
|
||||
task, update_cmds=delete_cmds, set_last_update=not properties_only
|
||||
)
|
||||
return update_task(
|
||||
task,
|
||||
user_id=identity.user,
|
||||
update_cmds=delete_cmds,
|
||||
set_last_update=not properties_only,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def edit_params(
|
||||
cls,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
task_id: str,
|
||||
hyperparams: Sequence[HyperParamItem],
|
||||
replace_hyperparams: str,
|
||||
force: bool,
|
||||
) -> int:
|
||||
with TimingContext("mongo", "edit_hyperparams"):
|
||||
properties_only = cls._normalize_params(hyperparams)
|
||||
task = get_task_for_update(
|
||||
company_id=company_id,
|
||||
task_id=task_id,
|
||||
allow_all_statuses=properties_only,
|
||||
force=force,
|
||||
)
|
||||
properties_only = cls._normalize_params(hyperparams)
|
||||
task = get_task_for_update(
|
||||
company_id=company_id,
|
||||
task_id=task_id,
|
||||
allow_all_statuses=properties_only,
|
||||
force=force,
|
||||
identity=identity,
|
||||
)
|
||||
|
||||
update_cmds = dict()
|
||||
hyperparams = cls._db_dicts_from_list(hyperparams)
|
||||
if replace_hyperparams == ReplaceHyperparams.all:
|
||||
update_cmds["set__hyperparams"] = hyperparams
|
||||
elif replace_hyperparams == ReplaceHyperparams.section:
|
||||
for section, value in hyperparams.items():
|
||||
update_cmds = dict()
|
||||
hyperparams = cls._db_dicts_from_list(hyperparams)
|
||||
if replace_hyperparams == ReplaceHyperparams.all:
|
||||
update_cmds["set__hyperparams"] = hyperparams
|
||||
elif replace_hyperparams == ReplaceHyperparams.section:
|
||||
for section, value in hyperparams.items():
|
||||
update_cmds[f"set__hyperparams__{mongoengine_safe(section)}"] = value
|
||||
else:
|
||||
for section, section_params in hyperparams.items():
|
||||
for name, value in section_params.items():
|
||||
update_cmds[
|
||||
f"set__hyperparams__{mongoengine_safe(section)}"
|
||||
f"set__hyperparams__{section}__{mongoengine_safe(name)}"
|
||||
] = value
|
||||
else:
|
||||
for section, section_params in hyperparams.items():
|
||||
for name, value in section_params.items():
|
||||
update_cmds[
|
||||
f"set__hyperparams__{section}__{mongoengine_safe(name)}"
|
||||
] = value
|
||||
|
||||
return update_task(
|
||||
task, update_cmds=update_cmds, set_last_update=not properties_only
|
||||
)
|
||||
return update_task(
|
||||
task,
|
||||
user_id=identity.user,
|
||||
update_cmds=update_cmds,
|
||||
set_last_update=not properties_only,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _db_dicts_from_list(cls, items: Sequence[HyperParamItem]) -> Dict[str, dict]:
|
||||
@@ -160,7 +169,10 @@ class HyperParams:
|
||||
else:
|
||||
only.append("configuration")
|
||||
tasks = task_bll.assert_exists(
|
||||
company_id=company_id, task_ids=task_ids, only=only, allow_public=True,
|
||||
company_id=company_id,
|
||||
task_ids=task_ids,
|
||||
only=only,
|
||||
allow_public=True,
|
||||
)
|
||||
|
||||
return {
|
||||
@@ -181,7 +193,7 @@ class HyperParams:
|
||||
pipeline = [
|
||||
{
|
||||
"$match": {
|
||||
"company": {"$in": [None, "", company_id]},
|
||||
"company": {"$in": ["", company_id]},
|
||||
"_id": {"$in": task_ids},
|
||||
}
|
||||
},
|
||||
@@ -191,57 +203,60 @@ class HyperParams:
|
||||
{"$group": {"_id": "$_id", "names": {"$addToSet": "$items.k"}}},
|
||||
]
|
||||
|
||||
with TimingContext("mongo", "get_configuration_names"):
|
||||
tasks = Task.aggregate(pipeline)
|
||||
tasks = Task.aggregate(pipeline)
|
||||
|
||||
return {
|
||||
task["_id"]: {
|
||||
"names": sorted(
|
||||
ParameterKeyEscaper.unescape(name) for name in task["names"]
|
||||
)
|
||||
}
|
||||
for task in tasks
|
||||
return {
|
||||
task["_id"]: {
|
||||
"names": sorted(
|
||||
ParameterKeyEscaper.unescape(name) for name in task["names"]
|
||||
)
|
||||
}
|
||||
for task in tasks
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def edit_configuration(
|
||||
cls,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
task_id: str,
|
||||
configuration: Sequence[Configuration],
|
||||
replace_configuration: bool,
|
||||
force: bool,
|
||||
) -> int:
|
||||
with TimingContext("mongo", "edit_configuration"):
|
||||
task = get_task_for_update(
|
||||
company_id=company_id, task_id=task_id, force=force
|
||||
)
|
||||
task = get_task_for_update(
|
||||
company_id=company_id, task_id=task_id, force=force, identity=identity
|
||||
)
|
||||
|
||||
update_cmds = dict()
|
||||
configuration = {
|
||||
ParameterKeyEscaper.escape(c.name): ConfigurationItem(**c.to_struct())
|
||||
for c in configuration
|
||||
}
|
||||
if replace_configuration:
|
||||
update_cmds["set__configuration"] = configuration
|
||||
else:
|
||||
for name, value in configuration.items():
|
||||
update_cmds[f"set__configuration__{mongoengine_safe(name)}"] = value
|
||||
update_cmds = dict()
|
||||
configuration = {
|
||||
ParameterKeyEscaper.escape(c.name): ConfigurationItem(**c.to_struct())
|
||||
for c in configuration
|
||||
}
|
||||
if replace_configuration:
|
||||
update_cmds["set__configuration"] = configuration
|
||||
else:
|
||||
for name, value in configuration.items():
|
||||
update_cmds[f"set__configuration__{mongoengine_safe(name)}"] = value
|
||||
|
||||
return update_task(task, update_cmds=update_cmds)
|
||||
return update_task(task, user_id=identity.user, update_cmds=update_cmds)
|
||||
|
||||
@classmethod
|
||||
def delete_configuration(
|
||||
cls, company_id: str, task_id: str, configuration: Sequence[str], force: bool
|
||||
cls,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
task_id: str,
|
||||
configuration: Sequence[str],
|
||||
force: bool,
|
||||
) -> int:
|
||||
with TimingContext("mongo", "delete_configuration"):
|
||||
task = get_task_for_update(
|
||||
company_id=company_id, task_id=task_id, force=force
|
||||
)
|
||||
task = get_task_for_update(
|
||||
company_id=company_id, task_id=task_id, force=force, identity=identity
|
||||
)
|
||||
|
||||
delete_cmds = {
|
||||
f"unset__configuration__{ParameterKeyEscaper.escape(name)}": 1
|
||||
for name in set(configuration)
|
||||
}
|
||||
delete_cmds = {
|
||||
f"unset__configuration__{ParameterKeyEscaper.escape(name)}": 1
|
||||
for name in set(configuration)
|
||||
}
|
||||
|
||||
return update_task(task, update_cmds=delete_cmds)
|
||||
return update_task(task, user_id=identity.user, update_cmds=delete_cmds)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from datetime import timedelta, datetime
|
||||
from time import sleep
|
||||
|
||||
from apiserver.bll.task import update_project_time
|
||||
from apiserver.bll.util import update_project_time
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.model.task.task import TaskStatus, Task
|
||||
from apiserver.utilities.threads_manager import ThreadsManager
|
||||
@@ -39,7 +39,7 @@ class NonResponsiveTasksWatchdog:
|
||||
@threads.register("non_responsive_tasks_watchdog", daemon=True)
|
||||
def start(cls):
|
||||
sleep(cls.settings.watch_interval_sec)
|
||||
while not ThreadsManager.terminating:
|
||||
while True:
|
||||
watch_interval = cls.settings.watch_interval_sec
|
||||
if cls.settings.enabled:
|
||||
try:
|
||||
@@ -85,6 +85,7 @@ class NonResponsiveTasksWatchdog:
|
||||
status_changed=now,
|
||||
last_update=now,
|
||||
last_change=now,
|
||||
last_changed_by="__apiserver__",
|
||||
)
|
||||
if updated:
|
||||
project_ids.add(task.project)
|
||||
|
||||
@@ -121,18 +121,31 @@ def params_prepare_for_save(fields: dict, previous_task: Task = None):
|
||||
nested_set(fields, new_path, new_param)
|
||||
nested_delete(fields, old_params_field)
|
||||
|
||||
for param_field in ("hyperparams", "configuration"):
|
||||
params = fields.get(param_field)
|
||||
if params:
|
||||
escaped_params = {
|
||||
ParameterKeyEscaper.escape(key): {
|
||||
ParameterKeyEscaper.escape(k): v for k, v in value.items()
|
||||
}
|
||||
if isinstance(value, dict)
|
||||
else value
|
||||
for key, value in params.items()
|
||||
def ensure_non_empty(k: str, desc: str) -> str:
|
||||
if not k:
|
||||
raise errors.bad_request.ValidationError(
|
||||
f"Empty {desc} name is not allowed"
|
||||
)
|
||||
return k
|
||||
|
||||
params = fields.get("hyperparams")
|
||||
if params:
|
||||
escaped_params = {
|
||||
ParameterKeyEscaper.escape(ensure_non_empty(key, "section")): {
|
||||
ParameterKeyEscaper.escape(ensure_non_empty(k, "parameter")): v
|
||||
for k, v in value.items()
|
||||
}
|
||||
fields[param_field] = escaped_params
|
||||
for key, value in params.items()
|
||||
}
|
||||
fields["hyperparams"] = escaped_params
|
||||
|
||||
params = fields.get("configuration")
|
||||
if params:
|
||||
escaped_params = {
|
||||
ParameterKeyEscaper.escape(ensure_non_empty(key, "configuration")): value
|
||||
for key, value in params.items()
|
||||
}
|
||||
fields["configuration"] = escaped_params
|
||||
|
||||
|
||||
def params_unprepare_from_saved(fields, copy_to_legacy=False):
|
||||
@@ -186,7 +199,7 @@ def escape_paths(paths: Sequence[str]) -> Sequence[str]:
|
||||
for old_prefix, new_prefix in (
|
||||
("execution.parameters", f"hyperparams.{hyperparams_default_section}"),
|
||||
("execution.model_desc", "configuration"),
|
||||
("execution.docker_cmd", "container")
|
||||
("execution.docker_cmd", "container"),
|
||||
):
|
||||
path: str
|
||||
paths = [path.replace(old_prefix, new_prefix) for path in paths]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from datetime import datetime
|
||||
from typing import Collection, Sequence, Tuple, Any, Optional, Dict
|
||||
from typing import Collection, Sequence, Tuple, Optional, Dict
|
||||
|
||||
import six
|
||||
from mongoengine import Q
|
||||
@@ -7,11 +7,12 @@ from redis import StrictRedis
|
||||
from six import string_types
|
||||
|
||||
import apiserver.database.utils as dbutils
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.apierrors import errors, APIError
|
||||
from apiserver.apimodels.tasks import TaskInputModel
|
||||
from apiserver.bll.queue import QueueBLL
|
||||
from apiserver.bll.organization import OrgBLL, Tags
|
||||
from apiserver.bll.project import ProjectBLL
|
||||
from apiserver.bll.util import update_project_time
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.errors import translate_errors_context
|
||||
from apiserver.database.model.model import Model
|
||||
@@ -30,18 +31,21 @@ from apiserver.database.model.task.task import (
|
||||
TaskModelTypes,
|
||||
)
|
||||
from apiserver.database.model import EntityVisibility
|
||||
from apiserver.database.utils import get_company_or_none_constraint, id as create_id
|
||||
from apiserver.database.model.queue import Queue
|
||||
from apiserver.database.utils import (
|
||||
get_company_or_none_constraint,
|
||||
id as create_id,
|
||||
)
|
||||
from apiserver.es_factory import es_factory
|
||||
from apiserver.redis_manager import redman
|
||||
from apiserver.service_repo import APICall
|
||||
from apiserver.services.utils import validate_tags, escape_dict_field, escape_dict
|
||||
from apiserver.timing_context import TimingContext
|
||||
from apiserver.utilities.dicts import nested_set
|
||||
from .artifacts import artifacts_prepare_for_save
|
||||
from .param_utils import params_prepare_for_save
|
||||
from .utils import (
|
||||
ChangeStatusRequest,
|
||||
update_project_time,
|
||||
deleted_prefix,
|
||||
get_last_metric_updates,
|
||||
)
|
||||
|
||||
log = config.logger(__file__)
|
||||
@@ -55,31 +59,13 @@ class TaskBLL:
|
||||
self.events_es = events_es or es_factory.connect("events")
|
||||
self.redis: StrictRedis = redis or redman.connection("apiserver")
|
||||
|
||||
@staticmethod
|
||||
def get_task_with_access(
|
||||
task_id, company_id, only=None, allow_public=False, requires_write_access=False
|
||||
) -> Task:
|
||||
"""
|
||||
Gets a task that has a required write access
|
||||
:except errors.bad_request.InvalidTaskId: if the task is not found
|
||||
:except errors.forbidden.NoWritePermission: if write_access was required and the task cannot be modified
|
||||
"""
|
||||
with translate_errors_context():
|
||||
query = dict(id=task_id, company=company_id)
|
||||
with TimingContext("mongo", "task_with_access"):
|
||||
if requires_write_access:
|
||||
task = Task.get_for_writing(_only=only, **query)
|
||||
else:
|
||||
task = Task.get(_only=only, **query, include_public=allow_public)
|
||||
|
||||
if not task:
|
||||
raise errors.bad_request.InvalidTaskId(**query)
|
||||
|
||||
return task
|
||||
|
||||
@staticmethod
|
||||
def get_by_id(
|
||||
company_id, task_id, required_status=None, only_fields=None, allow_public=False,
|
||||
company_id,
|
||||
task_id,
|
||||
required_status=None,
|
||||
only_fields=None,
|
||||
allow_public=False,
|
||||
):
|
||||
if only_fields:
|
||||
if isinstance(only_fields, string_types):
|
||||
@@ -88,15 +74,14 @@ class TaskBLL:
|
||||
only_fields = list(only_fields)
|
||||
only_fields = only_fields + ["status"]
|
||||
|
||||
with TimingContext("mongo", "task_by_id_all"):
|
||||
tasks = Task.get_many(
|
||||
company=company_id,
|
||||
query=Q(id=task_id),
|
||||
allow_public=allow_public,
|
||||
override_projection=only_fields,
|
||||
return_dicts=False,
|
||||
)
|
||||
task = None if not tasks else tasks[0]
|
||||
tasks = Task.get_many(
|
||||
company=company_id,
|
||||
query=Q(id=task_id),
|
||||
allow_public=allow_public,
|
||||
override_projection=only_fields,
|
||||
return_dicts=False,
|
||||
)
|
||||
task = None if not tasks else tasks[0]
|
||||
|
||||
if not task:
|
||||
raise errors.bad_request.InvalidTaskId(id=task_id)
|
||||
@@ -111,7 +96,7 @@ class TaskBLL:
|
||||
company_id, task_ids, only=None, allow_public=False, return_tasks=True
|
||||
) -> Optional[Sequence[Task]]:
|
||||
task_ids = [task_ids] if isinstance(task_ids, six.string_types) else task_ids
|
||||
with translate_errors_context(), TimingContext("mongo", "task_exists"):
|
||||
with translate_errors_context():
|
||||
ids = set(task_ids)
|
||||
q = Task.get_many(
|
||||
company=company_id,
|
||||
@@ -131,16 +116,16 @@ class TaskBLL:
|
||||
return list(q)
|
||||
|
||||
@staticmethod
|
||||
def create(call: APICall, fields: dict):
|
||||
identity = call.identity
|
||||
def create(company: str, user: str, fields: dict):
|
||||
now = datetime.utcnow()
|
||||
return Task(
|
||||
id=create_id(),
|
||||
user=identity.user,
|
||||
company=identity.company,
|
||||
user=user,
|
||||
company=company,
|
||||
created=now,
|
||||
last_update=now,
|
||||
last_change=now,
|
||||
last_changed_by=user,
|
||||
**fields,
|
||||
)
|
||||
|
||||
@@ -179,18 +164,36 @@ class TaskBLL:
|
||||
input_models: Optional[Sequence[TaskInputModel]] = None,
|
||||
validate_references: bool = False,
|
||||
new_project_name: str = None,
|
||||
hyperparams_overrides: Optional[dict] = None,
|
||||
configuration_overrides: Optional[dict] = None,
|
||||
) -> Tuple[Task, dict]:
|
||||
validate_tags(tags, system_tags)
|
||||
params_dict = {
|
||||
field: value
|
||||
for field, value in (
|
||||
("hyperparams", hyperparams),
|
||||
("configuration", configuration),
|
||||
)
|
||||
if value is not None
|
||||
}
|
||||
task: Task = cls.get_by_id(
|
||||
company_id=company_id, task_id=task_id, allow_public=True
|
||||
)
|
||||
|
||||
task = cls.get_by_id(company_id=company_id, task_id=task_id, allow_public=True)
|
||||
params_dict = {}
|
||||
if hyperparams:
|
||||
params_dict["hyperparams"] = hyperparams
|
||||
elif hyperparams_overrides:
|
||||
updated_hyperparams = {
|
||||
sec: {k: value for k, value in sec_data.items()}
|
||||
for sec, sec_data in (task.hyperparams or {}).items()
|
||||
}
|
||||
for section, section_data in hyperparams_overrides.items():
|
||||
for key, value in section_data.items():
|
||||
nested_set(updated_hyperparams, (section, key), value)
|
||||
params_dict["hyperparams"] = updated_hyperparams
|
||||
|
||||
if configuration:
|
||||
params_dict["configuration"] = configuration
|
||||
elif configuration_overrides:
|
||||
updated_configuration = {
|
||||
k: value for k, value in (task.configuration or {}).items()
|
||||
}
|
||||
for key, value in configuration_overrides.items():
|
||||
updated_configuration[key] = value
|
||||
params_dict["configuration"] = updated_configuration
|
||||
|
||||
now = datetime.utcnow()
|
||||
if input_models:
|
||||
@@ -260,58 +263,66 @@ class TaskBLL:
|
||||
not in [TaskSystemTags.development, EntityVisibility.archived.value]
|
||||
]
|
||||
|
||||
with TimingContext("mongo", "clone task"):
|
||||
parent_task = (
|
||||
task.parent
|
||||
if task.parent and not task.parent.startswith(deleted_prefix)
|
||||
else None
|
||||
)
|
||||
new_task = Task(
|
||||
id=create_id(),
|
||||
user=user_id,
|
||||
company=company_id,
|
||||
created=now,
|
||||
last_update=now,
|
||||
last_change=now,
|
||||
name=name or task.name,
|
||||
comment=comment or task.comment,
|
||||
parent=parent or parent_task,
|
||||
project=project or task.project,
|
||||
tags=tags or task.tags,
|
||||
system_tags=system_tags or clean_system_tags(task.system_tags),
|
||||
type=task.type,
|
||||
script=task.script,
|
||||
output=Output(destination=task.output.destination)
|
||||
if task.output
|
||||
else None,
|
||||
models=Models(input=input_models or task.models.input),
|
||||
container=escape_dict(container) or task.container,
|
||||
execution=execution_dict,
|
||||
configuration=params_dict.get("configuration") or task.configuration,
|
||||
hyperparams=params_dict.get("hyperparams") or task.hyperparams,
|
||||
)
|
||||
cls.validate(
|
||||
new_task,
|
||||
validate_models=validate_references or input_models,
|
||||
validate_parent=validate_references or parent,
|
||||
validate_project=validate_references or project,
|
||||
)
|
||||
new_task.save()
|
||||
def ensure_int_labels(execution: dict) -> dict:
|
||||
if not execution:
|
||||
return execution
|
||||
|
||||
if task.project == new_task.project:
|
||||
updated_tags = tags
|
||||
updated_system_tags = system_tags
|
||||
else:
|
||||
updated_tags = new_task.tags
|
||||
updated_system_tags = new_task.system_tags
|
||||
org_bll.update_tags(
|
||||
company_id,
|
||||
Tags.Task,
|
||||
project=new_task.project,
|
||||
tags=updated_tags,
|
||||
system_tags=updated_system_tags,
|
||||
)
|
||||
update_project_time(new_task.project)
|
||||
model_labels = execution.get("model_labels")
|
||||
if model_labels:
|
||||
execution["model_labels"] = {k: int(v) for k, v in model_labels.items()}
|
||||
|
||||
return execution
|
||||
|
||||
parent_task = (
|
||||
task.parent
|
||||
if task.parent and not task.parent.startswith(deleted_prefix)
|
||||
else task.id
|
||||
)
|
||||
new_task = Task(
|
||||
id=create_id(),
|
||||
user=user_id,
|
||||
company=company_id,
|
||||
created=now,
|
||||
last_update=now,
|
||||
last_change=now,
|
||||
last_changed_by=user_id,
|
||||
name=name or task.name,
|
||||
comment=comment or task.comment,
|
||||
parent=parent or parent_task,
|
||||
project=project or task.project,
|
||||
tags=tags or task.tags,
|
||||
system_tags=system_tags or clean_system_tags(task.system_tags),
|
||||
type=task.type,
|
||||
script=task.script,
|
||||
output=Output(destination=task.output.destination) if task.output else None,
|
||||
models=Models(input=input_models or task.models.input),
|
||||
container=escape_dict(container) or task.container,
|
||||
execution=ensure_int_labels(execution_dict),
|
||||
configuration=params_dict.get("configuration") or task.configuration,
|
||||
hyperparams=params_dict.get("hyperparams") or task.hyperparams,
|
||||
)
|
||||
cls.validate(
|
||||
new_task,
|
||||
validate_models=validate_references or input_models,
|
||||
validate_parent=validate_references or parent,
|
||||
validate_project=validate_references or project,
|
||||
)
|
||||
new_task.save()
|
||||
|
||||
if task.project == new_task.project:
|
||||
updated_tags = tags
|
||||
updated_system_tags = system_tags
|
||||
else:
|
||||
updated_tags = new_task.tags
|
||||
updated_system_tags = new_task.system_tags
|
||||
org_bll.update_tags(
|
||||
company_id,
|
||||
Tags.Task,
|
||||
projects=[new_task.project],
|
||||
tags=updated_tags,
|
||||
system_tags=updated_system_tags,
|
||||
)
|
||||
update_project_time(new_task.project)
|
||||
|
||||
return new_task, new_project_data
|
||||
|
||||
@@ -350,6 +361,7 @@ class TaskBLL:
|
||||
def set_last_update(
|
||||
task_ids: Collection[str],
|
||||
company_id: str,
|
||||
user_id: str,
|
||||
last_update: datetime,
|
||||
**extra_updates,
|
||||
):
|
||||
@@ -370,6 +382,7 @@ class TaskBLL:
|
||||
upsert=False,
|
||||
last_update=last_update,
|
||||
last_change=last_update,
|
||||
last_changed_by=user_id,
|
||||
**updates,
|
||||
)
|
||||
return count
|
||||
@@ -378,10 +391,11 @@ class TaskBLL:
|
||||
def update_statistics(
|
||||
task_id: str,
|
||||
company_id: str,
|
||||
user_id: str,
|
||||
last_update: datetime = None,
|
||||
last_iteration: int = None,
|
||||
last_iteration_max: int = None,
|
||||
last_scalar_values: Sequence[Tuple[Tuple[str, ...], Any]] = None,
|
||||
last_scalar_events: Dict[str, Dict[str, dict]] = None,
|
||||
last_events: Dict[str, Dict[str, dict]] = None,
|
||||
**extra_updates,
|
||||
):
|
||||
@@ -394,7 +408,7 @@ class TaskBLL:
|
||||
task's last iteration value.
|
||||
:param last_iteration_max: Last reported iteration. Use this to conditionally set a value only
|
||||
if the current task's last iteration value is smaller than the provided value.
|
||||
:param last_scalar_values: Last reported metrics summary for scalar events (value, metric, variant).
|
||||
:param last_scalar_events: Last reported metrics summary for scalar events (value, metric, variant).
|
||||
:param last_events: Last reported metrics summary (value, metric, event type).
|
||||
:param extra_updates: Extra task updates to include in this update call.
|
||||
:return:
|
||||
@@ -406,25 +420,21 @@ class TaskBLL:
|
||||
elif last_iteration_max is not None:
|
||||
extra_updates.update(max__last_iteration=last_iteration_max)
|
||||
|
||||
if last_scalar_values is not None:
|
||||
|
||||
def op_path(op, *path):
|
||||
return "__".join((op, "last_metrics") + path)
|
||||
|
||||
for path, value in last_scalar_values:
|
||||
if path[-1] == "min_value":
|
||||
extra_updates[op_path("min", *path[:-1], "min_value")] = value
|
||||
elif path[-1] == "max_value":
|
||||
extra_updates[op_path("max", *path[:-1], "max_value")] = value
|
||||
else:
|
||||
extra_updates[op_path("set", *path)] = value
|
||||
raw_updates = {}
|
||||
if last_scalar_events is not None:
|
||||
get_last_metric_updates(
|
||||
task_id=task_id,
|
||||
last_scalar_events=last_scalar_events,
|
||||
raw_updates=raw_updates,
|
||||
extra_updates=extra_updates,
|
||||
)
|
||||
|
||||
if last_events is not None:
|
||||
|
||||
def events_per_type(metric_data: Dict[str, dict]) -> Dict[str, EventStats]:
|
||||
def events_per_type(metric_data_: Dict[str, dict]) -> Dict[str, EventStats]:
|
||||
return {
|
||||
event_type: EventStats(last_update=event["timestamp"])
|
||||
for event_type, event in metric_data.items()
|
||||
for event_type, event in metric_data_.items()
|
||||
}
|
||||
|
||||
metric_stats = {
|
||||
@@ -435,28 +445,67 @@ class TaskBLL:
|
||||
}
|
||||
extra_updates["metric_stats"] = metric_stats
|
||||
|
||||
return TaskBLL.set_last_update(
|
||||
ret = TaskBLL.set_last_update(
|
||||
task_ids=[task_id],
|
||||
company_id=company_id,
|
||||
user_id=user_id,
|
||||
last_update=last_update,
|
||||
**extra_updates,
|
||||
)
|
||||
if ret and raw_updates:
|
||||
Task.objects(id=task_id).update_one(__raw__=[{"$set": raw_updates}])
|
||||
|
||||
return ret
|
||||
|
||||
@staticmethod
|
||||
def remove_task_from_all_queues(
|
||||
company_id: str, task_id: str, exclude: str = None
|
||||
) -> int:
|
||||
more = {}
|
||||
if exclude:
|
||||
more["id__ne"] = exclude
|
||||
return Queue.objects(company=company_id, entries__task=task_id, **more).update(
|
||||
pull__entries__task=task_id, last_update=datetime.utcnow()
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def dequeue_and_change_status(
|
||||
cls, task: Task, company_id: str, status_message: str, status_reason: str,
|
||||
cls,
|
||||
task: Task,
|
||||
company_id: str,
|
||||
user_id: str,
|
||||
status_message: str,
|
||||
status_reason: str,
|
||||
remove_from_all_queues=False,
|
||||
new_status=None,
|
||||
new_status_for_aborted_task=None,
|
||||
):
|
||||
cls.dequeue(task, company_id)
|
||||
try:
|
||||
cls.dequeue(task, company_id=company_id, user_id=user_id, silent_fail=True)
|
||||
except APIError:
|
||||
# dequeue may fail if the queue was deleted
|
||||
pass
|
||||
|
||||
if remove_from_all_queues:
|
||||
cls.remove_task_from_all_queues(company_id=company_id, task_id=task.id)
|
||||
|
||||
if task.status not in [TaskStatus.queued, TaskStatus.in_progress]:
|
||||
return {"updated": 0}
|
||||
|
||||
if new_status_for_aborted_task and task.status == TaskStatus.in_progress:
|
||||
new_status = new_status_for_aborted_task
|
||||
|
||||
return ChangeStatusRequest(
|
||||
task=task,
|
||||
new_status=task.enqueue_status or TaskStatus.created,
|
||||
new_status=new_status or task.enqueue_status or TaskStatus.created,
|
||||
status_reason=status_reason,
|
||||
status_message=status_message,
|
||||
user_id=user_id,
|
||||
force=True,
|
||||
).execute(enqueue_status=None)
|
||||
|
||||
@classmethod
|
||||
def dequeue(cls, task: Task, company_id: str, silent_fail=False):
|
||||
def dequeue(cls, task: Task, company_id: str, user_id: str, silent_fail=False):
|
||||
"""
|
||||
Dequeue the task from the queue
|
||||
:param task: task to dequeue
|
||||
@@ -483,6 +532,9 @@ class TaskBLL:
|
||||
|
||||
return {
|
||||
"removed": queue_bll.remove_task(
|
||||
company_id=company_id, queue_id=task.execution.queue, task_id=task.id
|
||||
company_id=company_id,
|
||||
user_id=user_id,
|
||||
queue_id=task.execution.queue,
|
||||
task_id=task.id,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,71 +1,38 @@
|
||||
from datetime import datetime
|
||||
from itertools import chain
|
||||
from operator import attrgetter
|
||||
from typing import Sequence, Generic, Callable, Type, Iterable, TypeVar, List, Set
|
||||
from typing import Sequence, Set, Tuple, Union
|
||||
|
||||
import attr
|
||||
from boltons.iterutils import partition
|
||||
from mongoengine import QuerySet, Document
|
||||
from boltons.iterutils import partition, bucketize, first, chunked_iter
|
||||
from furl import furl
|
||||
from mongoengine import NotUniqueError
|
||||
from pymongo.errors import DuplicateKeyError
|
||||
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.bll.event import EventBLL
|
||||
from apiserver.bll.event.event_bll import PlotFields
|
||||
from apiserver.bll.event.event_common import EventType
|
||||
from apiserver.bll.task.utils import deleted_prefix
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.model.model import Model
|
||||
from apiserver.database.model.task.task import Task, TaskStatus, ArtifactModes
|
||||
from apiserver.timing_context import TimingContext
|
||||
from apiserver.database.model.url_to_delete import (
|
||||
StorageType,
|
||||
UrlToDelete,
|
||||
FileType,
|
||||
DeletionStatus,
|
||||
)
|
||||
from apiserver.database.utils import id as db_id
|
||||
|
||||
log = config.logger(__file__)
|
||||
event_bll = EventBLL()
|
||||
T = TypeVar("T", bound=Document)
|
||||
|
||||
|
||||
class DocumentGroup(List[T]):
|
||||
"""
|
||||
Operate on a list of documents as if they were a query result
|
||||
"""
|
||||
|
||||
def __init__(self, document_type: Type[T], documents: Iterable[T]):
|
||||
super(DocumentGroup, self).__init__(documents)
|
||||
self.type = document_type
|
||||
|
||||
@property
|
||||
def ids(self) -> Set[str]:
|
||||
return {obj.id for obj in self}
|
||||
|
||||
def objects(self, *args, **kwargs) -> QuerySet:
|
||||
return self.type.objects(id__in=self.ids, *args, **kwargs)
|
||||
|
||||
|
||||
class TaskOutputs(Generic[T]):
|
||||
"""
|
||||
Split task outputs of the same type by the ready state
|
||||
"""
|
||||
|
||||
published: DocumentGroup[T]
|
||||
draft: DocumentGroup[T]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
is_published: Callable[[T], bool],
|
||||
document_type: Type[T],
|
||||
children: Iterable[T],
|
||||
):
|
||||
"""
|
||||
:param is_published: predicate returning whether items is considered published
|
||||
:param document_type: type of output
|
||||
:param children: output documents
|
||||
"""
|
||||
self.published, self.draft = map(
|
||||
lambda x: DocumentGroup(document_type, x),
|
||||
partition(children, key=is_published),
|
||||
)
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True)
|
||||
class TaskUrls:
|
||||
model_urls: Sequence[str]
|
||||
event_urls: Sequence[str]
|
||||
artifact_urls: Sequence[str]
|
||||
event_urls: Sequence[str] = [] # left here is in order not to break the api
|
||||
|
||||
def __add__(self, other: "TaskUrls"):
|
||||
if not other:
|
||||
@@ -73,7 +40,6 @@ class TaskUrls:
|
||||
|
||||
return TaskUrls(
|
||||
model_urls=list(set(self.model_urls) | set(other.model_urls)),
|
||||
event_urls=list(set(self.event_urls) | set(other.event_urls)),
|
||||
artifact_urls=list(set(self.artifact_urls) | set(other.artifact_urls)),
|
||||
)
|
||||
|
||||
@@ -87,8 +53,23 @@ class CleanupResult:
|
||||
updated_children: int
|
||||
updated_models: int
|
||||
deleted_models: int
|
||||
deleted_model_ids: Set[str]
|
||||
urls: TaskUrls = None
|
||||
|
||||
def to_res_dict(self, return_file_urls: bool) -> dict:
|
||||
remove_fields = ["deleted_model_ids"]
|
||||
if not return_file_urls:
|
||||
remove_fields.append("urls")
|
||||
|
||||
# noinspection PyTypeChecker
|
||||
res = attr.asdict(
|
||||
self, filter=lambda attrib, value: attrib.name not in remove_fields
|
||||
)
|
||||
if not return_file_urls:
|
||||
res["urls"] = None
|
||||
|
||||
return res
|
||||
|
||||
def __add__(self, other: "CleanupResult"):
|
||||
if not other:
|
||||
return self
|
||||
@@ -98,16 +79,29 @@ class CleanupResult:
|
||||
updated_models=self.updated_models + other.updated_models,
|
||||
deleted_models=self.deleted_models + other.deleted_models,
|
||||
urls=self.urls + other.urls if self.urls else other.urls,
|
||||
deleted_model_ids=self.deleted_model_ids | other.deleted_model_ids,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def empty():
|
||||
return CleanupResult(
|
||||
updated_children=0,
|
||||
updated_models=0,
|
||||
deleted_models=0,
|
||||
deleted_model_ids=set(),
|
||||
)
|
||||
|
||||
|
||||
def collect_plot_image_urls(company: str, task: str) -> Set[str]:
|
||||
def collect_plot_image_urls(
|
||||
company: str, task_or_model: Union[str, Sequence[str]]
|
||||
) -> Set[str]:
|
||||
urls = set()
|
||||
next_scroll_id = None
|
||||
with TimingContext("es", "collect_plot_image_urls"):
|
||||
task_ids = task_or_model if isinstance(task_or_model, list) else [task_or_model]
|
||||
for tasks in chunked_iter(task_ids, 100):
|
||||
next_scroll_id = None
|
||||
while True:
|
||||
events, next_scroll_id = event_bll.get_plot_image_urls(
|
||||
company_id=company, task_id=task, scroll_id=next_scroll_id
|
||||
company_id=company, task_ids=tasks, scroll_id=next_scroll_id
|
||||
)
|
||||
if not events:
|
||||
break
|
||||
@@ -119,45 +113,134 @@ def collect_plot_image_urls(company: str, task: str) -> Set[str]:
|
||||
return urls
|
||||
|
||||
|
||||
def collect_debug_image_urls(company: str, task: str) -> Set[str]:
|
||||
def collect_debug_image_urls(
|
||||
company: str, task_or_model: Union[str, Sequence[str]]
|
||||
) -> Set[str]:
|
||||
"""
|
||||
Return the set of unique image urls
|
||||
Uses DebugImagesIterator to make sure that we do not retrieve recycled urls
|
||||
"""
|
||||
metrics = event_bll.get_metrics_and_variants(
|
||||
company_id=company, task_id=task, event_type=EventType.metrics_image
|
||||
)
|
||||
if not metrics:
|
||||
return set()
|
||||
|
||||
task_metrics = {task: {m: [] for m in metrics}}
|
||||
scroll_id = None
|
||||
urls = set()
|
||||
while True:
|
||||
res = event_bll.debug_images_iterator.get_task_events(
|
||||
company_id=company,
|
||||
task_metrics=task_metrics,
|
||||
iter_count=10,
|
||||
state_id=scroll_id,
|
||||
)
|
||||
if not res.metric_events or not any(
|
||||
iterations for _, iterations in res.metric_events
|
||||
):
|
||||
break
|
||||
task_ids = task_or_model if isinstance(task_or_model, list) else [task_or_model]
|
||||
for tasks in chunked_iter(task_ids, 100):
|
||||
after_key = None
|
||||
while True:
|
||||
res, after_key = event_bll.get_debug_image_urls(
|
||||
company_id=company,
|
||||
task_ids=tasks,
|
||||
after_key=after_key,
|
||||
)
|
||||
urls.update(res)
|
||||
if not after_key:
|
||||
break
|
||||
|
||||
scroll_id = res.next_scroll_id
|
||||
for task, iterations in res.metric_events:
|
||||
urls.update(ev.get("url") for it in iterations for ev in it["events"])
|
||||
|
||||
urls.discard({None})
|
||||
return urls
|
||||
|
||||
|
||||
supported_storage_types = {
|
||||
"s3://": StorageType.s3,
|
||||
"azure://": StorageType.azure,
|
||||
"gs://": StorageType.gs,
|
||||
}
|
||||
|
||||
supported_storage_types.update(
|
||||
{
|
||||
p: StorageType.fileserver
|
||||
for p in config.get(
|
||||
"services.async_urls_delete.fileserver.url_prefixes",
|
||||
["https://", "http://"],
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def schedule_for_delete(
|
||||
company: str,
|
||||
user: str,
|
||||
task_id: str,
|
||||
urls: Set[str],
|
||||
can_delete_folders: bool,
|
||||
) -> Set[str]:
|
||||
urls_per_storage = bucketize(
|
||||
urls,
|
||||
key=lambda u: first(
|
||||
type_
|
||||
for prefix, type_ in supported_storage_types.items()
|
||||
if u.startswith(prefix)
|
||||
),
|
||||
)
|
||||
urls_per_storage.pop(None, None)
|
||||
|
||||
processed_urls = set()
|
||||
for storage_type, storage_urls in urls_per_storage.items():
|
||||
delete_folders = (storage_type == StorageType.fileserver) and can_delete_folders
|
||||
scheduled_to_delete = set()
|
||||
for url in storage_urls:
|
||||
folder = None
|
||||
if delete_folders:
|
||||
try:
|
||||
parsed = furl(url)
|
||||
if parsed.path and len(parsed.path.segments) > 1:
|
||||
folder = parsed.remove(
|
||||
args=True, fragment=True, path=parsed.path.segments[-1]
|
||||
).url.rstrip("/")
|
||||
except Exception as ex:
|
||||
pass
|
||||
|
||||
to_delete = folder or url
|
||||
if to_delete in scheduled_to_delete:
|
||||
processed_urls.add(url)
|
||||
continue
|
||||
|
||||
try:
|
||||
UrlToDelete(
|
||||
id=db_id(),
|
||||
company=company,
|
||||
user=user,
|
||||
url=to_delete,
|
||||
task=task_id,
|
||||
created=datetime.utcnow(),
|
||||
storage_type=storage_type,
|
||||
type=FileType.folder if folder else FileType.file,
|
||||
).save()
|
||||
except (DuplicateKeyError, NotUniqueError):
|
||||
existing = UrlToDelete.objects(company=company, url=to_delete).first()
|
||||
if existing:
|
||||
existing.update(
|
||||
user=user,
|
||||
task=task_id,
|
||||
created=datetime.utcnow(),
|
||||
retry_count=0,
|
||||
unset__last_failure_time=1,
|
||||
unset__last_failure_reason=1,
|
||||
status=DeletionStatus.created,
|
||||
)
|
||||
processed_urls.add(url)
|
||||
scheduled_to_delete.add(to_delete)
|
||||
|
||||
return processed_urls
|
||||
|
||||
|
||||
def delete_task_events_and_collect_urls(
|
||||
company: str, task_ids: Sequence[str], wait_for_delete: bool, model=False
|
||||
) -> Set[str]:
|
||||
event_urls = collect_debug_image_urls(company, task_ids) | collect_plot_image_urls(
|
||||
company, task_ids
|
||||
)
|
||||
|
||||
event_bll.delete_task_events(
|
||||
company, task_ids, model=model, wait_for_delete=wait_for_delete
|
||||
)
|
||||
|
||||
return event_urls
|
||||
|
||||
|
||||
def cleanup_task(
|
||||
company: str,
|
||||
user: str,
|
||||
task: Task,
|
||||
force: bool = False,
|
||||
update_children=True,
|
||||
return_file_urls=False,
|
||||
delete_output_models=True,
|
||||
) -> CleanupResult:
|
||||
"""
|
||||
@@ -166,113 +249,126 @@ def cleanup_task(
|
||||
:param force: whether to delete task with published outputs
|
||||
:return: count of delete and modified items
|
||||
"""
|
||||
models = verify_task_children_and_ouptuts(task, force)
|
||||
|
||||
event_urls, artifact_urls, model_urls = set(), set(), set()
|
||||
if return_file_urls:
|
||||
event_urls = collect_debug_image_urls(task.company, task.id)
|
||||
event_urls.update(collect_plot_image_urls(task.company, task.id))
|
||||
if task.execution and task.execution.artifacts:
|
||||
artifact_urls = {
|
||||
a.uri
|
||||
for a in task.execution.artifacts.values()
|
||||
if a.mode == ArtifactModes.output and a.uri
|
||||
}
|
||||
model_urls = {m.uri for m in models.draft.objects().only("uri") if m.uri}
|
||||
published_models, draft_models, in_use_model_ids = verify_task_children_and_ouptuts(
|
||||
task, force
|
||||
)
|
||||
artifact_urls = (
|
||||
{
|
||||
a.uri
|
||||
for a in task.execution.artifacts.values()
|
||||
if a.mode == ArtifactModes.output and a.uri
|
||||
}
|
||||
if task.execution and task.execution.artifacts
|
||||
else {}
|
||||
)
|
||||
model_urls = {m.uri for m in draft_models if m.uri and m.id not in in_use_model_ids}
|
||||
|
||||
deleted_task_id = f"{deleted_prefix}{task.id}"
|
||||
updated_children = 0
|
||||
now = datetime.utcnow()
|
||||
if update_children:
|
||||
with TimingContext("mongo", "update_task_children"):
|
||||
updated_children = Task.objects(parent=task.id).update(
|
||||
parent=deleted_task_id
|
||||
updated_children = Task.objects(parent=task.id).update(
|
||||
parent=deleted_task_id,
|
||||
last_change=now,
|
||||
last_changed_by=user,
|
||||
)
|
||||
|
||||
deleted_models = 0
|
||||
updated_models = 0
|
||||
deleted_model_ids = set()
|
||||
for models, allow_delete in ((draft_models, True), (published_models, False)):
|
||||
if not models:
|
||||
continue
|
||||
if delete_output_models and allow_delete:
|
||||
model_ids = list({m.id for m in models if m.id not in in_use_model_ids})
|
||||
if model_ids:
|
||||
deleted_models += Model.objects(id__in=model_ids).delete()
|
||||
deleted_model_ids.update(model_ids)
|
||||
|
||||
if in_use_model_ids:
|
||||
Model.objects(id__in=list(in_use_model_ids)).update(
|
||||
unset__task=1,
|
||||
set__last_change=now,
|
||||
set__last_changed_by=user,
|
||||
)
|
||||
continue
|
||||
|
||||
if update_children:
|
||||
updated_models += Model.objects(id__in=[m.id for m in models]).update(
|
||||
task=deleted_task_id,
|
||||
last_change=now,
|
||||
last_changed_by=user,
|
||||
)
|
||||
else:
|
||||
Model.objects(id__in=[m.id for m in models]).update(
|
||||
unset__task=1,
|
||||
set__last_change=now,
|
||||
set__last_changed_by=user,
|
||||
)
|
||||
else:
|
||||
updated_children = 0
|
||||
|
||||
if models.draft and delete_output_models:
|
||||
with TimingContext("mongo", "delete_models"):
|
||||
deleted_models = models.draft.objects().delete()
|
||||
else:
|
||||
deleted_models = 0
|
||||
|
||||
if models.published and update_children:
|
||||
with TimingContext("mongo", "update_task_models"):
|
||||
updated_models = models.published.objects().update(task=deleted_task_id)
|
||||
else:
|
||||
updated_models = 0
|
||||
|
||||
event_bll.delete_task_events(task.company, task.id, allow_locked=force)
|
||||
|
||||
return CleanupResult(
|
||||
deleted_models=deleted_models,
|
||||
updated_children=updated_children,
|
||||
updated_models=updated_models,
|
||||
urls=TaskUrls(
|
||||
event_urls=list(event_urls),
|
||||
artifact_urls=list(artifact_urls),
|
||||
model_urls=list(model_urls),
|
||||
)
|
||||
if return_file_urls
|
||||
else None,
|
||||
),
|
||||
deleted_model_ids=deleted_model_ids,
|
||||
)
|
||||
|
||||
|
||||
def verify_task_children_and_ouptuts(task: Task, force: bool) -> TaskOutputs[Model]:
|
||||
def verify_task_children_and_ouptuts(
|
||||
task, force: bool
|
||||
) -> Tuple[Sequence[Model], Sequence[Model], Set[str]]:
|
||||
if not force:
|
||||
with TimingContext("mongo", "count_published_children"):
|
||||
published_children_count = Task.objects(
|
||||
parent=task.id, status=TaskStatus.published
|
||||
).count()
|
||||
if published_children_count:
|
||||
raise errors.bad_request.TaskCannotBeDeleted(
|
||||
"has children, use force=True",
|
||||
task=task.id,
|
||||
children=published_children_count,
|
||||
)
|
||||
|
||||
with TimingContext("mongo", "get_task_models"):
|
||||
models = TaskOutputs(
|
||||
attrgetter("ready"),
|
||||
Model,
|
||||
Model.objects(task=task.id).only("id", "task", "ready"),
|
||||
)
|
||||
if not force and models.published:
|
||||
published_children_count = Task.objects(
|
||||
parent=task.id, status=TaskStatus.published
|
||||
).count()
|
||||
if published_children_count:
|
||||
raise errors.bad_request.TaskCannotBeDeleted(
|
||||
"has output models, use force=True",
|
||||
"has children, use force=True",
|
||||
task=task.id,
|
||||
models=len(models.published),
|
||||
children=published_children_count,
|
||||
)
|
||||
|
||||
model_fields = ["id", "ready", "uri"]
|
||||
published_models, draft_models = partition(
|
||||
Model.objects(task=task.id).only(*model_fields),
|
||||
key=attrgetter("ready"),
|
||||
)
|
||||
if not force and published_models:
|
||||
raise errors.bad_request.TaskCannotBeDeleted(
|
||||
"has output models, use force=True",
|
||||
task=task.id,
|
||||
models=len(published_models),
|
||||
)
|
||||
|
||||
if task.models and task.models.output:
|
||||
with TimingContext("mongo", "get_task_output_model"):
|
||||
model_ids = [m.model for m in task.models.output]
|
||||
for output_model in Model.objects(id__in=model_ids):
|
||||
if output_model.ready:
|
||||
if not force:
|
||||
raise errors.bad_request.TaskCannotBeDeleted(
|
||||
"has output model, use force=True",
|
||||
task=task.id,
|
||||
model=output_model.id,
|
||||
)
|
||||
models.published.append(output_model)
|
||||
else:
|
||||
models.draft.append(output_model)
|
||||
model_ids = [m.model for m in task.models.output]
|
||||
for output_model in Model.objects(id__in=model_ids).only(*model_fields):
|
||||
if output_model.ready:
|
||||
if not force:
|
||||
raise errors.bad_request.TaskCannotBeDeleted(
|
||||
"has output model, use force=True",
|
||||
task=task.id,
|
||||
model=output_model.id,
|
||||
)
|
||||
published_models.append(output_model)
|
||||
else:
|
||||
draft_models.append(output_model)
|
||||
|
||||
if models.draft:
|
||||
with TimingContext("mongo", "get_execution_models"):
|
||||
model_ids = models.draft.ids
|
||||
dependent_tasks = Task.objects(models__input__model__in=model_ids).only(
|
||||
"id", "models"
|
||||
in_use_model_ids = {}
|
||||
if draft_models:
|
||||
model_ids = {m.id for m in draft_models}
|
||||
dependent_tasks = Task.objects(models__input__model__in=list(model_ids)).only(
|
||||
"id", "models"
|
||||
)
|
||||
in_use_model_ids = model_ids & {
|
||||
m.model
|
||||
for m in chain.from_iterable(
|
||||
t.models.input for t in dependent_tasks if t.models
|
||||
)
|
||||
input_models = {
|
||||
m.model
|
||||
for m in chain.from_iterable(
|
||||
t.models.input for t in dependent_tasks if t.models
|
||||
)
|
||||
}
|
||||
if input_models:
|
||||
models.draft = DocumentGroup(
|
||||
Model, (m for m in models.draft if m.id not in input_models)
|
||||
)
|
||||
}
|
||||
|
||||
return models
|
||||
return published_models, draft_models, in_use_model_ids
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from datetime import datetime
|
||||
from typing import Callable, Any, Tuple, Union
|
||||
from typing import Callable, Any, Tuple, Union, Sequence
|
||||
|
||||
from apiserver.apierrors import errors, APIError
|
||||
from apiserver.bll.queue import QueueBLL
|
||||
@@ -7,9 +7,10 @@ from apiserver.bll.task import (
|
||||
TaskBLL,
|
||||
validate_status_change,
|
||||
ChangeStatusRequest,
|
||||
update_project_time,
|
||||
)
|
||||
from apiserver.bll.task.task_cleanup import cleanup_task, CleanupResult
|
||||
from apiserver.bll.task.utils import get_task_with_write_access
|
||||
from apiserver.bll.util import update_project_time
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.model import EntityVisibility
|
||||
from apiserver.database.model.model import Model
|
||||
@@ -22,82 +23,179 @@ from apiserver.database.model.task.task import (
|
||||
ArtifactModes,
|
||||
Execution,
|
||||
DEFAULT_LAST_ITERATION,
|
||||
TaskType,
|
||||
)
|
||||
from apiserver.database.utils import get_options
|
||||
from apiserver.service_repo.auth import Identity
|
||||
from apiserver.utilities.dicts import nested_set
|
||||
|
||||
log = config.logger(__file__)
|
||||
queue_bll = QueueBLL()
|
||||
|
||||
|
||||
def _get_pipeline_steps_for_controller_task(
|
||||
task: Task, company_id: str, only: Sequence[str] = None
|
||||
) -> Sequence[Task]:
|
||||
if not task or task.type != TaskType.controller:
|
||||
return []
|
||||
|
||||
query = Task.objects(company=company_id, parent=task.id)
|
||||
if only:
|
||||
query = query.only(*only)
|
||||
|
||||
return list(query)
|
||||
|
||||
|
||||
def archive_task(
|
||||
task: Union[str, Task], company_id: str, status_message: str, status_reason: str,
|
||||
task: Union[str, Task],
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
status_message: str,
|
||||
status_reason: str,
|
||||
include_pipeline_steps: bool,
|
||||
) -> int:
|
||||
"""
|
||||
Deque and archive task
|
||||
Return 1 if successful
|
||||
"""
|
||||
user_id = identity.user
|
||||
fields = (
|
||||
"id",
|
||||
"company",
|
||||
"execution",
|
||||
"status",
|
||||
"project",
|
||||
"system_tags",
|
||||
"enqueue_status",
|
||||
"type",
|
||||
)
|
||||
if isinstance(task, str):
|
||||
task = TaskBLL.get_task_with_access(
|
||||
task = get_task_with_write_access(
|
||||
task,
|
||||
company_id=company_id,
|
||||
only=(
|
||||
"id",
|
||||
"execution",
|
||||
"status",
|
||||
"project",
|
||||
"system_tags",
|
||||
"enqueue_status",
|
||||
),
|
||||
requires_write_access=True,
|
||||
identity=identity,
|
||||
only=fields,
|
||||
)
|
||||
try:
|
||||
TaskBLL.dequeue_and_change_status(
|
||||
task, company_id, status_message, status_reason,
|
||||
)
|
||||
except APIError:
|
||||
# dequeue may fail if the task was not enqueued
|
||||
pass
|
||||
|
||||
return task.update(
|
||||
status_message=status_message,
|
||||
status_reason=status_reason,
|
||||
add_to_set__system_tags=EntityVisibility.archived.value,
|
||||
last_change=datetime.utcnow(),
|
||||
)
|
||||
def archive_task_core(task_: Task) -> int:
|
||||
try:
|
||||
TaskBLL.dequeue_and_change_status(
|
||||
task_,
|
||||
company_id=company_id,
|
||||
user_id=user_id,
|
||||
status_message=status_message,
|
||||
status_reason=status_reason,
|
||||
remove_from_all_queues=True,
|
||||
new_status_for_aborted_task=TaskStatus.stopped,
|
||||
)
|
||||
except APIError:
|
||||
# dequeue may fail if the task was not enqueued
|
||||
pass
|
||||
|
||||
return task_.update(
|
||||
status_message=status_message,
|
||||
status_reason=status_reason,
|
||||
add_to_set__system_tags=EntityVisibility.archived.value,
|
||||
last_change=datetime.utcnow(),
|
||||
last_changed_by=user_id,
|
||||
)
|
||||
|
||||
if include_pipeline_steps and (
|
||||
step_tasks := _get_pipeline_steps_for_controller_task(
|
||||
task, company_id, only=fields
|
||||
)
|
||||
):
|
||||
for step in step_tasks:
|
||||
archive_task_core(step)
|
||||
|
||||
return archive_task_core(task)
|
||||
|
||||
|
||||
def unarchive_task(
|
||||
task: str, company_id: str, status_message: str, status_reason: str,
|
||||
task_id: str,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
status_message: str,
|
||||
status_reason: str,
|
||||
include_pipeline_steps: bool,
|
||||
) -> int:
|
||||
"""
|
||||
Unarchive task. Return 1 if successful
|
||||
"""
|
||||
task = TaskBLL.get_task_with_access(
|
||||
task, company_id=company_id, only=("id",), requires_write_access=True,
|
||||
)
|
||||
return task.update(
|
||||
status_message=status_message,
|
||||
status_reason=status_reason,
|
||||
pull__system_tags=EntityVisibility.archived.value,
|
||||
last_change=datetime.utcnow(),
|
||||
fields = ("id", "type")
|
||||
task = get_task_with_write_access(
|
||||
task_id,
|
||||
company_id=company_id,
|
||||
identity=identity,
|
||||
only=fields,
|
||||
)
|
||||
|
||||
def unarchive_task_core(task_: Task) -> int:
|
||||
return task_.update(
|
||||
status_message=status_message,
|
||||
status_reason=status_reason,
|
||||
pull__system_tags=EntityVisibility.archived.value,
|
||||
last_change=datetime.utcnow(),
|
||||
last_changed_by=identity.user,
|
||||
)
|
||||
|
||||
if include_pipeline_steps and (
|
||||
step_tasks := _get_pipeline_steps_for_controller_task(
|
||||
task, company_id, only=fields
|
||||
)
|
||||
):
|
||||
for step in step_tasks:
|
||||
unarchive_task_core(step)
|
||||
|
||||
return unarchive_task_core(task)
|
||||
|
||||
|
||||
def dequeue_task(
|
||||
task_id: str,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
status_message: str,
|
||||
status_reason: str,
|
||||
remove_from_all_queues: bool = False,
|
||||
new_status=None,
|
||||
) -> Tuple[int, dict]:
|
||||
query = dict(id=task_id, company=company_id)
|
||||
task = Task.get_for_writing(**query)
|
||||
if new_status and new_status not in get_options(TaskStatus):
|
||||
raise errors.bad_request.ValidationError(f"Invalid task status: {new_status}")
|
||||
|
||||
# get the task without write access to make sure that it actually exists
|
||||
task = Task.get(
|
||||
id=task_id,
|
||||
company=company_id,
|
||||
_only=("id",),
|
||||
include_public=True,
|
||||
)
|
||||
if not task:
|
||||
raise errors.bad_request.InvalidTaskId(**query)
|
||||
TaskBLL.remove_task_from_all_queues(company_id, task_id=task_id)
|
||||
return 1, {"updated": 0}
|
||||
|
||||
user_id = identity.user
|
||||
task = get_task_with_write_access(
|
||||
task_id,
|
||||
company_id=company_id,
|
||||
identity=identity,
|
||||
only=(
|
||||
"id",
|
||||
"company",
|
||||
"execution",
|
||||
"status",
|
||||
"project",
|
||||
"enqueue_status",
|
||||
),
|
||||
)
|
||||
|
||||
res = TaskBLL.dequeue_and_change_status(
|
||||
task,
|
||||
company_id,
|
||||
company_id=company_id,
|
||||
user_id=user_id,
|
||||
status_message=status_message,
|
||||
status_reason=status_reason,
|
||||
remove_from_all_queues=remove_from_all_queues,
|
||||
new_status=new_status,
|
||||
)
|
||||
return 1, res
|
||||
|
||||
@@ -105,32 +203,59 @@ def dequeue_task(
|
||||
def enqueue_task(
|
||||
task_id: str,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
queue_id: str,
|
||||
status_message: str,
|
||||
status_reason: str,
|
||||
queue_name: str = None,
|
||||
validate: bool = False,
|
||||
force: bool = False,
|
||||
update_execution_queue: bool = True,
|
||||
) -> Tuple[int, dict]:
|
||||
if queue_id and queue_name:
|
||||
raise errors.bad_request.ValidationError(
|
||||
"Either queue id or queue name should be provided"
|
||||
)
|
||||
|
||||
task = get_task_with_write_access(
|
||||
task_id=task_id, company_id=company_id, identity=identity
|
||||
)
|
||||
if not update_execution_queue:
|
||||
if not (
|
||||
task.status == TaskStatus.queued and task.execution and task.execution.queue
|
||||
):
|
||||
raise errors.bad_request.ValidationError(
|
||||
"Cannot skip setting execution queue for a task "
|
||||
"that is not enqueued or does not have execution queue set"
|
||||
)
|
||||
|
||||
if queue_name:
|
||||
queue = queue_bll.get_by_name(
|
||||
company_id=company_id, queue_name=queue_name, only=("id",)
|
||||
)
|
||||
if not queue:
|
||||
queue = queue_bll.create(company_id=company_id, name=queue_name)
|
||||
queue_id = queue.id
|
||||
|
||||
if not queue_id:
|
||||
# try to get default queue
|
||||
queue_id = queue_bll.get_default(company_id).id
|
||||
|
||||
query = dict(id=task_id, company=company_id)
|
||||
task = Task.get_for_writing(**query)
|
||||
if not task:
|
||||
raise errors.bad_request.InvalidTaskId(**query)
|
||||
|
||||
user_id = identity.user
|
||||
if validate:
|
||||
TaskBLL.validate(task)
|
||||
|
||||
before_enqueue_status = task.status
|
||||
if task.status == TaskStatus.queued and task.enqueue_status:
|
||||
before_enqueue_status = task.enqueue_status
|
||||
res = ChangeStatusRequest(
|
||||
task=task,
|
||||
new_status=TaskStatus.queued,
|
||||
status_reason=status_reason,
|
||||
status_message=status_message,
|
||||
allow_same_state_transition=False,
|
||||
force=force,
|
||||
).execute(enqueue_status=task.status)
|
||||
user_id=user_id,
|
||||
).execute(enqueue_status=before_enqueue_status)
|
||||
|
||||
try:
|
||||
queue_bll.add_task(company_id=company_id, queue_id=queue_id, task_id=task.id)
|
||||
@@ -142,32 +267,64 @@ def enqueue_task(
|
||||
new_status=task.status,
|
||||
force=True,
|
||||
status_reason="failed enqueueing",
|
||||
user_id=user_id,
|
||||
).execute(enqueue_status=None)
|
||||
raise
|
||||
|
||||
# set the current queue ID in the task
|
||||
if task.execution:
|
||||
Task.objects(**query).update(execution__queue=queue_id, multi=False)
|
||||
else:
|
||||
Task.objects(**query).update(execution=Execution(queue=queue_id), multi=False)
|
||||
if update_execution_queue:
|
||||
if task.execution:
|
||||
Task.objects(id=task_id).update(execution__queue=queue_id, multi=False)
|
||||
else:
|
||||
Task.objects(id=task_id).update(
|
||||
execution=Execution(queue=queue_id), multi=False
|
||||
)
|
||||
nested_set(res, ("fields", "execution.queue"), queue_id)
|
||||
|
||||
nested_set(res, ("fields", "execution.queue"), queue_id)
|
||||
# make sure that the task is not queued in any other queue
|
||||
TaskBLL.remove_task_from_all_queues(
|
||||
company_id=company_id, task_id=task_id, exclude=queue_id
|
||||
)
|
||||
return 1, res
|
||||
|
||||
|
||||
def move_tasks_to_trash(tasks: Sequence[str]) -> int:
|
||||
try:
|
||||
collection_name = Task._get_collection_name()
|
||||
trash_collection_name = f"{collection_name}__trash"
|
||||
Task.aggregate(
|
||||
[
|
||||
{"$match": {"_id": {"$in": tasks}}},
|
||||
{
|
||||
"$merge": {
|
||||
"into": trash_collection_name,
|
||||
"on": "_id",
|
||||
"whenMatched": "replace",
|
||||
"whenNotMatched": "insert",
|
||||
}
|
||||
},
|
||||
],
|
||||
allow_disk_use=True,
|
||||
)
|
||||
except Exception as ex:
|
||||
log.error(f"Error copying tasks to trash {str(ex)}")
|
||||
|
||||
return Task.objects(id__in=tasks).delete()
|
||||
|
||||
|
||||
def delete_task(
|
||||
task_id: str,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
move_to_trash: bool,
|
||||
force: bool,
|
||||
return_file_urls: bool,
|
||||
delete_output_models: bool,
|
||||
status_message: str,
|
||||
status_reason: str,
|
||||
include_pipeline_steps: bool,
|
||||
) -> Tuple[int, Task, CleanupResult]:
|
||||
task = TaskBLL.get_task_with_access(
|
||||
task_id, company_id=company_id, requires_write_access=True
|
||||
)
|
||||
user_id = identity.user
|
||||
task = get_task_with_write_access(task_id, company_id=company_id, identity=identity)
|
||||
|
||||
if (
|
||||
task.status != TaskStatus.created
|
||||
@@ -181,37 +338,51 @@ def delete_task(
|
||||
current=task.status,
|
||||
)
|
||||
|
||||
try:
|
||||
TaskBLL.dequeue_and_change_status(
|
||||
task,
|
||||
company_id=company_id,
|
||||
status_message=status_message,
|
||||
status_reason=status_reason,
|
||||
)
|
||||
except APIError:
|
||||
# dequeue may fail if the task was not enqueued
|
||||
pass
|
||||
|
||||
cleanup_res = cleanup_task(
|
||||
task,
|
||||
force=force,
|
||||
return_file_urls=return_file_urls,
|
||||
delete_output_models=delete_output_models,
|
||||
)
|
||||
|
||||
if move_to_trash:
|
||||
collection_name = task._get_collection_name()
|
||||
archived_collection = "{}__trash".format(collection_name)
|
||||
task.switch_collection(archived_collection)
|
||||
def delete_task_core(task_: Task, force_: bool) -> CleanupResult:
|
||||
try:
|
||||
# A simple save() won't do due to mongoengine caching (nothing will be saved), so we have to force
|
||||
# an insert. However, if for some reason such an ID exists, let's make sure we'll keep going.
|
||||
task.save(force_insert=True)
|
||||
except Exception:
|
||||
TaskBLL.dequeue_and_change_status(
|
||||
task_,
|
||||
company_id=company_id,
|
||||
user_id=user_id,
|
||||
status_message=status_message,
|
||||
status_reason=status_reason,
|
||||
remove_from_all_queues=True,
|
||||
)
|
||||
except APIError:
|
||||
# dequeue may fail if the task was not enqueued
|
||||
pass
|
||||
task.switch_collection(collection_name)
|
||||
|
||||
task.delete()
|
||||
res = cleanup_task(
|
||||
company=company_id,
|
||||
user=user_id,
|
||||
task=task_,
|
||||
force=force_,
|
||||
delete_output_models=delete_output_models,
|
||||
)
|
||||
|
||||
if move_to_trash:
|
||||
# make sure that whatever changes were done to the task are saved
|
||||
# the task itself will be deleted later in the move_tasks_to_trash operation
|
||||
task_.last_update = datetime.utcnow()
|
||||
task_.save()
|
||||
else:
|
||||
task_.delete()
|
||||
|
||||
return res
|
||||
|
||||
task_ids = [task.id]
|
||||
cleanup_res = CleanupResult.empty()
|
||||
if include_pipeline_steps and (
|
||||
step_tasks := _get_pipeline_steps_for_controller_task(task, company_id)
|
||||
):
|
||||
for step in step_tasks:
|
||||
cleanup_res += delete_task_core(step, True)
|
||||
task_ids.append(step.id)
|
||||
|
||||
cleanup_res = delete_task_core(task, force)
|
||||
if move_to_trash:
|
||||
move_tasks_to_trash(task_ids)
|
||||
|
||||
update_project_time(task.project)
|
||||
return 1, task, cleanup_res
|
||||
|
||||
@@ -219,14 +390,13 @@ def delete_task(
|
||||
def reset_task(
|
||||
task_id: str,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
force: bool,
|
||||
return_file_urls: bool,
|
||||
delete_output_models: bool,
|
||||
clear_all: bool,
|
||||
) -> Tuple[dict, CleanupResult, dict]:
|
||||
task = TaskBLL.get_task_with_access(
|
||||
task_id, company_id=company_id, requires_write_access=True
|
||||
)
|
||||
user_id = identity.user
|
||||
task = get_task_with_write_access(task_id, company_id=company_id, identity=identity)
|
||||
|
||||
if not force and task.status == TaskStatus.published:
|
||||
raise errors.bad_request.InvalidTaskStatus(task_id=task.id, status=task.status)
|
||||
@@ -235,22 +405,28 @@ def reset_task(
|
||||
updates = {}
|
||||
|
||||
try:
|
||||
dequeued = TaskBLL.dequeue(task, company_id, silent_fail=True)
|
||||
dequeued = TaskBLL.dequeue(
|
||||
task, company_id=company_id, user_id=user_id, silent_fail=True
|
||||
)
|
||||
except APIError:
|
||||
# dequeue may fail if the task was not enqueued
|
||||
pass
|
||||
|
||||
TaskBLL.remove_task_from_all_queues(company_id=company_id, task_id=task.id)
|
||||
|
||||
cleaned_up = cleanup_task(
|
||||
task,
|
||||
company=company_id,
|
||||
user=user_id,
|
||||
task=task,
|
||||
force=force,
|
||||
update_children=False,
|
||||
return_file_urls=return_file_urls,
|
||||
delete_output_models=delete_output_models,
|
||||
)
|
||||
|
||||
updates.update(
|
||||
set__last_iteration=DEFAULT_LAST_ITERATION,
|
||||
set__last_metrics={},
|
||||
set__unique_metrics=[],
|
||||
set__metric_stats={},
|
||||
set__models__output=[],
|
||||
set__runtime={},
|
||||
@@ -258,11 +434,17 @@ def reset_task(
|
||||
unset__output__error=1,
|
||||
unset__last_worker=1,
|
||||
unset__last_worker_report=1,
|
||||
unset__started=1,
|
||||
unset__completed=1,
|
||||
unset__published=1,
|
||||
unset__active_duration=1,
|
||||
unset__enqueue_status=1,
|
||||
)
|
||||
|
||||
if clear_all:
|
||||
updates.update(
|
||||
set__execution=Execution(), unset__script=1,
|
||||
set__execution=Execution(),
|
||||
unset__script=1,
|
||||
)
|
||||
else:
|
||||
updates.update(unset__execution__queue=1)
|
||||
@@ -281,12 +463,8 @@ def reset_task(
|
||||
force=force,
|
||||
status_reason="reset",
|
||||
status_message="reset",
|
||||
user_id=user_id,
|
||||
).execute(
|
||||
started=None,
|
||||
completed=None,
|
||||
published=None,
|
||||
active_duration=None,
|
||||
enqueue_status=None,
|
||||
**updates,
|
||||
)
|
||||
|
||||
@@ -296,14 +474,14 @@ def reset_task(
|
||||
def publish_task(
|
||||
task_id: str,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
force: bool,
|
||||
publish_model_func: Callable[[str, str], Any] = None,
|
||||
publish_model_func: Callable[[str, str, Identity], Any] = None,
|
||||
status_message: str = "",
|
||||
status_reason: str = "",
|
||||
) -> dict:
|
||||
task = TaskBLL.get_task_with_access(
|
||||
task_id, company_id=company_id, requires_write_access=True
|
||||
)
|
||||
user_id = identity.user
|
||||
task = get_task_with_write_access(task_id, company_id=company_id, identity=identity)
|
||||
if not force:
|
||||
validate_status_change(task.status, TaskStatus.published)
|
||||
|
||||
@@ -325,7 +503,7 @@ def publish_task(
|
||||
.first()
|
||||
)
|
||||
if model and not model.ready:
|
||||
publish_model_func(model.id, company_id)
|
||||
publish_model_func(model.id, company_id, identity)
|
||||
|
||||
# set task status to published, and update (or set) it's new output (view and models)
|
||||
return ChangeStatusRequest(
|
||||
@@ -334,6 +512,7 @@ def publish_task(
|
||||
force=force,
|
||||
status_reason=status_reason,
|
||||
status_message=status_message,
|
||||
user_id=user_id,
|
||||
).execute(published=datetime.utcnow(), output=output)
|
||||
|
||||
except Exception as ex:
|
||||
@@ -346,7 +525,13 @@ def publish_task(
|
||||
|
||||
|
||||
def stop_task(
|
||||
task_id: str, company_id: str, user_name: str, status_reason: str, force: bool,
|
||||
task_id: str,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
user_name: str,
|
||||
status_reason: str,
|
||||
force: bool,
|
||||
include_pipeline_steps: bool,
|
||||
) -> dict:
|
||||
"""
|
||||
Stop a running task. Requires task status 'in_progress' and
|
||||
@@ -356,20 +541,22 @@ def stop_task(
|
||||
is set to 'stopping' to allow the worker to stop the task and report by itself
|
||||
:return: updated task fields
|
||||
"""
|
||||
|
||||
task = TaskBLL.get_task_with_access(
|
||||
user_id = identity.user
|
||||
fields = (
|
||||
"status",
|
||||
"project",
|
||||
"tags",
|
||||
"system_tags",
|
||||
"last_worker",
|
||||
"last_update",
|
||||
"execution.queue",
|
||||
"type",
|
||||
)
|
||||
task = get_task_with_write_access(
|
||||
task_id,
|
||||
company_id=company_id,
|
||||
only=(
|
||||
"status",
|
||||
"project",
|
||||
"tags",
|
||||
"system_tags",
|
||||
"last_worker",
|
||||
"last_update",
|
||||
"execution.queue",
|
||||
),
|
||||
requires_write_access=True,
|
||||
identity=identity,
|
||||
only=fields,
|
||||
)
|
||||
|
||||
def is_run_by_worker(t: Task) -> bool:
|
||||
@@ -381,31 +568,45 @@ def stop_task(
|
||||
and (datetime.utcnow() - t.last_update).total_seconds() < update_timeout
|
||||
)
|
||||
|
||||
is_queued = task.status == TaskStatus.queued
|
||||
set_stopped = (
|
||||
is_queued
|
||||
or TaskSystemTags.development in task.system_tags
|
||||
or not is_run_by_worker(task)
|
||||
)
|
||||
def stop_task_core(task_: Task, force_: bool):
|
||||
is_queued = task_.status == TaskStatus.queued
|
||||
set_stopped = (
|
||||
is_queued
|
||||
or TaskSystemTags.development in task_.system_tags
|
||||
or not is_run_by_worker(task_)
|
||||
)
|
||||
|
||||
if set_stopped:
|
||||
if is_queued:
|
||||
try:
|
||||
TaskBLL.dequeue(task, company_id=company_id, silent_fail=True)
|
||||
except APIError:
|
||||
# dequeue may fail if the task was not enqueued
|
||||
pass
|
||||
if set_stopped:
|
||||
if is_queued:
|
||||
try:
|
||||
TaskBLL.dequeue(
|
||||
task_, company_id=company_id, user_id=user_id, silent_fail=True
|
||||
)
|
||||
except APIError:
|
||||
# dequeue may fail if the task was not enqueued
|
||||
pass
|
||||
|
||||
new_status = TaskStatus.stopped
|
||||
status_message = f"Stopped by {user_name}"
|
||||
else:
|
||||
new_status = task.status
|
||||
status_message = TaskStatusMessage.stopping
|
||||
new_status = TaskStatus.stopped
|
||||
status_message = f"Stopped by {user_name}"
|
||||
else:
|
||||
new_status = task_.status
|
||||
status_message = TaskStatusMessage.stopping
|
||||
|
||||
return ChangeStatusRequest(
|
||||
task=task,
|
||||
new_status=new_status,
|
||||
status_reason=status_reason,
|
||||
status_message=status_message,
|
||||
force=force,
|
||||
).execute()
|
||||
return ChangeStatusRequest(
|
||||
task=task_,
|
||||
new_status=new_status,
|
||||
status_reason=status_reason,
|
||||
status_message=status_message,
|
||||
force=force_,
|
||||
user_id=user_id,
|
||||
).execute()
|
||||
|
||||
if include_pipeline_steps and (
|
||||
step_tasks := _get_pipeline_steps_for_controller_task(
|
||||
task, company_id, only=fields
|
||||
)
|
||||
):
|
||||
for step in step_tasks:
|
||||
stop_task_core(step, True)
|
||||
|
||||
return stop_task_core(task, force)
|
||||
|
||||
@@ -1,15 +1,19 @@
|
||||
from datetime import datetime
|
||||
from typing import Sequence, Union
|
||||
from typing import Sequence
|
||||
|
||||
import attr
|
||||
import six
|
||||
from mongoengine import Q
|
||||
from mongoengine.base import UPDATE_OPERATORS
|
||||
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.bll.util import update_project_time
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.errors import translate_errors_context
|
||||
from apiserver.database.model.project import Project
|
||||
from apiserver.database.model.model import Model
|
||||
from apiserver.database.model.task.task import Task, TaskStatus, TaskSystemTags
|
||||
from apiserver.database.utils import get_options
|
||||
from apiserver.timing_context import TimingContext
|
||||
from apiserver.service_repo.auth import Identity
|
||||
from apiserver.utilities.attrs import typed_attrs
|
||||
|
||||
valid_statuses = get_options(TaskStatus)
|
||||
@@ -27,6 +31,7 @@ class ChangeStatusRequest(object):
|
||||
force = attr.ib(type=bool, default=False)
|
||||
allow_same_state_transition = attr.ib(type=bool, default=True)
|
||||
current_status_override = attr.ib(default=None)
|
||||
user_id = attr.ib(type=str, default=None)
|
||||
|
||||
def execute(self, **kwargs):
|
||||
current_status = self.current_status_override or self.task.status
|
||||
@@ -45,6 +50,7 @@ class ChangeStatusRequest(object):
|
||||
status_changed=now,
|
||||
last_update=now,
|
||||
last_change=now,
|
||||
last_changed_by=self.user_id,
|
||||
)
|
||||
|
||||
if self.new_status == TaskStatus.queued:
|
||||
@@ -55,7 +61,7 @@ class ChangeStatusRequest(object):
|
||||
|
||||
fields.update({safe_mongoengine_key(k): v for k, v in kwargs.items()})
|
||||
|
||||
with translate_errors_context(), TimingContext("mongo", "task_status"):
|
||||
with translate_errors_context():
|
||||
# atomic change of task status by querying the task with the EXPECTED status before modifying it
|
||||
params = fields.copy()
|
||||
params.update(control)
|
||||
@@ -73,8 +79,16 @@ class ChangeStatusRequest(object):
|
||||
|
||||
update_project_time(project_id)
|
||||
|
||||
# make sure that _raw_ queries are not returned back to the client
|
||||
fields.pop("__raw__", None)
|
||||
def is_mongo_operator(field: str) -> bool:
|
||||
head, _, tail = field.partition("__")
|
||||
return tail and (head in UPDATE_OPERATORS)
|
||||
|
||||
# make sure to not return _raw_ queries or any of the update operators
|
||||
fields = {
|
||||
key: value
|
||||
for key, value in fields.items()
|
||||
if not (key == "__raw__" or is_mongo_operator(key))
|
||||
}
|
||||
|
||||
return dict(updated=updated, fields=fields)
|
||||
|
||||
@@ -130,7 +144,12 @@ state_machine = {
|
||||
TaskStatus.publishing,
|
||||
TaskStatus.stopped,
|
||||
},
|
||||
TaskStatus.failed: {TaskStatus.created, TaskStatus.stopped, TaskStatus.published},
|
||||
TaskStatus.failed: {
|
||||
TaskStatus.created,
|
||||
TaskStatus.stopped,
|
||||
TaskStatus.published,
|
||||
TaskStatus.queued,
|
||||
},
|
||||
TaskStatus.publishing: {TaskStatus.published},
|
||||
TaskStatus.published: set(),
|
||||
TaskStatus.completed: {
|
||||
@@ -155,25 +174,78 @@ def get_possible_status_changes(current_status):
|
||||
return possible
|
||||
|
||||
|
||||
def update_project_time(project_ids: Union[str, Sequence[str]]):
|
||||
if not project_ids:
|
||||
return
|
||||
def get_many_tasks_for_writing(
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
query: Q = None,
|
||||
only: Sequence = None,
|
||||
throw_on_forbidden: bool = True,
|
||||
) -> Sequence[Task]:
|
||||
if only:
|
||||
missing = [f for f in ("company",) if f not in only]
|
||||
if missing:
|
||||
only = [*only, *missing]
|
||||
|
||||
if isinstance(project_ids, str):
|
||||
project_ids = [project_ids]
|
||||
result = list(
|
||||
Task.get_many(
|
||||
company=company_id,
|
||||
query=query,
|
||||
override_projection=only,
|
||||
allow_public=True,
|
||||
return_dicts=False,
|
||||
)
|
||||
)
|
||||
|
||||
return Project.objects(id__in=project_ids).update(last_update=datetime.utcnow())
|
||||
if not company_id:
|
||||
return result
|
||||
|
||||
forbidden_tasks = {task.id for task in result if not task.company}
|
||||
if forbidden_tasks:
|
||||
if throw_on_forbidden:
|
||||
raise errors.forbidden.NoWritePermission(
|
||||
f"cannot modify public task(s), ids={tuple(forbidden_tasks)}"
|
||||
)
|
||||
result = [task for task in result if task.id not in forbidden_tasks]
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def get_task_with_write_access(
|
||||
task_id: str,
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
only=None,
|
||||
) -> Task:
|
||||
"""
|
||||
Gets a task that has a required write access
|
||||
:except errors.bad_request.InvalidTaskId: if the task is not found
|
||||
:except errors.forbidden.NoWritePermission: if write_access was required and the task cannot be modified
|
||||
"""
|
||||
query = dict(id=task_id, company=company_id)
|
||||
|
||||
task = Task.get_for_writing(_only=only, **query)
|
||||
if not task:
|
||||
raise errors.bad_request.InvalidTaskId(**query)
|
||||
|
||||
return task
|
||||
|
||||
|
||||
def get_task_for_update(
|
||||
company_id: str, task_id: str, allow_all_statuses: bool = False, force: bool = False
|
||||
company_id: str,
|
||||
task_id: str,
|
||||
identity: Identity,
|
||||
allow_all_statuses: bool = False,
|
||||
force: bool = False,
|
||||
) -> Task:
|
||||
"""
|
||||
Loads only task id and return the task only if it is updatable (status == 'created')
|
||||
"""
|
||||
task = Task.get_for_writing(company=company_id, id=task_id, _only=("id", "status"))
|
||||
if not task:
|
||||
raise errors.bad_request.InvalidTaskId(id=task_id)
|
||||
task = get_task_with_write_access(
|
||||
task_id=task_id,
|
||||
company_id=company_id,
|
||||
only=("id", "status"),
|
||||
identity=identity,
|
||||
)
|
||||
|
||||
if allow_all_statuses:
|
||||
return task
|
||||
@@ -188,9 +260,152 @@ def get_task_for_update(
|
||||
return task
|
||||
|
||||
|
||||
def update_task(task: Task, update_cmds: dict, set_last_update: bool = True):
|
||||
def update_task(
|
||||
task: Task, user_id: str, update_cmds: dict, set_last_update: bool = True
|
||||
):
|
||||
now = datetime.utcnow()
|
||||
last_updates = dict(last_change=now)
|
||||
last_updates = dict(last_change=now, last_changed_by=user_id)
|
||||
if set_last_update:
|
||||
last_updates.update(last_update=now)
|
||||
return task.update(**update_cmds, **last_updates)
|
||||
|
||||
|
||||
def get_last_metric_updates(
|
||||
task_id: str,
|
||||
last_scalar_events: dict,
|
||||
raw_updates: dict,
|
||||
extra_updates: dict,
|
||||
model_events: bool = False,
|
||||
):
|
||||
max_values = config.get("services.tasks.max_last_metrics", 2000)
|
||||
total_metrics = set()
|
||||
if max_values:
|
||||
query = dict(id=task_id)
|
||||
to_add = sum(len(v) for m, v in last_scalar_events.items())
|
||||
if to_add <= max_values:
|
||||
query[f"unique_metrics__{max_values - to_add}__exists"] = True
|
||||
db_cls = Model if model_events else Task
|
||||
task = db_cls.objects(**query).only("unique_metrics").first()
|
||||
if task and task.unique_metrics:
|
||||
total_metrics = set(task.unique_metrics)
|
||||
|
||||
new_metrics = []
|
||||
|
||||
def add_last_metric_mean_update(
|
||||
metric_path: str,
|
||||
metric_count: int,
|
||||
metric_total: float,
|
||||
):
|
||||
"""
|
||||
Update new mean field based on the value in db and new data
|
||||
The count field is updated here too and not with inc__ so that
|
||||
it will not get updated in the db earlier than the corresponding mean
|
||||
"""
|
||||
metric_path = metric_path.replace("__", ".")
|
||||
mean_value_field = f"{metric_path}.mean_value"
|
||||
count_field = f"{metric_path}.count"
|
||||
raw_updates[mean_value_field] = {
|
||||
"$round": [
|
||||
{
|
||||
"$divide": [
|
||||
{
|
||||
"$add": [
|
||||
{
|
||||
"$multiply": [
|
||||
{"$ifNull": [f"${mean_value_field}", 0]},
|
||||
{"$ifNull": [f"${count_field}", 0]},
|
||||
]
|
||||
},
|
||||
metric_total,
|
||||
]
|
||||
},
|
||||
{
|
||||
"$add": [
|
||||
{"$ifNull": [f"${count_field}", 0]},
|
||||
metric_count,
|
||||
]
|
||||
},
|
||||
]
|
||||
},
|
||||
2,
|
||||
]
|
||||
}
|
||||
raw_updates[count_field] = {
|
||||
"$add": [
|
||||
{"$ifNull": [f"${count_field}", 0]},
|
||||
metric_count,
|
||||
]
|
||||
}
|
||||
|
||||
def add_last_metric_conditional_update(
|
||||
metric_path: str, metric_value, iter_value: int, is_min: bool, is_first: bool
|
||||
):
|
||||
"""
|
||||
Build an aggregation for an atomic update of the min or max value and the corresponding iteration
|
||||
"""
|
||||
if is_first:
|
||||
field_prefix = "first"
|
||||
op = None
|
||||
elif is_min:
|
||||
field_prefix = "min"
|
||||
op = "$gt"
|
||||
else:
|
||||
field_prefix = "max"
|
||||
op = "$lt"
|
||||
|
||||
value_field = f"{metric_path}__{field_prefix}_value".replace("__", ".")
|
||||
exists = {"$lte": [f"${value_field}", None]}
|
||||
if op:
|
||||
condition = {
|
||||
"$or": [
|
||||
exists,
|
||||
{op: [f"${value_field}", metric_value]},
|
||||
]
|
||||
}
|
||||
else:
|
||||
condition = exists
|
||||
|
||||
raw_updates[value_field] = {
|
||||
"$cond": [condition, metric_value, f"${value_field}"]
|
||||
}
|
||||
|
||||
value_iteration_field = (
|
||||
f"{metric_path}__{field_prefix}_value_iteration".replace("__", ".")
|
||||
)
|
||||
raw_updates[value_iteration_field] = {
|
||||
"$cond": [condition, iter_value, f"${value_iteration_field}"]
|
||||
}
|
||||
|
||||
for metric_key, metric_data in last_scalar_events.items():
|
||||
for variant_key, variant_data in metric_data.items():
|
||||
metric = f"{variant_data.get('metric')}/{variant_data.get('variant')}"
|
||||
if max_values:
|
||||
if len(total_metrics) >= max_values and metric not in total_metrics:
|
||||
continue
|
||||
total_metrics.add(metric)
|
||||
|
||||
new_metrics.append(metric)
|
||||
path = f"last_metrics__{metric_key}__{variant_key}"
|
||||
for key, value in variant_data.items():
|
||||
if key in ("min_value", "max_value", "first_value"):
|
||||
add_last_metric_conditional_update(
|
||||
metric_path=path,
|
||||
metric_value=value,
|
||||
iter_value=variant_data.get(f"{key}_iter", 0),
|
||||
is_min=(key == "min_value"),
|
||||
is_first=(key == "first_value"),
|
||||
)
|
||||
elif key in ("metric", "variant", "value", "x_axis_label"):
|
||||
extra_updates[f"set__{path}__{key}"] = value
|
||||
|
||||
count = variant_data.get("count")
|
||||
total = variant_data.get("total")
|
||||
if count is not None and total is not None:
|
||||
add_last_metric_mean_update(
|
||||
metric_path=path,
|
||||
metric_count=count,
|
||||
metric_total=total,
|
||||
)
|
||||
|
||||
if new_metrics:
|
||||
extra_updates["add_to_set__unique_metrics"] = new_metrics
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from datetime import datetime
|
||||
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.apimodels.users import CreateRequest
|
||||
from apiserver.config.info import get_version
|
||||
from apiserver.database.errors import translate_errors_context
|
||||
from apiserver.database.model.user import User
|
||||
|
||||
@@ -12,7 +15,11 @@ class UserBLL:
|
||||
if user_id and User.objects(id=user_id).only("id"):
|
||||
raise errors.bad_request.UserIdExists(id=user_id)
|
||||
|
||||
user = User(**request.to_struct())
|
||||
user = User(
|
||||
**request.to_struct(),
|
||||
created=datetime.utcnow(),
|
||||
created_in_version=get_version(),
|
||||
)
|
||||
user.save(force_insert=True)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -1,76 +1,24 @@
|
||||
import functools
|
||||
import itertools
|
||||
from concurrent.futures.thread import ThreadPoolExecutor
|
||||
from datetime import datetime
|
||||
from typing import (
|
||||
Optional,
|
||||
Callable,
|
||||
Dict,
|
||||
Any,
|
||||
Set,
|
||||
Iterable,
|
||||
Tuple,
|
||||
Sequence,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
|
||||
from boltons import iterutils
|
||||
|
||||
from apiserver.apierrors import APIError
|
||||
from apiserver.database.model import AttributedDocument
|
||||
from apiserver.database.model.project import Project
|
||||
from apiserver.database.model.settings import Settings
|
||||
|
||||
|
||||
class SetFieldsResolver:
    """
    Resolves "set fields" update instructions against a concrete document.

    Instructions whose key starts with a 'min' or 'max' modifier
    (e.g. ``min__a__b``) are turned into a plain set of the bare field
    (``a__b``) for documents in which that field currently has no value.
    All other instructions are passed through unchanged.
    """

    SET_MODIFIERS = ("min", "max")

    def __init__(self, set_fields: Dict[str, Any]):
        # orig_fields: every instruction exactly as it was supplied
        # fields: modifier-prefixed key -> bare field name (min/max keys only)
        self.orig_fields = {}
        self.fields = {}
        self.add_fields(**set_fields)

    def add_fields(self, **set_fields: Any):
        """Register additional set instructions with the resolver"""
        self.orig_fields.update(set_fields)

        modified = {}
        for key in set_fields.keys():
            modifier, dunder, fname = key.partition("__")
            # keep only keys of the form "<min|max>__<field>"
            if dunder and modifier in self.SET_MODIFIERS:
                modified[key] = fname
        self.fields.update(modified)

    def _get_updated_name(self, doc: AttributedDocument, name: str) -> str:
        """
        Return the bare field name if 'name' carries a min/max modifier
        and the document has no value for that field, otherwise 'name' itself
        """
        if name not in self.fields:
            return name

        bare_name = self.fields[name]
        if doc.get_field_value(bare_name) is None:
            return bare_name
        return name

    def get_fields(self, doc: AttributedDocument):
        """
        Build the set instructions for the given document:
        a min/max instruction becomes a plain set when the document
        does not have the corresponding field set
        """
        resolved = {}
        for name, value in self.orig_fields.items():
            resolved[self._get_updated_name(doc, name)] = value
        return resolved

    def get_names(self) -> Set[str]:
        """
        Names of the fields that were passed with min/max modifiers,
        dot separated so that they can be used in a projection
        """
        return {fname.replace("__", ".") for fname in self.fields.values()}
|
||||
|
||||
|
||||
@functools.lru_cache()
def get_server_uuid() -> Optional[str]:
    """
    Return the value stored under the "server.uuid" settings key.
    The result is memoized by lru_cache, so repeated calls reuse the first answer.
    """
    server_uuid = Settings.get_by_key("server.uuid")
    return server_uuid
|
||||
@@ -132,3 +80,13 @@ def run_batch_operation(
|
||||
}
|
||||
)
|
||||
return results, failures
|
||||
|
||||
|
||||
def update_project_time(project_ids: Union[str, Sequence[str]]):
    """
    Set 'last_update' to the current UTC time on the given project(s).

    :param project_ids: a single project id or a sequence of project ids
    :return: None when no ids were passed, otherwise the result of the update call
    """
    if not project_ids:
        return

    # a bare string is a single id, not a sequence of one-character ids
    ids = [project_ids] if isinstance(project_ids, str) else project_ids
    projects = Project.objects(id__in=ids)
    return projects.update(last_update=datetime.utcnow())
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
import itertools
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from time import time
|
||||
from typing import Sequence, Set, Optional
|
||||
|
||||
import attr
|
||||
import elasticsearch.helpers
|
||||
from boltons.iterutils import partition, chunked_iter
|
||||
from pyhocon import ConfigTree
|
||||
|
||||
from apiserver.es_factory import es_factory
|
||||
from apiserver.apierrors import APIError
|
||||
from apiserver.apierrors.errors import bad_request, server_error
|
||||
from apiserver.apimodels.workers import (
|
||||
DEFAULT_TIMEOUT,
|
||||
IdNameEntry,
|
||||
WorkerEntry,
|
||||
StatusReportRequest,
|
||||
@@ -25,16 +28,18 @@ from apiserver.database.model.project import Project
|
||||
from apiserver.database.model.queue import Queue
|
||||
from apiserver.database.model.task.task import Task
|
||||
from apiserver.redis_manager import redman
|
||||
from apiserver.timing_context import TimingContext
|
||||
from apiserver.tools import safe_get
|
||||
from apiserver.utilities.dicts import nested_get
|
||||
from .stats import WorkerStats
|
||||
|
||||
log = config.logger(__file__)
|
||||
|
||||
|
||||
class WorkerBLL:
|
||||
_key_regex_trans = str.maketrans({"*": ".*", "?": ".?"})
|
||||
|
||||
def __init__(self, es=None, redis=None):
    """
    :param es: ES client to use; when not passed a "workers" connection is created
    :param redis: redis client to use; when not passed a "workers" connection is created
    """
    if not es:
        es = es_factory.connect("workers")
    self.es_client = es

    self.config = config.get("services.workers", ConfigTree())

    if not redis:
        redis = redman.connection("workers")
    self.redis = redis

    # the stats helper shares the ES client resolved above
    self._stats = WorkerStats(self.es_client)
|
||||
|
||||
@@ -51,6 +56,7 @@ class WorkerBLL:
|
||||
queues: Sequence[str] = None,
|
||||
timeout: int = 0,
|
||||
tags: Sequence[str] = None,
|
||||
system_tags: Sequence[str] = None,
|
||||
) -> WorkerEntry:
|
||||
"""
|
||||
Register a worker
|
||||
@@ -66,7 +72,7 @@ class WorkerBLL:
|
||||
"""
|
||||
key = WorkerBLL._get_worker_key(company_id, user_id, worker)
|
||||
|
||||
timeout = timeout or DEFAULT_TIMEOUT
|
||||
timeout = timeout or int(self.config.get("default_worker_timeout_sec", 10 * 60))
|
||||
queues = queues or []
|
||||
|
||||
with translate_errors_context():
|
||||
@@ -76,7 +82,7 @@ class WorkerBLL:
|
||||
raise bad_request.InvalidUserId(**query)
|
||||
company = Company.objects(id=company_id).only("id", "name").first()
|
||||
if not company:
|
||||
raise server_error.InternalError("invalid company", company=company_id)
|
||||
raise bad_request.InvalidId("invalid company", company=company_id)
|
||||
|
||||
queue_objs = Queue.objects(company=company_id, id__in=queues).only("id")
|
||||
if len(queue_objs) < len(queues):
|
||||
@@ -95,9 +101,10 @@ class WorkerBLL:
|
||||
register_timeout=timeout,
|
||||
last_activity_time=now,
|
||||
tags=tags,
|
||||
system_tags=system_tags,
|
||||
)
|
||||
|
||||
self.redis.setex(key, timedelta(seconds=timeout), entry.to_json())
|
||||
self._save_worker_data(entry)
|
||||
|
||||
return entry
|
||||
|
||||
@@ -109,15 +116,20 @@ class WorkerBLL:
|
||||
:param worker: worker ID
|
||||
:raise bad_request.WorkerNotRegistered: the worker was not previously registered
|
||||
"""
|
||||
with TimingContext("redis", "workers_unregister"):
|
||||
res = self.redis.delete(
|
||||
company_id, self._get_worker_key(company_id, user_id, worker)
|
||||
)
|
||||
res = self.redis.delete(
|
||||
company_id, self._get_worker_key(company_id, user_id, worker)
|
||||
)
|
||||
if not res and not config.get("apiserver.workers.auto_unregister", False):
|
||||
raise bad_request.WorkerNotRegistered(worker=worker)
|
||||
|
||||
def status_report(
|
||||
self, company_id: str, user_id: str, ip: str, report: StatusReportRequest, tags: Sequence[str] = None,
|
||||
self,
|
||||
company_id: str,
|
||||
user_id: str,
|
||||
ip: str,
|
||||
report: StatusReportRequest,
|
||||
tags: Sequence[str] = None,
|
||||
system_tags: Sequence[str] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Write worker status report
|
||||
@@ -133,22 +145,23 @@ class WorkerBLL:
|
||||
|
||||
try:
|
||||
entry.ip = ip
|
||||
now = datetime.utcnow()
|
||||
entry.last_activity_time = now
|
||||
|
||||
if tags is not None:
|
||||
entry.tags = tags
|
||||
if system_tags is not None:
|
||||
entry.system_tags = system_tags
|
||||
|
||||
if report.machine_stats:
|
||||
self._log_stats_to_es(
|
||||
self.log_stats_to_es(
|
||||
company_id=company_id,
|
||||
company_name=entry.company.name,
|
||||
worker=report.worker,
|
||||
worker_id=report.worker,
|
||||
timestamp=report.timestamp,
|
||||
task=report.task,
|
||||
machine_stats=report.machine_stats,
|
||||
)
|
||||
|
||||
now = datetime.utcnow()
|
||||
entry.last_activity_time = now
|
||||
entry.queue = report.queue
|
||||
|
||||
if report.queues:
|
||||
@@ -165,6 +178,7 @@ class WorkerBLL:
|
||||
last_worker_report=now,
|
||||
last_update=now,
|
||||
last_change=now,
|
||||
last_changed_by=user_id,
|
||||
)
|
||||
# modify(new=True, ...) returns the modified object
|
||||
task = Task.objects(**query).modify(new=True, **update)
|
||||
@@ -176,7 +190,9 @@ class WorkerBLL:
|
||||
if task.project:
|
||||
project = Project.objects(id=task.project).only("name").first()
|
||||
if project:
|
||||
entry.project = IdNameEntry(id=project.id, name=project.name)
|
||||
entry.project = IdNameEntry(
|
||||
id=project.id, name=project.name
|
||||
)
|
||||
|
||||
entry.last_report_time = now
|
||||
except APIError:
|
||||
@@ -188,8 +204,41 @@ class WorkerBLL:
|
||||
finally:
|
||||
self._save_worker(entry)
|
||||
|
||||
def get_count(
|
||||
self,
|
||||
company_id: str,
|
||||
last_seen: Optional[int] = None,
|
||||
tags: Sequence[str] = None,
|
||||
system_tags: Sequence[str] = None,
|
||||
worker_pattern: str = None,
|
||||
):
|
||||
if not last_seen:
|
||||
return len(
|
||||
self._get_keys(
|
||||
company_id,
|
||||
user_tags=tags,
|
||||
system_tags=system_tags,
|
||||
worker_pattern=worker_pattern,
|
||||
)
|
||||
)
|
||||
|
||||
return len(
|
||||
self.get_all(
|
||||
company_id,
|
||||
last_seen=last_seen,
|
||||
tags=tags,
|
||||
system_tags=system_tags,
|
||||
worker_pattern=worker_pattern,
|
||||
)
|
||||
)
|
||||
|
||||
def get_all(
|
||||
self, company_id: str, last_seen: Optional[int] = None
|
||||
self,
|
||||
company_id: str,
|
||||
last_seen: Optional[int] = None,
|
||||
tags: Sequence[str] = None,
|
||||
system_tags: Sequence[str] = None,
|
||||
worker_pattern: str = None,
|
||||
) -> Sequence[WorkerEntry]:
|
||||
"""
|
||||
Get all the company workers that were active during the last_seen period
|
||||
@@ -198,7 +247,12 @@ class WorkerBLL:
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
workers = self._get(company_id)
|
||||
workers = self._get(
|
||||
company_id,
|
||||
user_tags=tags,
|
||||
system_tags=system_tags,
|
||||
worker_pattern=worker_pattern,
|
||||
)
|
||||
except Exception as e:
|
||||
raise server_error.DataError("failed loading worker entries", err=e.args[0])
|
||||
|
||||
@@ -213,15 +267,23 @@ class WorkerBLL:
|
||||
return workers
|
||||
|
||||
def get_all_with_projection(
|
||||
self, company_id: str, last_seen: int
|
||||
self,
|
||||
company_id: str,
|
||||
last_seen: int,
|
||||
tags: Sequence[str] = None,
|
||||
system_tags: Sequence[str] = None,
|
||||
worker_pattern: str = None,
|
||||
) -> Sequence[WorkerResponseEntry]:
|
||||
|
||||
helpers = list(
|
||||
map(
|
||||
WorkerConversionHelper.from_worker_entry,
|
||||
self.get_all(company_id=company_id, last_seen=last_seen),
|
||||
helpers = [
|
||||
WorkerConversionHelper.from_worker_entry(entry)
|
||||
for entry in self.get_all(
|
||||
company_id=company_id,
|
||||
last_seen=last_seen,
|
||||
tags=tags,
|
||||
system_tags=system_tags,
|
||||
worker_pattern=worker_pattern,
|
||||
)
|
||||
)
|
||||
]
|
||||
|
||||
task_ids = set(filter(None, (helper.task_id for helper in helpers)))
|
||||
all_queues = set(
|
||||
@@ -235,19 +297,18 @@ class WorkerBLL:
|
||||
{
|
||||
"$project": {
|
||||
"name": 1,
|
||||
"display_name": 1,
|
||||
"next_entry": {"$arrayElemAt": ["$entries", 0]},
|
||||
"num_entries": {"$size": "$entries"},
|
||||
}
|
||||
},
|
||||
]
|
||||
queues_info = {
|
||||
res["_id"]: res for res in Queue.objects.aggregate(projection)
|
||||
}
|
||||
queues_info = {res["_id"]: res for res in Queue.aggregate(projection)}
|
||||
task_ids = task_ids.union(
|
||||
filter(
|
||||
None,
|
||||
(
|
||||
safe_get(info, "next_entry/task")
|
||||
nested_get(info, ("next_entry", "task"))
|
||||
for info in queues_info.values()
|
||||
),
|
||||
)
|
||||
@@ -270,8 +331,9 @@ class WorkerBLL:
|
||||
if not info:
|
||||
continue
|
||||
entry.name = info.get("name", None)
|
||||
entry.display_name = info.get("display_name", None)
|
||||
entry.num_tasks = info.get("num_entries", 0)
|
||||
task_id = safe_get(info, "next_entry/task")
|
||||
task_id = nested_get(info, ("next_entry", "task"))
|
||||
if task_id:
|
||||
task = tasks_info.get(task_id, None)
|
||||
entry.next_task = IdNameEntry(
|
||||
@@ -281,7 +343,7 @@ class WorkerBLL:
|
||||
for helper in helpers:
|
||||
worker = helper.worker
|
||||
if helper.task_id:
|
||||
task = tasks_info.get(helper.task_id, None)
|
||||
task: Task = tasks_info.get(helper.task_id, None)
|
||||
if task:
|
||||
worker.task.running_time = (task.active_duration or 0) * 1000
|
||||
worker.task.last_iteration = task.last_iteration
|
||||
@@ -310,8 +372,7 @@ class WorkerBLL:
|
||||
"""
|
||||
key = self._get_worker_key(company_id, user_id, worker)
|
||||
|
||||
with TimingContext("redis", "get_worker"):
|
||||
data = self.redis.get(key)
|
||||
data = self.redis.get(key)
|
||||
|
||||
if data:
|
||||
try:
|
||||
@@ -338,42 +399,163 @@ class WorkerBLL:
|
||||
|
||||
raise bad_request.InvalidWorkerId(worker=worker)
|
||||
|
||||
@staticmethod
|
||||
def _get_tagged_workers_key(company: str, tags_field: str, tag: str) -> str:
|
||||
"""Build redis key from company, user and worker_id"""
|
||||
return f"workers.{tags_field}_{company}_{tag}"
|
||||
|
||||
@staticmethod
|
||||
def _get_all_workers_key(company: str) -> str:
|
||||
"""Build redis key from company, user and worker_id"""
|
||||
return f"workers_{company}"
|
||||
|
||||
def _save_worker_data(self, entry: WorkerEntry):
|
||||
self.redis.setex(
|
||||
entry.key, timedelta(seconds=entry.register_timeout), entry.to_json()
|
||||
)
|
||||
company_id = entry.company.id
|
||||
expiration = int(time()) + entry.register_timeout
|
||||
worker_item = {entry.key: expiration}
|
||||
self.redis.zadd(self._get_all_workers_key(company_id), worker_item)
|
||||
for tags, tags_field in (
|
||||
(entry.tags, "tags"),
|
||||
(entry.system_tags, "systemtags"),
|
||||
):
|
||||
for tag in tags:
|
||||
name = self._get_tagged_workers_key(company_id, tags_field, tag)
|
||||
self.redis.zadd(name, worker_item)
|
||||
|
||||
def _save_worker(self, entry: WorkerEntry) -> None:
|
||||
"""Save worker entry in Redis"""
|
||||
try:
|
||||
self.redis.setex(
|
||||
entry.key, timedelta(seconds=entry.register_timeout), entry.to_json()
|
||||
)
|
||||
self._save_worker_data(entry)
|
||||
except Exception:
|
||||
msg = "Failed saving worker entry"
|
||||
log.exception(msg)
|
||||
|
||||
def _get_keys(
    self,
    company: str,
    user: str = "*",
    user_tags: Sequence[str] = None,
    system_tags: Sequence[str] = None,
    worker_pattern: str = None,
) -> Sequence[bytes]:
    """
    Get the redis keys of the company workers matching the passed filters.

    Without tags the keys are scanned directly using a glob built from company, user
    and worker pattern. With tags the keys are taken from the per-tag sorted sets:
    a worker is kept if it has at least one of the "include" tags and none of the
    "exclude" tags (those prefixed with '-'); user tags and system tags are both applied.

    :param company: company ID
    :param user: user ID or "*" for any user
    :param user_tags: user tags to filter by; a leading '-' marks a tag to exclude
    :param system_tags: system tags to filter by; a leading '-' marks a tag to exclude
    :param worker_pattern: glob pattern for the worker ID ('*', '?' and '[...]' wildcards)
    :return: list of the matching worker keys
    """
    if not (user_tags or system_tags):
        match = self._get_worker_key(company, user, worker_pattern or "*")
        return list(self.redis.scan_iter(match))

    # Compile the worker pattern once. fnmatch.translate escapes regex metacharacters
    # that are plain text in a glob (such as '.' or '+') and maps '?' to exactly one
    # character, which is how the glob passed to scan_iter above is interpreted by redis.
    # The translated pattern is anchored at its end only, so together with search()
    # it selects the keys that end with a match of the worker pattern.
    # NOTE(review): fnmatch negates a character class with '[!...]' while redis uses '[^...]'
    worker_regex = None
    if worker_pattern:
        import fnmatch

        worker_regex = re.compile(fnmatch.translate(worker_pattern).encode())
    user_bytes = user.encode() if user != "*" else None

    def filter_by_user_and_pattern(in_keys: Set[bytes]) -> Set[bytes]:
        """Keep only the keys that contain the user ID and end with the worker pattern"""
        if user_bytes is not None:
            in_keys = {k for k in in_keys if user_bytes in k}

        if worker_regex is not None:
            in_keys = {k for k in in_keys if worker_regex.search(k)}

        return in_keys

    worker_keys = set()
    for tags, tags_field in (
        (user_tags, "tags"),
        (system_tags, "systemtags"),
    ):
        if not tags:
            continue

        timestamp = int(time())
        # startswith instead of x[0] so that an empty tag does not raise IndexError
        include, exclude = partition(tags, key=lambda x: not x.startswith("-"))
        if include:
            tagged_workers = set()
            for tag in include:
                tagged_workers_key = self._get_tagged_workers_key(
                    company, tags_field, tag
                )
                # drop the members whose score (expiration time) has already passed
                self.redis.zremrangebyscore(
                    tagged_workers_key, min=0, max=timestamp
                )
                tagged_workers.update(self.redis.zrange(tagged_workers_key, 0, -1))

            tagged_workers = filter_by_user_and_pattern(tagged_workers)
            # narrow down the keys collected for the previous tags field, if any
            worker_keys = (
                worker_keys.intersection(tagged_workers)
                if worker_keys
                else tagged_workers
            )
            if not worker_keys:
                return []

        if exclude:
            if not worker_keys:
                # nothing was selected by include tags: start from all the company workers
                all_workers_key = self._get_all_workers_key(company)
                self.redis.zremrangebyscore(all_workers_key, min=0, max=timestamp)
                worker_keys.update(self.redis.zrange(all_workers_key, 0, -1))
                worker_keys = filter_by_user_and_pattern(worker_keys)
                if not worker_keys:
                    return []

            for tag in exclude:
                # tag[1:] strips the leading '-' exclusion marker
                tagged_workers_key = self._get_tagged_workers_key(
                    company, tags_field, tag[1:]
                )
                self.redis.zremrangebyscore(
                    tagged_workers_key, min=0, max=timestamp
                )
                worker_keys.difference_update(
                    self.redis.zrange(tagged_workers_key, 0, -1)
                )
            if not worker_keys:
                return []

    return list(worker_keys)
|
||||
|
||||
def _get(
|
||||
self, company: str, user: str = "*", worker_id: str = "*"
|
||||
self,
|
||||
company: str,
|
||||
user: str = "*",
|
||||
user_tags: Sequence[str] = None,
|
||||
system_tags: Sequence[str] = None,
|
||||
worker_pattern: str = None,
|
||||
) -> Sequence[WorkerEntry]:
|
||||
"""Get worker entries matching the company and user, worker patterns"""
|
||||
match = self._get_worker_key(company, user, worker_id)
|
||||
with TimingContext("redis", "workers_get_all"):
|
||||
res = self.redis.scan_iter(match)
|
||||
return [WorkerEntry.from_json(self.redis.get(r)) for r in res]
|
||||
|
||||
entries = []
|
||||
for keys in chunked_iter(
|
||||
self._get_keys(
|
||||
company,
|
||||
user=user,
|
||||
user_tags=user_tags,
|
||||
system_tags=system_tags,
|
||||
worker_pattern=worker_pattern,
|
||||
),
|
||||
1000,
|
||||
):
|
||||
data = self.redis.mget(keys)
|
||||
if data:
|
||||
entries.extend(WorkerEntry.from_json(d) for d in data if d)
|
||||
|
||||
return entries
|
||||
|
||||
@staticmethod
|
||||
def _get_es_index_suffix():
|
||||
"""Get the index name suffix for storing current month data"""
|
||||
return datetime.utcnow().strftime("%Y-%m")
|
||||
|
||||
def _log_stats_to_es(
|
||||
def log_stats_to_es(
|
||||
self,
|
||||
company_id: str,
|
||||
company_name: str,
|
||||
worker: str,
|
||||
worker_id: str,
|
||||
timestamp: int,
|
||||
task: str,
|
||||
machine_stats: MachineStats,
|
||||
) -> bool:
|
||||
) -> int:
|
||||
"""
|
||||
Actually writing the worker statistics to Elastic
|
||||
:return: True if successful, False otherwise
|
||||
:return: The amount of logged documents
|
||||
"""
|
||||
es_index = (
|
||||
f"{self._stats.worker_stats_prefix_for_company(company_id)}"
|
||||
@@ -385,8 +567,7 @@ class WorkerBLL:
|
||||
_index=es_index,
|
||||
_source=dict(
|
||||
timestamp=timestamp,
|
||||
worker=worker,
|
||||
company=company_name,
|
||||
worker=worker_id,
|
||||
task=task,
|
||||
category=category,
|
||||
metric=metric,
|
||||
@@ -411,7 +592,7 @@ class WorkerBLL:
|
||||
|
||||
es_res = elasticsearch.helpers.bulk(self.es_client, actions)
|
||||
added, errors = es_res[:2]
|
||||
return (added == len(actions)) and not errors
|
||||
return added
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True)
|
||||
|
||||
@@ -8,19 +8,20 @@ from apiserver.apimodels.workers import AggregationType, GetStatsRequest, StatIt
|
||||
from apiserver.bll.query import Builder as QueryBuilder
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.errors import translate_errors_context
|
||||
from apiserver.timing_context import TimingContext
|
||||
|
||||
log = config.logger(__file__)
|
||||
|
||||
|
||||
class WorkerStats:
|
||||
min_chart_interval = config.get("services.workers.min_chart_interval_sec", 40)
|
||||
|
||||
def __init__(self, es):
|
||||
self.es = es
|
||||
|
||||
@staticmethod
|
||||
def worker_stats_prefix_for_company(company_id: str) -> str:
|
||||
"""Returns the es index prefix for the company"""
|
||||
return f"worker_stats_{company_id}_"
|
||||
return f"worker_stats_{company_id.lower()}_"
|
||||
|
||||
def _search_company_stats(self, company_id: str, es_req: dict) -> dict:
|
||||
return self.es.search(
|
||||
@@ -72,9 +73,13 @@ class WorkerStats:
|
||||
Buckets with no metrics are not returned
|
||||
Note: all the statistics are retrieved as one ES query
|
||||
"""
|
||||
if request.from_date >= request.to_date:
|
||||
from_date = request.from_date
|
||||
to_date = request.to_date
|
||||
if from_date >= to_date:
|
||||
raise bad_request.FieldsValueError("from_date must be less than to_date")
|
||||
|
||||
interval = max(request.interval, self.min_chart_interval)
|
||||
|
||||
def get_dates_agg() -> dict:
|
||||
es_to_agg_types = (
|
||||
("avg", AggregationType.avg.value),
|
||||
@@ -86,8 +91,11 @@ class WorkerStats:
|
||||
"dates": {
|
||||
"date_histogram": {
|
||||
"field": "timestamp",
|
||||
"fixed_interval": f"{request.interval}s",
|
||||
"min_doc_count": 1,
|
||||
"fixed_interval": f"{interval}s",
|
||||
"extended_bounds": {
|
||||
"min": int(from_date) * 1000,
|
||||
"max": int(to_date) * 1000,
|
||||
}
|
||||
},
|
||||
"aggs": {
|
||||
agg_type: {es_agg: {"field": "value"}}
|
||||
@@ -119,26 +127,26 @@ class WorkerStats:
|
||||
}
|
||||
|
||||
query_terms = [
|
||||
QueryBuilder.dates_range(request.from_date, request.to_date),
|
||||
QueryBuilder.dates_range(from_date, to_date),
|
||||
QueryBuilder.terms("metric", {item.key for item in request.items}),
|
||||
]
|
||||
if request.worker_ids:
|
||||
query_terms.append(QueryBuilder.terms("worker", request.worker_ids))
|
||||
es_req["query"] = {"bool": {"must": query_terms}}
|
||||
|
||||
with translate_errors_context(), TimingContext("es", "get_worker_stats"):
|
||||
with translate_errors_context():
|
||||
data = self._search_company_stats(company_id, es_req)
|
||||
|
||||
return self._extract_results(data, request.items, request.split_by_variant)
|
||||
cutoff_date = (to_date - 0.9 * interval) * 1000 # do not return the point for the incomplete last interval
|
||||
return self._extract_results(data, request.items, request.split_by_variant, cutoff_date)
|
||||
|
||||
@staticmethod
|
||||
def _extract_results(
|
||||
data: dict, request_items: Sequence[StatItem], split_by_variant: bool
|
||||
data: dict, request_items: Sequence[StatItem], split_by_variant: bool, cutoff_date
|
||||
) -> dict:
|
||||
"""
|
||||
Clean results returned from elastic search (remove "aggregations", "buckets" etc.),
|
||||
leave only aggregation types requested by the user and return a clean dictionary
|
||||
and return a "clean" dictionary of
|
||||
:param data: aggregation data retrieved from ES
|
||||
:param request_items: aggs types requested by the user
|
||||
:param split_by_variant: if False then aggregate by metric type, otherwise metric type + variant
|
||||
@@ -156,7 +164,7 @@ class WorkerStats:
|
||||
return {
|
||||
"date": date["key"],
|
||||
"count": date["doc_count"],
|
||||
**{agg: date[agg]["value"] for agg in aggs_per_metric[metric_key]},
|
||||
**{agg: date[agg]["value"] or 0.0 for agg in aggs_per_metric[metric_key]},
|
||||
}
|
||||
|
||||
def extract_metric_results(
|
||||
@@ -165,7 +173,7 @@ class WorkerStats:
|
||||
return [
|
||||
extract_date_stats(date, metric_key)
|
||||
for date in metric_or_variant["dates"]["buckets"]
|
||||
if date["doc_count"]
|
||||
if date["key"] <= cutoff_date
|
||||
]
|
||||
|
||||
def extract_variant_results(metric: dict) -> dict:
|
||||
@@ -204,6 +212,7 @@ class WorkerStats:
|
||||
"""
|
||||
if from_date >= to_date:
|
||||
raise bad_request.FieldsValueError("from_date must be less than to_date")
|
||||
interval = max(interval, self.min_chart_interval)
|
||||
|
||||
must = [QueryBuilder.dates_range(from_date, to_date)]
|
||||
if active_only:
|
||||
@@ -216,6 +225,10 @@ class WorkerStats:
|
||||
"date_histogram": {
|
||||
"field": "timestamp",
|
||||
"fixed_interval": f"{interval}s",
|
||||
"extended_bounds": {
|
||||
"min": int(from_date) * 1000,
|
||||
"max": int(to_date) * 1000,
|
||||
}
|
||||
},
|
||||
"aggs": {"workers_count": {"cardinality": {"field": "worker"}}},
|
||||
}
|
||||
@@ -223,9 +236,7 @@ class WorkerStats:
|
||||
"query": {"bool": {"must": must}},
|
||||
}
|
||||
|
||||
with translate_errors_context(), TimingContext(
|
||||
"es", "get_worker_activity_report"
|
||||
):
|
||||
with translate_errors_context():
|
||||
data = self._search_company_stats(company_id, es_req)
|
||||
|
||||
if "aggregations" not in data:
|
||||
|
||||
@@ -6,7 +6,7 @@ from functools import reduce
|
||||
from os import getenv
|
||||
from os.path import expandvars
|
||||
from pathlib import Path
|
||||
from typing import List, Any, TypeVar, Sequence
|
||||
from typing import List, Any, TypeVar, Sequence, Set
|
||||
|
||||
from boltons.iterutils import first
|
||||
from pyhocon import ConfigTree, ConfigFactory, ConfigValues
|
||||
@@ -35,6 +35,7 @@ class BasicConfig:
|
||||
folder: str = None,
|
||||
verbose: bool = True,
|
||||
prefix: Sequence[str] = DEFAULT_PREFIXES,
|
||||
exclude_files_from_base_folder: Sequence[str] = None,
|
||||
):
|
||||
folder = (
|
||||
Path(folder)
|
||||
@@ -44,6 +45,11 @@ class BasicConfig:
|
||||
if not folder.is_dir():
|
||||
raise ValueError("Invalid configuration folder")
|
||||
|
||||
self.exclude_files_from_base_folder = (
|
||||
set(exclude_files_from_base_folder)
|
||||
if exclude_files_from_base_folder
|
||||
else set()
|
||||
)
|
||||
self.verbose = verbose
|
||||
|
||||
self.extra_config_path_override_var = [
|
||||
@@ -85,7 +91,7 @@ class BasicConfig:
|
||||
return logging.getLogger(path)
|
||||
|
||||
def _read_extra_env_config_values(self) -> ConfigTree:
|
||||
""" Loads extra configuration from environment-injected values """
|
||||
"""Loads extra configuration from environment-injected values"""
|
||||
result = ConfigTree()
|
||||
|
||||
for prefix in self.extra_config_values_env_key_prefix:
|
||||
@@ -125,12 +131,18 @@ class BasicConfig:
|
||||
def _reload(self) -> ConfigTree:
|
||||
extra_config_values = self._read_extra_env_config_values()
|
||||
|
||||
configs = [self._read_recursive(path) for path in self._paths]
|
||||
configs = [
|
||||
self._read_recursive(
|
||||
path,
|
||||
exclude_files=(
|
||||
self.exclude_files_from_base_folder if idx == 0 else None
|
||||
),
|
||||
)
|
||||
for idx, path in enumerate(self._paths)
|
||||
]
|
||||
|
||||
return reduce(
|
||||
lambda last, config: self._merge_configs(
|
||||
last, config, copy_trees=True
|
||||
),
|
||||
lambda last, config: self._merge_configs(last, config, copy_trees=True),
|
||||
configs + [extra_config_values],
|
||||
ConfigTree(),
|
||||
)
|
||||
@@ -141,9 +153,14 @@ class BasicConfig:
|
||||
for key, value in b.items():
|
||||
override = key.startswith(override_prefix)
|
||||
if override:
|
||||
key = key[len(override_prefix):]
|
||||
key = key[len(override_prefix) :]
|
||||
# if key is in both a and b and both values are dictionary then merge it otherwise override it
|
||||
if not override and key in a and isinstance(a[key], ConfigTree) and isinstance(b[key], ConfigTree):
|
||||
if (
|
||||
not override
|
||||
and key in a
|
||||
and isinstance(a[key], ConfigTree)
|
||||
and isinstance(b[key], ConfigTree)
|
||||
):
|
||||
if copy_trees:
|
||||
a[key] = a[key].copy()
|
||||
cls._merge_configs(a[key], b[key], copy_trees=copy_trees)
|
||||
@@ -156,13 +173,15 @@ class BasicConfig:
|
||||
a[key] = value
|
||||
if a.root:
|
||||
if b.root:
|
||||
a.history[key] = a.history.get(key, []) + b.history.get(key, [value])
|
||||
a.history[key] = a.history.get(key, []) + b.history.get(
|
||||
key, [value]
|
||||
)
|
||||
else:
|
||||
a.history[key] = a.history.get(key, []) + [value]
|
||||
|
||||
return a
|
||||
|
||||
def _read_recursive(self, conf_root) -> ConfigTree:
|
||||
def _read_recursive(self, conf_root, exclude_files: Set[str]) -> ConfigTree:
|
||||
conf = ConfigTree()
|
||||
|
||||
if not conf_root:
|
||||
@@ -180,6 +199,8 @@ class BasicConfig:
|
||||
print(f"Loading config from {conf_root}")
|
||||
|
||||
for file in conf_root.rglob("*.conf"):
|
||||
if exclude_files and file.name in exclude_files:
|
||||
continue
|
||||
key = ".".join(file.relative_to(conf_root).with_suffix("").parts)
|
||||
conf.put(key, self._read_single_file(file))
|
||||
|
||||
|
||||
@@ -41,10 +41,6 @@
|
||||
# controls whether FieldDoesNotExist exception will be raised for any extra attribute existing in stored data
|
||||
# but not declared in a data model
|
||||
strict: false
|
||||
|
||||
aggregate {
|
||||
allow_disk_use: true
|
||||
}
|
||||
}
|
||||
|
||||
elastic {
|
||||
@@ -62,6 +58,9 @@
|
||||
# verify user tokens
|
||||
verify_user_tokens: false
|
||||
|
||||
# If set then users that were created from secure credentials or fixed user settings and are no longer in these settings will be deleted on startup
|
||||
delete_missing_autocreated_users: true
|
||||
|
||||
# max token expiration timeout in seconds (1 year)
|
||||
max_expiration_sec: 31536000
|
||||
|
||||
@@ -76,6 +75,7 @@
|
||||
httponly: true # allow only http to access the cookies (no JS etc)
|
||||
secure: false # not using HTTPS
|
||||
domain: null # Limit to localhost is not supported
|
||||
samesite: Lax
|
||||
max_age: 99999999999
|
||||
}
|
||||
|
||||
@@ -117,6 +117,10 @@
|
||||
# Timeout in seconds on task status update. If exceeded
|
||||
# then task can be stopped without communicating to the worker
|
||||
task_update_timeout: 600
|
||||
|
||||
# Timeout in seconds for worker registration (or status report). If a worker did not report for this long,
|
||||
# it is discarded from the server's table
|
||||
default_timeout: 600
|
||||
}
|
||||
|
||||
check_for_updates {
|
||||
@@ -146,4 +150,11 @@
|
||||
max_backoff_sec: 5
|
||||
}
|
||||
|
||||
getting_started_info {
|
||||
"agentName": "clearml",
|
||||
"configure": "clearml-init",
|
||||
"install": "pip install clearml",
|
||||
"packageName": "clearml"
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
fileserver = "http://localhost:8081"
|
||||
|
||||
elastic {
|
||||
events {
|
||||
hosts: [{host: "127.0.0.1", port: 9200}]
|
||||
hosts: [{host: "127.0.0.1", port: 9200, scheme: http}]
|
||||
args {
|
||||
timeout: 60
|
||||
dead_timeout: 10
|
||||
max_retries: 3
|
||||
retry_on_timeout: true
|
||||
}
|
||||
@@ -11,10 +12,9 @@ elastic {
|
||||
}
|
||||
|
||||
workers {
|
||||
hosts: [{host:"127.0.0.1", port:9200}]
|
||||
hosts: [{host:"127.0.0.1", port:9200, scheme: http}]
|
||||
args {
|
||||
timeout: 60
|
||||
dead_timeout: 10
|
||||
max_retries: 3
|
||||
retry_on_timeout: true
|
||||
}
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
{
|
||||
http {
|
||||
session_secret {
|
||||
apiserver: "Gx*gB-L2U8!Naqzd#8=7A4&+=In4H(da424H33ZTDQRGF6=FWw"
|
||||
apiserver: "V8gcW3EneNDcNfO7G_TSUsWe7uLozyacc9_I33o7bxUo8rCN31VLRg"
|
||||
}
|
||||
}
|
||||
|
||||
auth {
|
||||
# token sign secret
|
||||
token_secret: "7E1ua3xP9GT2(cIQOfhjp+gwN6spBeCAmN-XuugYle00I=Wc+u"
|
||||
token_secret: "Rq8FW84sSqVgq7WvBB_4EzNl9y8z8IGiDXX3C345_a5AZfcwZcwCIA"
|
||||
}
|
||||
|
||||
credentials {
|
||||
@@ -15,19 +15,29 @@
|
||||
apiserver {
|
||||
role: "system"
|
||||
user_key: "62T8CP7HGBC6647XF9314C2VY67RJO"
|
||||
user_secret: "FhS8VZv_I4%6Mo$8S1BWc$n$=o1dMYSivuiWU-Vguq7qGOKskG-d+b@tn_Iq"
|
||||
user_secret: "gaOfhDX2-bpkeI7-cwEcaMuGijxaG2UG3jbIvg4DxmVGF0LNI7rgvCb1-ne38IlBo1w"
|
||||
}
|
||||
fileserver {
|
||||
role: "system"
|
||||
user_key: "GSQWPEKSKNKF354LC9V6BHXKTYFD5I"
|
||||
user_secret: "tuBXcGQBECsEhcNiK2kiWi750z9r8Z85XrQ9V0c24huTuCb2xf2X1nKG"
|
||||
}
|
||||
webserver {
|
||||
role: "system"
|
||||
user_key: "EYVQ385RW7Y2QQUH88CZ7DWIQ1WUHP"
|
||||
user_secret: "yfc8KQo*GMXb*9p((qcYC7ByFIpF7I&4VH3BfUYXH%o9vX1ZUZQEEw1Inc)S"
|
||||
user_secret: "XhkH6a6ds9JBnM_MrahYyYdO-wS2bqFSm8gl-V0UZXH26Ydd6Eyi28TeBEoSr6Z3Bes"
|
||||
revoke_in_fixed_mode: true
|
||||
}
|
||||
services_agent {
|
||||
role: "admin"
|
||||
user_key: ""
|
||||
user_secret: ""
|
||||
}
|
||||
tests {
|
||||
role: "user"
|
||||
display_name: "Default User"
|
||||
user_key: "EGRTCO8JMSIGI6S39GTP43NFWXDQOW"
|
||||
user_secret: "x!XTov_G-#vspE*Y(h$Anm&DIc5Ou-F)jsl$PdOyj5wG1&E!Z8"
|
||||
user_secret: "LPEJbGJ6bK4tujQcmrD3i1dbMBDdwUwelVa-LG0K0FFmY9bzH_H0Sw"
|
||||
revoke_in_fixed_mode: true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,3 +2,8 @@ max_page_size: 500
|
||||
|
||||
# expiration time in seconds for the redis scroll states in get_many family of apis
|
||||
scroll_state_expiration_seconds: 600
|
||||
|
||||
allow_disk_use {
|
||||
sort: true
|
||||
aggregate: true
|
||||
}
|
||||
13
apiserver/config/default/services/async_urls_delete.conf
Normal file
13
apiserver/config/default/services/async_urls_delete.conf
Normal file
@@ -0,0 +1,13 @@
|
||||
# if set to true then on task delete/reset external file urls for known storage types are scheduled for async delete
|
||||
# otherwise they are returned to a client for the client side delete
|
||||
enabled: true
|
||||
max_retries: 3
|
||||
retry_timeout_sec: 60
|
||||
|
||||
fileserver {
|
||||
# fileserver url prefixes. Evaluated in the order of priority
|
||||
# Can be in the form <schema>://host:port/path or /path
|
||||
url_prefixes: ["https://files.community-master.hosted.allegro.ai/"]
|
||||
timeout_sec: 300
|
||||
token_expiration_sec: 600
|
||||
}
|
||||
@@ -12,15 +12,28 @@ events_retrieval {
|
||||
# should not exceed the amount of concurrent connections set in the ES driver
|
||||
max_metrics_concurrency: 4
|
||||
|
||||
# If set then max_metrics_count and max_variants_count are calculated dynamically on user data
|
||||
dynamic_metrics_count: true
|
||||
|
||||
# The percentage from the ES aggs limit (10000) to use for the max_metrics and max_variants calculation
|
||||
dynamic_metrics_count_threshold: 80
|
||||
|
||||
# the max amount of metrics to aggregate on
|
||||
max_metrics_count: 100
|
||||
|
||||
# the max amount of variants to aggregate on
|
||||
max_variants_count: 100
|
||||
|
||||
debug_images {
|
||||
# Allow to return the debug images for the variants with uninitialized valid iterations border
|
||||
allow_uninitialized_variants: true
|
||||
}
|
||||
|
||||
max_raw_scalars_size: 200000
|
||||
|
||||
scroll_id_key: "cTN5VEtWEC6QrHvUl0FTx9kNyO0CcCK1p57akxma"
|
||||
|
||||
multi_plots_batch_size: 1000
|
||||
}
|
||||
|
||||
# if set then plot str will be checked for the valid json on plot add
|
||||
@@ -28,4 +41,7 @@ events_retrieval {
|
||||
validate_plot_str: false
|
||||
|
||||
# If not 0 then plots whose size is equal to or greater than this threshold will be stored compressed in the DB
|
||||
plot_compression_threshold: 100000
|
||||
plot_compression_threshold: 100000
|
||||
|
||||
# async events delete threshold
|
||||
max_async_deleted_events_per_sec: 1000
|
||||
@@ -1,7 +1,4 @@
|
||||
metadata_values {
|
||||
# maximal amount of distinct model values to retrieve
|
||||
max_count: 100
|
||||
|
||||
# cache ttl sec
|
||||
cache_ttl_sec: 86400
|
||||
}
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
tags_cache {
|
||||
expiration_seconds: 3600
|
||||
}
|
||||
download {
|
||||
redis_timeout_sec: 300
|
||||
batch_size: 500
|
||||
max_download_items: 50000
|
||||
max_project_name_length: 60
|
||||
}
|
||||
8
apiserver/config/default/services/queues.conf
Normal file
8
apiserver/config/default/services/queues.conf
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
metrics_before_from_date: 3600
|
||||
# interval in seconds to update queue metrics. Put 0 to disable
|
||||
metrics_refresh_interval_sec: 300
|
||||
# the queues with these tags will not be returned from get_all/get_all_ex unless id or name specified
|
||||
# or search_hidden is set
|
||||
hidden_tags: [k8s-glue]
|
||||
}
|
||||
7
apiserver/config/default/services/serving.conf
Normal file
7
apiserver/config/default/services/serving.conf
Normal file
@@ -0,0 +1,7 @@
|
||||
default_container_timeout_sec: 600
|
||||
# Auto-register unknown serving containers on status reports and other calls
|
||||
container_auto_register: true
|
||||
# Assume unknown serving containers have unregistered (i.e. do not raise unregistered error)
|
||||
container_auto_unregister: true
|
||||
# The minimal sampling interval for serving model monitor charts
|
||||
min_chart_interval_sec: 40
|
||||
54
apiserver/config/default/services/storage_credentials.conf
Normal file
54
apiserver/config/default/services/storage_credentials.conf
Normal file
@@ -0,0 +1,54 @@
|
||||
aws {
|
||||
s3 {
|
||||
# S3 credentials, used for read/write access by various SDK elements
|
||||
# default, used for any bucket not specified below
|
||||
key: ""
|
||||
secret: ""
|
||||
region: ""
|
||||
use_credentials_chain: false
|
||||
# Additional ExtraArgs passed to boto3 when uploading files. Can also be set per-bucket under "credentials".
|
||||
extra_args: {}
|
||||
credentials: [
|
||||
# specifies key/secret credentials to use when handling s3 urls (read or write)
|
||||
# {
|
||||
# bucket: "my-bucket-name"
|
||||
# key: "my-access-key"
|
||||
# secret: "my-secret-key"
|
||||
# },
|
||||
{
|
||||
# This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)
|
||||
host: "localhost:9000"
|
||||
key: "minioadmin"
|
||||
secret: "minioadmin"
|
||||
# region: my-server
|
||||
multipart: false
|
||||
secure: false
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
google.storage {
|
||||
# Default project and credentials file
|
||||
# Will be used when no bucket configuration is found
|
||||
// project: "clearml"
|
||||
// credentials_json: "/path/to/credentials.json"
|
||||
//
|
||||
// # Specific credentials per bucket and sub directory
|
||||
// credentials = [
|
||||
// {
|
||||
// bucket: "my-bucket"
|
||||
// subdir: "path/in/bucket" # Not required
|
||||
// project: "clearml"
|
||||
// credentials_json: "/path/to/credentials.json"
|
||||
// },
|
||||
// ]
|
||||
}
|
||||
azure.storage {
|
||||
# containers: [
|
||||
# {
|
||||
# account_name: "clearml"
|
||||
# account_key: "secret"
|
||||
# # container_name:
|
||||
# }
|
||||
# ]
|
||||
}
|
||||
@@ -11,12 +11,18 @@ non_responsive_tasks_watchdog {
|
||||
multi_task_histogram_limit: 100
|
||||
|
||||
hyperparam_values {
|
||||
# maximal amount of distinct hyperparam values to retrieve
|
||||
max_count: 100
|
||||
|
||||
# max allowed outdate time for the cached result
|
||||
cache_allowed_outdate_sec: 60
|
||||
|
||||
# cache ttl sec
|
||||
cache_ttl_sec: 86400
|
||||
}
|
||||
}
|
||||
|
||||
# the maximum amount of unique last metrics/variants combinations
|
||||
# for which the last values are stored in a task
|
||||
max_last_metrics: 2000
|
||||
|
||||
# if set then call to tasks.delete/cleanup does not wait for ES events deletion
|
||||
async_events_delete: true
|
||||
# do not use async_delete if the deleted task has amount of events lower than this threshold
|
||||
async_events_delete_threshold: 100000
|
||||
|
||||
5
apiserver/config/default/services/workers.conf
Normal file
5
apiserver/config/default/services/workers.conf
Normal file
@@ -0,0 +1,5 @@
|
||||
default_worker_timeout_sec: 600
|
||||
default_cluster_timeout_sec: 600
|
||||
|
||||
# The minimal sampling interval for resource dashboard and worker activity charts
|
||||
min_chart_interval_sec: 40
|
||||
@@ -29,11 +29,16 @@ OVERRIDE_PORT_ENV_KEY = (
|
||||
)
|
||||
|
||||
OVERRIDE_CONNECTION_STRING_ENV_KEY = "CLEARML_MONGODB_SERVICE_CONNECTION_STRING"
|
||||
OVERRIDE_USERNAME_ENV_KEY = "CLEARML_MONGODB_SERVICE_USERNAME"
|
||||
OVERRIDE_PASSWORD_ENV_KEY = "CLEARML_MONGODB_SERVICE_PASSWORD"
|
||||
OVERRIDE_QUERY_ENV_KEY = "CLEARML_MONGODB_SERVICE_QUERY"
|
||||
|
||||
|
||||
class DatabaseEntry(models.Base):
|
||||
host = StringField(required=True)
|
||||
alias = StringField()
|
||||
name = StringField()
|
||||
db = StringField()
|
||||
|
||||
|
||||
class DatabaseFactory:
|
||||
@@ -52,29 +57,49 @@ class DatabaseFactory:
|
||||
override_connection_string = getenv(OVERRIDE_CONNECTION_STRING_ENV_KEY)
|
||||
override_hostname = first(map(getenv, OVERRIDE_HOST_ENV_KEY), None)
|
||||
override_port = first(map(getenv, OVERRIDE_PORT_ENV_KEY), None)
|
||||
override_username = getenv(OVERRIDE_USERNAME_ENV_KEY)
|
||||
override_password = getenv(OVERRIDE_PASSWORD_ENV_KEY)
|
||||
override_query = getenv(OVERRIDE_QUERY_ENV_KEY)
|
||||
|
||||
if override_connection_string:
|
||||
log.info(f"Using override mongodb connection string {override_connection_string}")
|
||||
log.info(f"Using override mongodb connection string template {override_connection_string}")
|
||||
else:
|
||||
if override_hostname:
|
||||
log.info(f"Using override mongodb host {override_hostname}")
|
||||
if override_port:
|
||||
log.info(f"Using override mongodb port {override_port}")
|
||||
if override_username:
|
||||
log.info(f"Using override mongodb username {override_username}")
|
||||
if override_password:
|
||||
log.info(f"Using override mongodb password ******")
|
||||
if override_query:
|
||||
log.info(f"Using override mongodb query {override_query}")
|
||||
|
||||
for key, alias in get_items(Database).items():
|
||||
if key not in db_entries:
|
||||
missing.append(key)
|
||||
continue
|
||||
|
||||
entry = cls._create_db_entry(alias=alias, settings=db_entries.get(key))
|
||||
settings = {**db_entries.get(key)}
|
||||
if not any(field in settings for field in ("name", "db")):
|
||||
settings["name"] = key
|
||||
entry = cls._create_db_entry(alias=alias, settings=settings)
|
||||
|
||||
if override_connection_string:
|
||||
entry.host = override_connection_string
|
||||
con_str = override_connection_string
|
||||
log.info(f"Using override mongodb connection string for {alias}: {con_str}")
|
||||
entry.host = con_str
|
||||
else:
|
||||
if override_hostname:
|
||||
entry.host = furl(entry.host).set(host=override_hostname).url
|
||||
if override_port:
|
||||
entry.host = furl(entry.host).set(port=override_port).url
|
||||
if override_username:
|
||||
entry.host = furl(entry.host).set(username=override_username).url
|
||||
if override_password:
|
||||
entry.host = furl(entry.host).set(password=override_password).url
|
||||
if override_query:
|
||||
entry.host = furl(entry.host).set(query=override_query).url
|
||||
|
||||
try:
|
||||
entry.validate()
|
||||
|
||||
@@ -5,7 +5,7 @@ from textwrap import shorten
|
||||
|
||||
import dpath
|
||||
from dpath.exceptions import InvalidKeyName
|
||||
from elasticsearch import ElasticsearchException
|
||||
from elastic_transport import TransportError, ApiError
|
||||
from elasticsearch.helpers import BulkIndexError
|
||||
from jsonmodels.errors import ValidationError as JsonschemaValidationError
|
||||
from mongoengine.errors import (
|
||||
@@ -16,7 +16,7 @@ from mongoengine.errors import (
|
||||
LookUpError,
|
||||
InvalidQueryError,
|
||||
)
|
||||
from pymongo.errors import PyMongoError, NotMasterError
|
||||
from pymongo.errors import PyMongoError, NotPrimaryError
|
||||
|
||||
from apiserver.apierrors import errors
|
||||
|
||||
@@ -166,7 +166,10 @@ class MongoEngineErrorsHandler(object):
|
||||
@classmethod
|
||||
@throws_default_error(errors.server_error.InternalError)
|
||||
def invalid_query_error(cls, e, message, **_):
|
||||
pass
|
||||
if e.args:
|
||||
inner = e.args[0]
|
||||
if isinstance(inner, LookUpError):
|
||||
cls.lookup_error(inner, message)
|
||||
|
||||
|
||||
@contextmanager
|
||||
@@ -195,7 +198,7 @@ def translate_errors_context(message=None, **kwargs):
|
||||
MongoEngineErrorsHandler.invalid_query_error(e, message, **kwargs)
|
||||
except PyMongoError as e:
|
||||
raise errors.server_error.InternalError(message, err=str(e))
|
||||
except NotMasterError as e:
|
||||
except NotPrimaryError as e:
|
||||
raise errors.server_error.InternalError(message, err=str(e))
|
||||
except MakeGetAllQueryError as e:
|
||||
raise errors.bad_request.ValidationError(e.error, field=e.field)
|
||||
@@ -207,9 +210,9 @@ def translate_errors_context(message=None, **kwargs):
|
||||
raise errors.bad_request.ValidationError(e.args[0])
|
||||
except BulkIndexError as e:
|
||||
ElasticErrorsHandler.bulk_error(e, message, **kwargs)
|
||||
except ElasticsearchException as e:
|
||||
except (TransportError, ApiError) as e:
|
||||
raise errors.server_error.DataError(e, message, **kwargs)
|
||||
except InvalidKeyName:
|
||||
raise errors.server_error.DataError("invalid empty key encountered in data")
|
||||
except Exception as ex:
|
||||
except Exception:
|
||||
raise
|
||||
|
||||
@@ -60,3 +60,4 @@ def validate_id(cls, company, **kwargs):
|
||||
class EntityVisibility(Enum):
|
||||
active = "active"
|
||||
archived = "archived"
|
||||
hidden = "hidden"
|
||||
|
||||
@@ -4,6 +4,7 @@ from mongoengine import (
|
||||
EmbeddedDocumentListField,
|
||||
EmailField,
|
||||
DateTimeField,
|
||||
BooleanField,
|
||||
)
|
||||
|
||||
from apiserver.database import Database, strict
|
||||
@@ -76,3 +77,6 @@ class User(DbModelMixin, AuthDocument):
|
||||
|
||||
email = EmailField(unique=True, sparse=True)
|
||||
""" Email uniquely identifying the user """
|
||||
|
||||
autocreated = BooleanField(default=False)
|
||||
""" Set to true if the user was auto created based on config settings"""
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -3,6 +3,8 @@ from mongoengine import (
|
||||
DateTimeField,
|
||||
BooleanField,
|
||||
EmbeddedDocumentField,
|
||||
IntField,
|
||||
ListField,
|
||||
)
|
||||
|
||||
from apiserver.database import Database, strict
|
||||
@@ -17,12 +19,14 @@ from apiserver.database.model.base import GetMixin
|
||||
from apiserver.database.model.metadata import MetadataItem
|
||||
from apiserver.database.model.model_labels import ModelLabels
|
||||
from apiserver.database.model.project import Project
|
||||
from apiserver.database.model.task.metrics import MetricEvent
|
||||
from apiserver.database.model.task.task import Task
|
||||
|
||||
|
||||
class Model(AttributedDocument):
|
||||
_field_collation_overrides = {
|
||||
"metadata.": AttributedDocument._numeric_locale,
|
||||
"last_metrics.": AttributedDocument._numeric_locale,
|
||||
}
|
||||
|
||||
meta = {
|
||||
@@ -33,9 +37,18 @@ class Model(AttributedDocument):
|
||||
"project",
|
||||
"task",
|
||||
"last_update",
|
||||
("company", "framework"),
|
||||
("company", "last_update"),
|
||||
("company", "name"),
|
||||
("company", "uri"),
|
||||
# distinct queries support
|
||||
("company", "tags"),
|
||||
("company", "system_tags"),
|
||||
("company", "project", "tags"),
|
||||
("company", "project", "system_tags"),
|
||||
("company", "user"),
|
||||
("company", "project", "user"),
|
||||
("company", "framework"),
|
||||
("company", "project", "framework"),
|
||||
{
|
||||
"name": "%s.model.main_text_index" % Database.backend,
|
||||
"fields": ["$name", "$id", "$comment", "$parent", "$task", "$project"],
|
||||
@@ -66,7 +79,8 @@ class Model(AttributedDocument):
|
||||
"parent",
|
||||
"metadata.*",
|
||||
),
|
||||
datetime_fields=("last_update",),
|
||||
range_fields=("created", "last_metrics.*", "last_iteration"),
|
||||
datetime_fields=("last_update", "last_change"),
|
||||
)
|
||||
|
||||
id = StringField(primary_key=True)
|
||||
@@ -84,6 +98,8 @@ class Model(AttributedDocument):
|
||||
labels = ModelLabels()
|
||||
ready = BooleanField(required=True)
|
||||
last_update = DateTimeField()
|
||||
last_change = DateTimeField()
|
||||
last_changed_by = StringField()
|
||||
ui_cache = SafeDictField(
|
||||
default=dict, user_set_allowed=True, exclude_by_default=True
|
||||
)
|
||||
@@ -91,3 +107,9 @@ class Model(AttributedDocument):
|
||||
metadata = SafeMapField(
|
||||
field=EmbeddedDocumentField(MetadataItem), user_set_allowed=True
|
||||
)
|
||||
last_iteration = IntField(default=0)
|
||||
last_metrics = SafeMapField(field=SafeMapField(EmbeddedDocumentField(MetricEvent)))
|
||||
unique_metrics = ListField(StringField(required=True), exclude_by_default=True)
|
||||
|
||||
def get_index_company(self) -> str:
|
||||
return self.company or self.company_origin or ""
|
||||
|
||||
@@ -9,7 +9,7 @@ from apiserver.database.model.base import GetMixin
|
||||
class Project(AttributedDocument):
|
||||
|
||||
get_all_query_options = GetMixin.QueryParameterOptions(
|
||||
pattern_fields=("name", "description"),
|
||||
pattern_fields=("name", "basename", "description"),
|
||||
list_fields=("tags", "system_tags", "id", "parent", "path"),
|
||||
range_fields=("last_update",),
|
||||
)
|
||||
@@ -21,6 +21,7 @@ class Project(AttributedDocument):
|
||||
"parent",
|
||||
"path",
|
||||
("company", "name"),
|
||||
("company", "basename"),
|
||||
{
|
||||
"name": "%s.project.main_text_index" % Database.backend,
|
||||
"fields": ["$name", "$id", "$description"],
|
||||
@@ -37,6 +38,7 @@ class Project(AttributedDocument):
|
||||
min_length=3,
|
||||
sparse=True,
|
||||
)
|
||||
basename = StrippedStringField(required=True)
|
||||
description = StringField()
|
||||
created = DateTimeField(required=True)
|
||||
tags = SafeSortedListField(StringField(required=True))
|
||||
|
||||
@@ -47,6 +47,7 @@ class Queue(DbModelMixin, Document):
|
||||
name = StrippedStringField(
|
||||
required=True, unique_with="company", min_length=3, user_set_allowed=True
|
||||
)
|
||||
display_name = StringField(user_set_allowed=True)
|
||||
company = StringField(required=True, reference_field=Company)
|
||||
created = DateTimeField(required=True)
|
||||
tags = SafeSortedListField(
|
||||
|
||||
76
apiserver/database/model/storage_settings.py
Normal file
76
apiserver/database/model/storage_settings.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from mongoengine import (
|
||||
Document,
|
||||
EmbeddedDocument,
|
||||
StringField,
|
||||
DateTimeField,
|
||||
EmbeddedDocumentListField,
|
||||
EmbeddedDocumentField,
|
||||
BooleanField,
|
||||
)
|
||||
|
||||
from apiserver.database import Database, strict
|
||||
from apiserver.database.model import DbModelMixin
|
||||
from apiserver.database.model.base import ProperDictMixin
|
||||
|
||||
class AWSBucketSettings(EmbeddedDocument, ProperDictMixin):
|
||||
bucket = StringField()
|
||||
subdir = StringField()
|
||||
host = StringField()
|
||||
key = StringField()
|
||||
secret = StringField()
|
||||
token = StringField()
|
||||
multipart = BooleanField()
|
||||
acl = StringField()
|
||||
secure = BooleanField()
|
||||
region = StringField()
|
||||
verify = BooleanField()
|
||||
use_credentials_chain = BooleanField()
|
||||
|
||||
|
||||
class AWSSettings(EmbeddedDocument, DbModelMixin):
|
||||
key = StringField()
|
||||
secret = StringField()
|
||||
region = StringField()
|
||||
token = StringField()
|
||||
use_credentials_chain = BooleanField()
|
||||
buckets = EmbeddedDocumentListField(AWSBucketSettings)
|
||||
|
||||
|
||||
class GoogleBucketSettings(EmbeddedDocument, ProperDictMixin):
|
||||
bucket = StringField()
|
||||
subdir = StringField()
|
||||
project = StringField()
|
||||
credentials_json = StringField()
|
||||
|
||||
|
||||
class GoogleStorageSettings(EmbeddedDocument, DbModelMixin):
|
||||
project = StringField()
|
||||
credentials_json = StringField()
|
||||
buckets = EmbeddedDocumentListField(GoogleBucketSettings)
|
||||
|
||||
|
||||
class AzureStorageContainerSettings(EmbeddedDocument, ProperDictMixin):
|
||||
account_name = StringField(required=True)
|
||||
account_key = StringField(required=True)
|
||||
container_name = StringField()
|
||||
|
||||
|
||||
class AzureStorageSettings(EmbeddedDocument, DbModelMixin):
|
||||
containers = EmbeddedDocumentListField(AzureStorageContainerSettings)
|
||||
|
||||
|
||||
class StorageSettings(DbModelMixin, Document):
|
||||
meta = {
|
||||
"db_alias": Database.backend,
|
||||
"strict": strict,
|
||||
"indexes": [
|
||||
"company"
|
||||
],
|
||||
}
|
||||
|
||||
id = StringField(primary_key=True)
|
||||
company = StringField(required=True, unique=True)
|
||||
last_update = DateTimeField()
|
||||
aws: AWSSettings = EmbeddedDocumentField(AWSSettings)
|
||||
google: GoogleStorageSettings = EmbeddedDocumentField(GoogleStorageSettings)
|
||||
azure: AzureStorageSettings = EmbeddedDocumentField(AzureStorageSettings)
|
||||
@@ -4,6 +4,8 @@ from mongoengine import (
|
||||
DynamicField,
|
||||
LongField,
|
||||
EmbeddedDocumentField,
|
||||
IntField,
|
||||
FloatField,
|
||||
)
|
||||
|
||||
from apiserver.database.fields import SafeMapField
|
||||
@@ -19,7 +21,14 @@ class MetricEvent(EmbeddedDocument):
|
||||
variant = StringField(required=True)
|
||||
value = DynamicField(required=True)
|
||||
min_value = DynamicField() # for backwards compatibility reasons
|
||||
min_value_iteration = IntField()
|
||||
max_value = DynamicField() # for backwards compatibility reasons
|
||||
max_value_iteration = IntField()
|
||||
first_value = FloatField()
|
||||
first_value_iteration = IntField()
|
||||
count = IntField()
|
||||
mean_value = FloatField()
|
||||
x_axis_label = StringField()
|
||||
|
||||
|
||||
class EventStats(EmbeddedDocument):
|
||||
|
||||
@@ -19,6 +19,7 @@ from apiserver.database.fields import (
|
||||
SafeSortedListField,
|
||||
EmbeddedDocumentListField,
|
||||
NullableStringField,
|
||||
NoneType,
|
||||
)
|
||||
from apiserver.database.model import AttributedDocument
|
||||
from apiserver.database.model.base import ProperDictMixin, GetMixin
|
||||
@@ -89,7 +90,9 @@ class Artifact(EmbeddedDocument):
|
||||
content_size = LongField()
|
||||
timestamp = LongField()
|
||||
type_data = EmbeddedDocumentField(ArtifactTypeData)
|
||||
display_data = SafeSortedListField(ListField(UnionField((int, float, str))))
|
||||
display_data = SafeSortedListField(
|
||||
ListField(UnionField((int, float, str, NoneType)))
|
||||
)
|
||||
|
||||
|
||||
class ParamsItem(EmbeddedDocument, ProperDictMixin):
|
||||
@@ -149,6 +152,7 @@ class TaskType(object):
|
||||
application = "application"
|
||||
monitor = "monitor"
|
||||
controller = "controller"
|
||||
report = "report"
|
||||
optimizer = "optimizer"
|
||||
service = "service"
|
||||
qc = "qc"
|
||||
@@ -175,11 +179,12 @@ class Task(AttributedDocument):
|
||||
"active_duration",
|
||||
"parent",
|
||||
"project",
|
||||
"last_update",
|
||||
"status_changed",
|
||||
"models.input.model",
|
||||
("company", "name"),
|
||||
("company", "user"),
|
||||
("company", "status", "type"),
|
||||
("company", "system_tags", "last_update"),
|
||||
("company", "last_update", "system_tags"),
|
||||
("company", "type", "system_tags", "status"),
|
||||
("company", "project", "type", "system_tags", "status"),
|
||||
("status", "last_update"), # for maintenance tasks
|
||||
@@ -187,12 +192,24 @@ class Task(AttributedDocument):
|
||||
"fields": ["company", "project"],
|
||||
"collation": AttributedDocument._numeric_locale,
|
||||
},
|
||||
# distinct queries support
|
||||
("company", "tags"),
|
||||
("company", "system_tags"),
|
||||
("company", "project", "tags"),
|
||||
("company", "project", "system_tags"),
|
||||
("company", "user"),
|
||||
("company", "project", "user"),
|
||||
("company", "parent"),
|
||||
("company", "project", "parent"),
|
||||
("company", "type"),
|
||||
("company", "project", "type"),
|
||||
{
|
||||
"name": "%s.task.main_text_index" % Database.backend,
|
||||
"fields": [
|
||||
"$name",
|
||||
"$id",
|
||||
"$comment",
|
||||
"$report",
|
||||
"$models.input.model",
|
||||
"$models.output.model",
|
||||
"$script.repository",
|
||||
@@ -203,6 +220,7 @@ class Task(AttributedDocument):
|
||||
"name": 10,
|
||||
"id": 10,
|
||||
"comment": 10,
|
||||
"report": 10,
|
||||
"models.output.model": 2,
|
||||
"models.input.model": 2,
|
||||
"script.repository": 1,
|
||||
@@ -222,10 +240,13 @@ class Task(AttributedDocument):
|
||||
"project",
|
||||
"parent",
|
||||
"hyperparams.*",
|
||||
"execution.queue",
|
||||
"models.input.model",
|
||||
),
|
||||
range_fields=("started", "active_duration", "last_metrics.*", "last_iteration"),
|
||||
datetime_fields=("status_changed", "last_update"),
|
||||
pattern_fields=("name", "comment"),
|
||||
range_fields=("created", "started", "active_duration", "last_metrics.*", "last_iteration"),
|
||||
datetime_fields=("status_changed", "last_update", "last_change"),
|
||||
pattern_fields=("name", "comment", "report"),
|
||||
fields=("runtime.*",),
|
||||
)
|
||||
|
||||
id = StringField(primary_key=True)
|
||||
@@ -239,6 +260,8 @@ class Task(AttributedDocument):
|
||||
status_message = StringField(user_set_allowed=True)
|
||||
status_changed = DateTimeField()
|
||||
comment = StringField(user_set_allowed=True)
|
||||
report = StringField()
|
||||
report_assets = ListField(StringField())
|
||||
created = DateTimeField(required=True, user_set_allowed=True)
|
||||
started = DateTimeField()
|
||||
completed = DateTimeField()
|
||||
@@ -257,9 +280,10 @@ class Task(AttributedDocument):
|
||||
last_change = DateTimeField()
|
||||
last_iteration = IntField(default=DEFAULT_LAST_ITERATION)
|
||||
last_metrics = SafeMapField(field=SafeMapField(EmbeddedDocumentField(MetricEvent)))
|
||||
unique_metrics = ListField(StringField(required=True), exclude_by_default=True)
|
||||
metric_stats = SafeMapField(field=EmbeddedDocumentField(MetricEventStats))
|
||||
company_origin = StringField(exclude_by_default=True)
|
||||
duration = IntField() # task duration in seconds
|
||||
duration = IntField() # obsolete, do not use
|
||||
hyperparams = SafeMapField(field=SafeMapField(EmbeddedDocumentField(ParamsItem)))
|
||||
configuration = SafeMapField(field=EmbeddedDocumentField(ConfigurationItem))
|
||||
runtime = SafeDictField(default=dict)
|
||||
@@ -268,6 +292,7 @@ class Task(AttributedDocument):
|
||||
enqueue_status = StringField(
|
||||
choices=get_options(TaskStatus), exclude_by_default=True
|
||||
)
|
||||
last_changed_by = StringField()
|
||||
|
||||
def get_index_company(self) -> str:
|
||||
"""
|
||||
|
||||
52
apiserver/database/model/url_to_delete.py
Normal file
52
apiserver/database/model/url_to_delete.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from enum import Enum
|
||||
|
||||
from mongoengine import StringField, DateTimeField, IntField, EnumField
|
||||
|
||||
from apiserver.database import Database, strict
|
||||
from apiserver.database.model import AttributedDocument
|
||||
|
||||
|
||||
class StorageType(str, Enum):
|
||||
fileserver = "fileserver"
|
||||
s3 = "s3"
|
||||
azure = "azure"
|
||||
gs = "gs"
|
||||
unknown = "unknown"
|
||||
|
||||
|
||||
class FileType(str, Enum):
|
||||
file = "file"
|
||||
folder = "folder"
|
||||
|
||||
|
||||
class DeletionStatus(str, Enum):
|
||||
created = "created"
|
||||
retrying = "retrying"
|
||||
failed = "failed"
|
||||
|
||||
|
||||
class UrlToDelete(AttributedDocument):
|
||||
_field_collation_overrides = {
|
||||
"url": AttributedDocument._numeric_locale,
|
||||
}
|
||||
|
||||
meta = {
|
||||
"db_alias": Database.backend,
|
||||
"strict": strict,
|
||||
"indexes": [
|
||||
("company", "user", "task"),
|
||||
("company", "storage_type", "url"),
|
||||
("status", "retry_count", "storage_type"),
|
||||
],
|
||||
}
|
||||
|
||||
id = StringField(primary_key=True)
|
||||
url = StringField(required=True, unique_with="company")
|
||||
task = StringField(required=True)
|
||||
created = DateTimeField(required=True)
|
||||
storage_type = EnumField(StorageType, default=StorageType.unknown)
|
||||
type = EnumField(FileType, default=FileType.file)
|
||||
retry_count = IntField(default=0)
|
||||
last_failure_time = DateTimeField()
|
||||
last_failure_reason = StringField()
|
||||
status = EnumField(DeletionStatus, default=DeletionStatus.created)
|
||||
@@ -1,4 +1,4 @@
|
||||
from mongoengine import Document, StringField, DynamicField
|
||||
from mongoengine import Document, StringField, DynamicField, DateTimeField
|
||||
|
||||
from apiserver.database import Database, strict
|
||||
from apiserver.database.model import DbModelMixin
|
||||
@@ -20,3 +20,5 @@ class User(DbModelMixin, Document):
|
||||
given_name = StringField(user_set_allowed=True)
|
||||
avatar = StringField()
|
||||
preferences = DynamicField(default="", exclude_by_default=True)
|
||||
created_in_version = StringField()
|
||||
created = DateTimeField()
|
||||
|
||||
@@ -1,73 +1,16 @@
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from itertools import groupby, chain
|
||||
from typing import Sequence, Dict, Callable, Tuple, Any, Type
|
||||
from typing import Sequence, Dict, Callable
|
||||
|
||||
import dpath.path
|
||||
from boltons import iterutils
|
||||
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.props import PropsMixin
|
||||
|
||||
SEP = "."
|
||||
|
||||
|
||||
def project_dict(data, projection, separator=SEP):
|
||||
"""
|
||||
Project partial data from a dictionary into a new dictionary
|
||||
:param data: Input dictionary
|
||||
:param projection: List of dictionary paths (each a string with field names separated using a separator)
|
||||
:param separator: Separator (default is '.')
|
||||
:return: A new dictionary containing only the projected parts from the original dictionary
|
||||
"""
|
||||
assert isinstance(data, dict)
|
||||
result = {}
|
||||
|
||||
def copy_path(path_parts, source, destination):
|
||||
src, dst = source, destination
|
||||
try:
|
||||
for depth, path_part in enumerate(path_parts[:-1]):
|
||||
src_part = src[path_part]
|
||||
if isinstance(src_part, dict):
|
||||
src = src_part
|
||||
dst = dst.setdefault(path_part, {})
|
||||
elif isinstance(src_part, (list, tuple)):
|
||||
if path_part not in dst:
|
||||
dst[path_part] = [{} for _ in range(len(src_part))]
|
||||
elif not isinstance(dst[path_part], (list, tuple)):
|
||||
raise TypeError(
|
||||
"Incompatible destination type %s for %s (list expected)"
|
||||
% (type(dst), separator.join(path_parts[: depth + 1]))
|
||||
)
|
||||
elif not len(dst[path_part]) == len(src_part):
|
||||
raise ValueError(
|
||||
"Destination list length differs from source length for %s"
|
||||
% separator.join(path_parts[: depth + 1])
|
||||
)
|
||||
|
||||
dst[path_part] = [
|
||||
copy_path(path_parts[depth + 1 :], s, d)
|
||||
for s, d in zip(src_part, dst[path_part])
|
||||
]
|
||||
|
||||
return destination
|
||||
else:
|
||||
raise TypeError(
|
||||
"Unsupported projection type %s for %s"
|
||||
% (type(src), separator.join(path_parts[: depth + 1]))
|
||||
)
|
||||
|
||||
last_part = path_parts[-1]
|
||||
dst[last_part] = src[last_part]
|
||||
except KeyError:
|
||||
# Projection field not in source, no biggie.
|
||||
pass
|
||||
return destination
|
||||
|
||||
for projection_path in sorted(projection):
|
||||
copy_path(
|
||||
path_parts=projection_path.split(separator), source=data, destination=result
|
||||
)
|
||||
return result
|
||||
max_items_per_fetch = config.get("services._mongo.max_page_size", 500)
|
||||
|
||||
|
||||
class _ReferenceProxy(dict):
|
||||
@@ -110,9 +53,6 @@ class ProjectionHelper(object):
|
||||
self._ref_projection = None
|
||||
self._proxy_manager = _ProxyManager()
|
||||
|
||||
# Cached dpath paths for each of the result documents
|
||||
self._cached_results_paths: Dict[int, Sequence[Tuple[Any, Type]]] = {}
|
||||
|
||||
self._parse_projection(projection)
|
||||
|
||||
def _collect_projection_fields(self, doc_cls, projection):
|
||||
@@ -275,25 +215,26 @@ class ProjectionHelper(object):
|
||||
norm_path = doc_cls.get_dpath_translated_path(path)
|
||||
globlist = norm_path.strip(SEP).split(SEP)
|
||||
|
||||
obj_paths = self._cached_results_paths.get(id(obj))
|
||||
if obj_paths is None:
|
||||
obj_paths = self._cached_results_paths[id(obj)] = list(
|
||||
dpath.path.paths(obj, dirs=True, skip=True)
|
||||
)
|
||||
|
||||
paths = [p for p in obj_paths if dpath.path.match(p, globlist)]
|
||||
|
||||
def search_and_replace(p: Sequence[Tuple[str, Type]]) -> Any:
|
||||
def _search_and_replace(target: dict, p: Sequence[str]) -> Sequence[str]:
|
||||
parent = None
|
||||
target = obj
|
||||
for part in p:
|
||||
parent = target
|
||||
target = target[part[0]]
|
||||
if parent and factory:
|
||||
parent[p[-1][0]] = factory(target)
|
||||
return target
|
||||
for idx, part in enumerate(p):
|
||||
if isinstance(target, dict) and part in target:
|
||||
parent = target
|
||||
target = target[part]
|
||||
elif isinstance(target, list) and part == "*":
|
||||
return list(
|
||||
chain.from_iterable(
|
||||
_search_and_replace(t, p[idx + 1 :]) for t in target
|
||||
)
|
||||
)
|
||||
else:
|
||||
return []
|
||||
|
||||
return [search_and_replace(p) for p in paths]
|
||||
if parent and factory:
|
||||
parent[p[-1]] = factory(target)
|
||||
return [target]
|
||||
|
||||
return _search_and_replace(obj, globlist)
|
||||
|
||||
def project(self, results, projection_func):
|
||||
"""
|
||||
@@ -341,10 +282,11 @@ class ProjectionHelper(object):
|
||||
doc_only = list(filter(None, data["only"]))
|
||||
doc_only = list({"id"} | set(doc_only)) if doc_only else None
|
||||
|
||||
for res in projection_func(
|
||||
doc_type=doc_type, projection=doc_only, ids=ids
|
||||
):
|
||||
self._proxy_manager.update(res)
|
||||
for ids_chunk in iterutils.chunked_iter(ids, max_items_per_fetch):
|
||||
for res in projection_func(
|
||||
doc_type=doc_type, projection=doc_only, ids=ids_chunk
|
||||
):
|
||||
self._proxy_manager.update(res)
|
||||
|
||||
if len(ref_projection) == 1:
|
||||
do_projection(items[0])
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import hashlib
|
||||
from inspect import ismethod, getmembers
|
||||
from typing import Sequence, Tuple, Set, Optional, Callable, Any
|
||||
from typing import Sequence, Tuple, Set, Optional, Callable, Any, Mapping
|
||||
from uuid import uuid4
|
||||
|
||||
from mongoengine import EmbeddedDocumentField, ListField, Document, Q
|
||||
@@ -121,8 +121,8 @@ def init_cls_from_base(cls, instance):
|
||||
)
|
||||
|
||||
|
||||
def get_company_or_none_constraint(company=None):
|
||||
return Q(company__in=(company, None, "")) | Q(company__exists=False)
|
||||
def get_company_or_none_constraint(company=""):
|
||||
return Q(company__in=list({company, ""}))
|
||||
|
||||
|
||||
def field_does_not_exist(field: str, empty_value=None, is_list=False) -> Q:
|
||||
@@ -203,18 +203,22 @@ def _names_set(*names: str) -> Set[str]:
|
||||
return set(names) | set(f"-{name}" for name in names)
|
||||
|
||||
|
||||
system_tag_names = {
|
||||
_system_tag_names = {
|
||||
"model": _names_set("active", "archived"),
|
||||
"project": _names_set("archived", "public", "default"),
|
||||
"task": _names_set("active", "archived", "development"),
|
||||
"queue": _names_set("default"),
|
||||
}
|
||||
|
||||
system_tag_prefixes = {"task": _names_set("annotat")}
|
||||
_system_tag_prefixes = {"task": _names_set("annotat")}
|
||||
|
||||
|
||||
def partition_tags(
|
||||
entity: str, tags: Sequence[str], system_tags: Optional[Sequence[str]] = ()
|
||||
entity: str,
|
||||
tags: Sequence[str],
|
||||
system_tags: Optional[Sequence[str]] = (),
|
||||
system_tag_names: Mapping = _system_tag_names,
|
||||
system_tag_prefixes: Mapping = _system_tag_prefixes,
|
||||
) -> Tuple[Sequence[str], Sequence[str]]:
|
||||
"""
|
||||
Partition the given tags sequence into system and user-defined tags
|
||||
|
||||
23
apiserver/documentation/api_versions.md
Normal file
23
apiserver/documentation/api_versions.md
Normal file
@@ -0,0 +1,23 @@
|
||||
### Supported api versions
|
||||
|
||||
| Release | ApiVersion |
|
||||
|---------|------------|
|
||||
| v1.17 | 2.31 |
|
||||
| v1.16 | 2.30 |
|
||||
| v1.15 | 2.29 |
|
||||
| v1.14 | 2.28 |
|
||||
| v1.13 | 2.27 |
|
||||
| v1.12 | 2.26 |
|
||||
| v1.11 | 2.25 |
|
||||
| v1.10 | 2.24 |
|
||||
| v1.9 | 2.23 |
|
||||
| v1.8 | 2.22 |
|
||||
| v1.7 | 2.21 |
|
||||
| v1.6 | 2.20 |
|
||||
| v1.5 | 2.19 |
|
||||
| v1.4 | 2.18 |
|
||||
| v1.3 | 2.17 |
|
||||
| v1.2 | 2.16 |
|
||||
| v1.1 | 2.15 |
|
||||
| v1.0 | 2.14 |
|
||||
| v0.17 | 2.13 |
|
||||
@@ -4,34 +4,89 @@ Apply elasticsearch mappings to given hosts.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional, Sequence, Tuple
|
||||
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch import Elasticsearch, exceptions
|
||||
|
||||
HERE = Path(__file__).resolve().parent
|
||||
logging.getLogger("elasticsearch").setLevel(logging.WARNING)
|
||||
logging.getLogger("elastic_transport").setLevel(logging.WARNING)
|
||||
|
||||
|
||||
def apply_mappings_to_cluster(
|
||||
hosts: Sequence, key: Optional[str] = None, es_args: dict = None, http_auth: Tuple = None
|
||||
hosts: Sequence,
|
||||
key: Optional[str] = None,
|
||||
es_args: dict = None,
|
||||
http_auth: Tuple = None,
|
||||
):
|
||||
"""Hosts maybe a sequence of strings or dicts in the form {"host": <host>, "port": <port>}"""
|
||||
|
||||
def _send_template(f):
|
||||
with f.open() as json_data:
|
||||
data = json.load(json_data)
|
||||
template_name = f.stem
|
||||
res = es.indices.put_template(template_name, body=data)
|
||||
return {"mapping": template_name, "result": res}
|
||||
def _send_component_template(ct_file):
|
||||
with ct_file.open() as json_data:
|
||||
body = json.load(json_data)
|
||||
template_name = f"{ct_file.stem}"
|
||||
res = es.cluster.put_component_template(name=template_name, body=body)
|
||||
return {"component_template": template_name, "result": res}
|
||||
|
||||
p = HERE / "mappings"
|
||||
if key:
|
||||
files = (p / key).glob("*.json")
|
||||
else:
|
||||
files = p.glob("**/*.json")
|
||||
def _send_index_template(it_file):
|
||||
with it_file.open() as json_data:
|
||||
body = json.load(json_data)
|
||||
template_name = f"{it_file.stem}"
|
||||
res = es.indices.put_index_template(name=template_name, body=body)
|
||||
return {"index_template": template_name, "result": res}
|
||||
|
||||
# def _send_legacy_template(f):
|
||||
# with f.open() as json_data:
|
||||
# data = json.load(json_data)
|
||||
# template_name = f.stem
|
||||
# res = es.indices.put_template(name=template_name, body=data)
|
||||
# return {"mapping": template_name, "result": res}
|
||||
|
||||
def _delete_legacy_templates(legacy_folder):
|
||||
res_list = []
|
||||
for lt in legacy_folder.glob("*.json"):
|
||||
template_name = lt.stem
|
||||
try:
|
||||
if not es.indices.get_template(name=template_name):
|
||||
continue
|
||||
res = es.indices.delete_template(name=template_name)
|
||||
except exceptions.NotFoundError:
|
||||
continue
|
||||
res_list.append({"deleted legacy mapping": template_name, "result": res})
|
||||
|
||||
return res_list
|
||||
|
||||
es = Elasticsearch(hosts=hosts, http_auth=http_auth, **(es_args or {}))
|
||||
return [_send_template(f) for f in files]
|
||||
root = HERE / "index_templates"
|
||||
if key:
|
||||
folders = [root / key]
|
||||
else:
|
||||
folders = [f for f in root.iterdir() if f.is_dir()]
|
||||
|
||||
ret = []
|
||||
for f in folders:
|
||||
for ct in (f / "component_templates").glob("*.json"):
|
||||
ret.append(_send_component_template(ct))
|
||||
for it in f.glob("*.json"):
|
||||
ret.append(_send_index_template(it))
|
||||
|
||||
legacy_root = HERE / "mappings"
|
||||
for f in folders:
|
||||
legacy_f = legacy_root / f.stem
|
||||
if not legacy_f.exists() or not legacy_f.is_dir():
|
||||
continue
|
||||
ret.extend(_delete_legacy_templates(legacy_f))
|
||||
|
||||
return ret
|
||||
# p = HERE / "mappings"
|
||||
# if key:
|
||||
# files = (p / key).glob("*.json")
|
||||
# else:
|
||||
# files = p.glob("**/*.json")
|
||||
#
|
||||
# return [_send_template(f) for f in files]
|
||||
|
||||
|
||||
def parse_args():
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
{
|
||||
"template": {
|
||||
"settings": {
|
||||
"number_of_replicas": 0,
|
||||
"number_of_shards": 1
|
||||
},
|
||||
"mappings": {
|
||||
"_source": {
|
||||
"enabled": true
|
||||
},
|
||||
"properties": {
|
||||
"@timestamp": {
|
||||
"type": "date"
|
||||
},
|
||||
"task": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"type": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"worker": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "date"
|
||||
},
|
||||
"iter": {
|
||||
"type": "long"
|
||||
},
|
||||
"metric": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"variant": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"value": {
|
||||
"type": "float"
|
||||
},
|
||||
"company_id": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"model_event": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
18
apiserver/elastic/index_templates/events/events_log.json
Normal file
18
apiserver/elastic/index_templates/events/events_log.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"index_patterns": "events-log-*",
|
||||
"template": {
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"msg": {
|
||||
"type": "text",
|
||||
"index": false
|
||||
},
|
||||
"level": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"priority": 500,
|
||||
"composed_of": ["events_common"]
|
||||
}
|
||||
18
apiserver/elastic/index_templates/events/events_plot.json
Normal file
18
apiserver/elastic/index_templates/events/events_plot.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"index_patterns": "events-plot-*",
|
||||
"template": {
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"plot_str": {
|
||||
"type": "text",
|
||||
"index": false
|
||||
},
|
||||
"plot_data": {
|
||||
"type": "binary"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"priority": 500,
|
||||
"composed_of": ["events_common"]
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"index_patterns": "events-training_debug_image-*",
|
||||
"template": {
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"key": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"url": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"priority": 500,
|
||||
"composed_of": ["events_common"]
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"index_patterns": "events-training_stats_scalar-*",
|
||||
"priority": 500,
|
||||
"composed_of": ["events_common"]
|
||||
}
|
||||
31
apiserver/elastic/index_templates/workers/queue_metrics.json
Normal file
31
apiserver/elastic/index_templates/workers/queue_metrics.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"index_patterns": "queue_metrics_*",
|
||||
"template": {
|
||||
"settings": {
|
||||
"number_of_replicas": 0,
|
||||
"number_of_shards": 1
|
||||
},
|
||||
"mappings": {
|
||||
"_source": {
|
||||
"enabled": true
|
||||
},
|
||||
"properties": {
|
||||
"timestamp": {
|
||||
"type": "date"
|
||||
},
|
||||
"queue": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"average_waiting_time": {
|
||||
"type": "float"
|
||||
},
|
||||
"queue_length": {
|
||||
"type": "integer"
|
||||
},
|
||||
"company_id": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
79
apiserver/elastic/index_templates/workers/serving_stats.json
Normal file
79
apiserver/elastic/index_templates/workers/serving_stats.json
Normal file
@@ -0,0 +1,79 @@
|
||||
{
|
||||
"index_patterns": "serving_stats_*",
|
||||
"template": {
|
||||
"settings": {
|
||||
"number_of_replicas": 0,
|
||||
"number_of_shards": 1
|
||||
},
|
||||
"mappings": {
|
||||
"_source": {
|
||||
"enabled": true
|
||||
},
|
||||
"properties": {
|
||||
"timestamp": {
|
||||
"type": "date"
|
||||
},
|
||||
"container_id": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"company_id": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"endpoint_url": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"requests_num": {
|
||||
"type": "integer"
|
||||
},
|
||||
"requests_min": {
|
||||
"type": "float"
|
||||
},
|
||||
"uptime_sec": {
|
||||
"type": "integer"
|
||||
},
|
||||
"latency_ms": {
|
||||
"type": "integer"
|
||||
},
|
||||
"cpu_usage": {
|
||||
"type": "float"
|
||||
},
|
||||
"cpu_num": {
|
||||
"type": "integer"
|
||||
},
|
||||
"gpu_usage": {
|
||||
"type": "float"
|
||||
},
|
||||
"gpu_num": {
|
||||
"type": "integer"
|
||||
},
|
||||
"memory_used": {
|
||||
"type": "float"
|
||||
},
|
||||
"memory_free": {
|
||||
"type": "float"
|
||||
},
|
||||
"memory_total": {
|
||||
"type": "float"
|
||||
},
|
||||
"gpu_memory_used": {
|
||||
"type": "float"
|
||||
},
|
||||
"gpu_memory_free": {
|
||||
"type": "float"
|
||||
},
|
||||
"gpu_memory_total": {
|
||||
"type": "float"
|
||||
},
|
||||
"disk_free_home": {
|
||||
"type": "float"
|
||||
},
|
||||
"network_rx": {
|
||||
"type": "float"
|
||||
},
|
||||
"network_tx": {
|
||||
"type": "float"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
43
apiserver/elastic/index_templates/workers/worker_stats.json
Normal file
43
apiserver/elastic/index_templates/workers/worker_stats.json
Normal file
@@ -0,0 +1,43 @@
|
||||
{
|
||||
"index_patterns": "worker_stats_*",
|
||||
"template": {
|
||||
"settings": {
|
||||
"number_of_replicas": 0,
|
||||
"number_of_shards": 1
|
||||
},
|
||||
"mappings": {
|
||||
"_source": {
|
||||
"enabled": true
|
||||
},
|
||||
"properties": {
|
||||
"timestamp": {
|
||||
"type": "date"
|
||||
},
|
||||
"worker": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"category": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"metric": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"variant": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"value": {
|
||||
"type": "float"
|
||||
},
|
||||
"unit": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"task": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"company_id": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user