From 8b0afd47a6d0b2d09ef52f3612419a54e66fdcab Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Tue, 5 Jan 2021 18:02:01 +0200 Subject: [PATCH] Set configurable and consistent limits on variants and metrics across different iterators --- apiserver/bll/event/debug_images_iterator.py | 12 ++++--- apiserver/bll/event/debug_sample_history.py | 4 ++- apiserver/bll/event/event_bll.py | 16 +++++---- apiserver/bll/event/event_metrics.py | 36 +++++++++++++------ apiserver/config/default/services/events.conf | 13 +++++-- 5 files changed, 57 insertions(+), 24 deletions(-) diff --git a/apiserver/bll/event/debug_images_iterator.py b/apiserver/bll/event/debug_images_iterator.py index 032d04f..9b14ce2 100644 --- a/apiserver/bll/event/debug_images_iterator.py +++ b/apiserver/bll/event/debug_images_iterator.py @@ -46,6 +46,7 @@ class MetricScrollState(Base): class DebugImageEventsScrollState(Base, JsonSerializableMixin): id: str = StringField(required=True) metrics: Sequence[MetricScrollState] = ListField([MetricScrollState]) + warning: str = StringField() @attr.s(auto_attribs=True) @@ -65,7 +66,7 @@ class DebugImagesIterator: @property def _max_workers(self): - return config.get("services.events.max_metrics_concurrency", 4) + return config.get("services.events.events_retrieval.max_metrics_concurrency", 4) def __init__(self, redis: StrictRedis, es: Elasticsearch): self.es = es @@ -219,14 +220,16 @@ class DebugImagesIterator: "metrics": { "terms": { "field": "metric", - "size": EventMetrics.MAX_METRICS_COUNT, + "size": EventMetrics.max_metrics_count, + "order": {"_key": "asc"}, }, "aggs": { "last_event_timestamp": {"max": {"field": "timestamp"}}, "variants": { "terms": { "field": "variant", - "size": EventMetrics.MAX_VARIANTS_COUNT, + "size": EventMetrics.max_variants_count, + "order": {"_key": "asc"}, }, "aggs": { "urls": { @@ -379,7 +382,8 @@ class DebugImagesIterator: "variants": { "terms": { "field": "variant", - "size": EventMetrics.MAX_VARIANTS_COUNT, + "size": EventMetrics.max_variants_count, + "order": {"_key": "asc"}, }, "aggs": { "events": { diff --git a/apiserver/bll/event/debug_sample_history.py b/apiserver/bll/event/debug_sample_history.py index d27fa1f..d298d9d 100644 --- a/apiserver/bll/event/debug_sample_history.py +++ b/apiserver/bll/event/debug_sample_history.py @@ -33,6 +33,7 @@ class DebugSampleHistoryState(Base, JsonSerializableMixin): reached_first: bool = BoolField() reached_last: bool = BoolField() variant_states: Sequence[VariantState] = ListField([VariantState]) + warning: str = StringField() @attr.s(auto_attribs=True) @@ -317,7 +318,8 @@ class DebugSampleHistory: # all variants that sent debug images "terms": { "field": "variant", - "size": EventMetrics.MAX_VARIANTS_COUNT, + "size": EventMetrics.max_variants_count, + "order": {"_key": "asc"}, }, "aggs": { "last_iter": {"max": {"field": "iter"}}, diff --git a/apiserver/bll/event/event_bll.py b/apiserver/bll/event/event_bll.py index 45b890a..8642a62 100644 --- a/apiserver/bll/event/event_bll.py +++ b/apiserver/bll/event/event_bll.py @@ -435,13 +435,15 @@ class EventBLL(object): "metrics": { "terms": { "field": "metric", - "size": EventMetrics.MAX_METRICS_COUNT, + "size": EventMetrics.max_metrics_count, + "order": {"_key": "asc"}, }, "aggs": { "variants": { "terms": { "field": "variant", - "size": EventMetrics.MAX_VARIANTS_COUNT, + "size": EventMetrics.max_variants_count, + "order": {"_key": "asc"}, }, "aggs": { "iters": { @@ -661,13 +663,15 @@ class EventBLL(object): "metrics": { "terms": { "field": "metric", - "size": EventMetrics.MAX_METRICS_COUNT, + "size": EventMetrics.max_metrics_count, + "order": {"_key": "asc"}, }, "aggs": { "variants": { "terms": { "field": "variant", - "size": EventMetrics.MAX_VARIANTS_COUNT, + "size": EventMetrics.max_variants_count, + "order": {"_key": "asc"}, } } }, @@ -710,14 +714,14 @@ class EventBLL(object): "metrics": { "terms": { "field": "metric", - "size": EventMetrics.MAX_METRICS_COUNT, + "size": EventMetrics.max_metrics_count, "order": {"_key": "asc"}, }, "aggs": { "variants": { "terms": { "field": "variant", - "size": EventMetrics.MAX_VARIANTS_COUNT, + "size": EventMetrics.max_variants_count, "order": {"_key": "asc"}, }, "aggs": { diff --git a/apiserver/bll/event/event_metrics.py b/apiserver/bll/event/event_metrics.py index e2f743a..7011eb5 100644 --- a/apiserver/bll/event/event_metrics.py +++ b/apiserver/bll/event/event_metrics.py @@ -7,6 +7,7 @@ from functools import partial from operator import itemgetter from typing import Sequence, Tuple +from boltons.typeutils import classproperty from elasticsearch import Elasticsearch from mongoengine import Q @@ -30,17 +31,23 @@ class EventType(Enum): class EventMetrics: - MAX_METRICS_COUNT = 100 - MAX_VARIANTS_COUNT = 100 MAX_AGGS_ELEMENTS_COUNT = 50 MAX_SAMPLE_BUCKETS = 6000 def __init__(self, es: Elasticsearch): self.es = es + @classproperty + def max_metrics_count(self): + return config.get("services.events.events_retrieval.max_metrics_count", 100) + + @classproperty + def max_variants_count(self): + return config.get("services.events.events_retrieval.max_variants_count", 100) + @property def _max_concurrency(self): - return config.get("services.events.max_metrics_concurrency", 4) + return config.get("services.events.events_retrieval.max_metrics_concurrency", 4) @staticmethod def get_index_name(company_id, event_type): @@ -207,12 +214,17 @@ class EventMetrics: "query": {"term": {"task": task_id}}, "aggs": { "metrics": { - "terms": {"field": "metric", "size": self.MAX_METRICS_COUNT}, + "terms": { + "field": "metric", + "size": self.max_metrics_count, + "order": {"_key": "asc"}, + }, "aggs": { "variants": { "terms": { "field": "variant", - "size": self.MAX_VARIANTS_COUNT, + "size": self.max_variants_count, + "order": {"_key": "asc"}, }, "aggs": { "count": {"value_count": {"field": field}}, @@ -281,15 +293,15 @@ class EventMetrics: "metrics": { "terms": { "field": "metric", - "size": self.MAX_METRICS_COUNT, - "order": {"_key": "desc"}, + "size": self.max_metrics_count, + "order": {"_key": "asc"}, }, "aggs": { "variants": { "terms": { "field": "variant", - "size": self.MAX_VARIANTS_COUNT, - "order": {"_key": "desc"}, + "size": self.max_variants_count, + "order": {"_key": "asc"}, }, "aggs": aggregation, } @@ -396,7 +408,11 @@ class EventMetrics: }, "aggs": { "metrics": { - "terms": {"field": "metric", "size": self.MAX_METRICS_COUNT} + "terms": { + "field": "metric", + "size": self.max_metrics_count, + "order": {"_key": "asc"}, + } } }, } diff --git a/apiserver/config/default/services/events.conf b/apiserver/config/default/services/events.conf index 953e964..5b52187 100644 --- a/apiserver/config/default/services/events.conf +++ b/apiserver/config/default/services/events.conf @@ -4,12 +4,19 @@ ignore_iteration { metrics: [":monitor:machine", ":monitor:gpu"] } -# max number of concurrent queries to ES when calculating events metrics -# should not exceed the amount of concurrent connections set in the ES driver -max_metrics_concurrency: 4 events_retrieval { state_expiration_sec: 3600 + + # max number of concurrent queries to ES when calculating events metrics + # should not exceed the amount of concurrent connections set in the ES driver + max_metrics_concurrency: 4 + + # the max amount of metrics to aggregate on + max_metrics_count: 100 + + # the max amount of variants to aggregate on + max_variants_count: 100 } # if set then plot str will be checked for the valid json on plot add