Add MongoDB performance optimization

This commit is contained in:
allegroai 2020-04-01 19:20:53 +03:00
parent 427322a424
commit cbcaa7c789
5 changed files with 49 additions and 39 deletions

View File

@ -12,35 +12,32 @@ from database.model.user import User
class Model(DbModelMixin, Document): class Model(DbModelMixin, Document):
meta = { meta = {
'db_alias': Database.backend, "db_alias": Database.backend,
'strict': strict, "strict": strict,
'indexes': [ "indexes": [
"parent",
"project",
"task",
("company", "name"),
{ {
'name': '%s.model.main_text_index' % Database.backend, "name": "%s.model.main_text_index" % Database.backend,
'fields': [ "fields": ["$name", "$id", "$comment", "$parent", "$task", "$project"],
'$name', "default_language": "english",
'$id', "weights": {
'$comment', "name": 10,
'$parent', "id": 10,
'$task', "comment": 10,
'$project', "parent": 5,
], "task": 3,
'default_language': 'english', "project": 3,
'weights': { },
'name': 10, },
'id': 10,
'comment': 10,
'parent': 5,
'task': 3,
'project': 3,
}
}
], ],
} }
id = StringField(primary_key=True) id = StringField(primary_key=True)
name = StrippedStringField(user_set_allowed=True, min_length=3) name = StrippedStringField(user_set_allowed=True, min_length=3)
parent = StringField(reference_field='Model', required=False) parent = StringField(reference_field="Model", required=False)
user = StringField(required=True, reference_field=User) user = StringField(required=True, reference_field=User)
company = StringField(required=True, reference_field=Company) company = StringField(required=True, reference_field=Company)
project = StringField(reference_field=Project, user_set_allowed=True) project = StringField(reference_field=Project, user_set_allowed=True)
@ -49,9 +46,11 @@ class Model(DbModelMixin, Document):
comment = StringField(user_set_allowed=True) comment = StringField(user_set_allowed=True)
tags = ListField(StringField(required=True), user_set_allowed=True) tags = ListField(StringField(required=True), user_set_allowed=True)
system_tags = ListField(StringField(required=True), user_set_allowed=True) system_tags = ListField(StringField(required=True), user_set_allowed=True)
uri = StrippedStringField(default='', user_set_allowed=True) uri = StrippedStringField(default="", user_set_allowed=True)
framework = StringField() framework = StringField()
design = SafeDictField() design = SafeDictField()
labels = ModelLabels() labels = ModelLabels()
ready = BooleanField(required=True) ready = BooleanField(required=True)
ui_cache = SafeDictField(default=dict, user_set_allowed=True, exclude_by_default=True) ui_cache = SafeDictField(
default=dict, user_set_allowed=True, exclude_by_default=True
)

View File

@ -17,12 +17,13 @@ class Project(AttributedDocument):
"db_alias": Database.backend, "db_alias": Database.backend,
"strict": strict, "strict": strict,
"indexes": [ "indexes": [
("company", "name"),
{ {
"name": "%s.project.main_text_index" % Database.backend, "name": "%s.project.main_text_index" % Database.backend,
"fields": ["$name", "$id", "$description"], "fields": ["$name", "$id", "$description"],
"default_language": "english", "default_language": "english",
"weights": {"name": 10, "id": 10, "description": 10}, "weights": {"name": 10, "id": 10, "description": 10},
} },
], ],
} }

View File

@ -110,6 +110,12 @@ class Task(AttributedDocument):
"created", "created",
"started", "started",
"completed", "completed",
"parent",
"project",
("company", "name"),
("company", "type", "system_tags", "status"),
("company", "project", "type", "system_tags", "status"),
("status", "last_update"), # for maintenance tasks
{ {
"name": "%s.task.main_text_index" % Database.backend, "name": "%s.task.main_text_index" % Database.backend,
"fields": [ "fields": [

View File

@ -33,8 +33,7 @@ create_fields = {
} }
get_all_query_options = Project.QueryParameterOptions( get_all_query_options = Project.QueryParameterOptions(
pattern_fields=("name", "description"), pattern_fields=("name", "description"), list_fields=("tags", "system_tags", "id"),
list_fields=("tags", "system_tags", "id"),
) )
@ -58,7 +57,7 @@ def get_by_id(call):
call.result.data = {"project": project_dict} call.result.data = {"project": project_dict}
def make_projects_get_all_pipelines(project_ids, specific_state=None): def make_projects_get_all_pipelines(company_id, project_ids, specific_state=None):
archived = EntityVisibility.archived.value archived = EntityVisibility.archived.value
def ensure_valid_fields(): def ensure_valid_fields():
@ -74,15 +73,18 @@ def make_projects_get_all_pipelines(project_ids, specific_state=None):
"else": "$system_tags", "else": "$system_tags",
} }
}, },
"status": { "status": {"$ifNull": ["$status", "unknown"]},
"$ifNull": ["$status", "unknown"]
}
} }
} }
status_count_pipeline = [ status_count_pipeline = [
# count tasks per project per status # count tasks per project per status
{"$match": {"project": {"$in": project_ids}}}, {
"$match": {
"company": {"$in": [None, "", company_id]},
"project": {"$in": project_ids},
}
},
ensure_valid_fields(), ensure_valid_fields(),
{ {
"$group": { "$group": {
@ -153,7 +155,10 @@ def make_projects_get_all_pipelines(project_ids, specific_state=None):
{ {
"$match": { "$match": {
"type": {"$in": ["training", "testing", "annotation"]}, "type": {"$in": ["training", "testing", "annotation"]},
"project": {"$in": project_ids}, "project": {
"company": {"$in": [None, "", company_id]},
"$in": project_ids,
},
} }
}, },
ensure_valid_fields(), ensure_valid_fields(),
@ -195,7 +200,7 @@ def get_all_ex(call: APICall):
ids = [project["id"] for project in projects] ids = [project["id"] for project in projects]
status_count_pipeline, runtime_pipeline = make_projects_get_all_pipelines( status_count_pipeline, runtime_pipeline = make_projects_get_all_pipelines(
ids, specific_state=specific_state call.identity.company, ids, specific_state=specific_state
) )
default_counts = dict.fromkeys(get_options(TaskStatus), 0) default_counts = dict.fromkeys(get_options(TaskStatus), 0)
@ -205,7 +210,7 @@ def get_all_ex(call: APICall):
status_count = defaultdict(lambda: {}) status_count = defaultdict(lambda: {})
key = itemgetter(EntityVisibility.archived.value) key = itemgetter(EntityVisibility.archived.value)
for result in Task.aggregate(*status_count_pipeline): for result in Task.aggregate(status_count_pipeline):
for k, group in groupby(sorted(result["counts"], key=key), key): for k, group in groupby(sorted(result["counts"], key=key), key):
section = ( section = (
EntityVisibility.archived if k else EntityVisibility.active EntityVisibility.archived if k else EntityVisibility.active
@ -219,7 +224,7 @@ def get_all_ex(call: APICall):
runtime = { runtime = {
result["_id"]: {k: v for k, v in result.items() if k != "_id"} result["_id"]: {k: v for k, v in result.items() if k != "_id"}
for result in Task.aggregate(*runtime_pipeline) for result in Task.aggregate(runtime_pipeline)
} }
def safe_get(obj, path, default=None): def safe_get(obj, path, default=None):

View File

@ -750,8 +750,7 @@ class CleanupResult(object):
deleted_models = attr.ib(type=int) deleted_models = attr.ib(type=int)
def cleanup_task(task, force=False): def cleanup_task(task: Task, force: bool = False):
# type: (Task, bool) -> CleanupResult
""" """
Validate task deletion and delete/modify all its output. Validate task deletion and delete/modify all its output.
:param task: task object :param task: task object