Optimize MongoDB index usage for large databases

This commit is contained in:
clearml 2024-12-05 22:17:13 +02:00
parent a1956cdd83
commit ee9f45ea61
11 changed files with 47 additions and 19 deletions

View File

@ -186,7 +186,7 @@ class ModelBLL:
[
{
"$match": {
"company": {"$in": [None, "", company]},
"company": {"$in": ["", company]},
"_id": {"$in": model_ids},
}
},

View File

@ -43,8 +43,8 @@ class _TagsCache:
query &= GetMixin.get_list_field_query(name, vals)
if project:
query &= Q(project__in=project_ids_with_children([project]))
else:
query &= Q(system_tags__nin=[EntityVisibility.hidden.value])
# else:
# query &= Q(system_tags__nin=[EntityVisibility.hidden.value])
return self.db_cls.objects(query).distinct(field)

View File

@ -1015,8 +1015,8 @@ class ProjectBLL:
if include_subprojects:
projects = _ids_with_children(projects)
query &= Q(project__in=projects)
else:
query &= Q(system_tags__nin=[EntityVisibility.hidden.value])
# else:
# query &= Q(system_tags__nin=[EntityVisibility.hidden.value])
if state == EntityVisibility.archived:
query &= Q(system_tags__in=[EntityVisibility.archived.value])
@ -1101,7 +1101,7 @@ class ProjectBLL:
project_field: str = "project",
):
conditions = {
"company": {"$in": [None, "", company]},
"company": {"$in": ["", company]},
project_field: {"$in": project_ids},
}
if users:

View File

@ -47,7 +47,7 @@ class ProjectQueries:
@staticmethod
def _get_company_constraint(company_id: str, allow_public: bool = True) -> dict:
if allow_public:
return {"company": {"$in": [None, "", company_id]}}
return {"company": {"$in": ["", company_id]}}
return {"company": company_id}

View File

@ -525,7 +525,7 @@ class QueueBLL(object):
[
{
"$match": {
"company": {"$in": [None, "", company]},
"company": {"$in": ["", company]},
"_id": queue_id,
}
},

View File

@ -193,7 +193,7 @@ class HyperParams:
pipeline = [
{
"$match": {
"company": {"$in": [None, "", company_id]},
"company": {"$in": ["", company_id]},
"_id": {"$in": task_ids},
}
},

View File

@ -1394,7 +1394,7 @@ class DbModelMixin(GetMixin, ProperDictMixin, UpdateMixin):
else:
items = list(
cls.objects(
id__in=ids, company__in=(None, ""), company_origin=company_id
id__in=ids, company="", company_origin=company_id
).only("id")
)
update: dict = dict(set__company=company_id, unset__company_origin=1)

View File

@ -37,10 +37,18 @@ class Model(AttributedDocument):
"project",
"task",
"last_update",
("company", "framework"),
("company", "last_update"),
("company", "name"),
("company", "user"),
("company", "uri"),
# distinct queries support
("company", "tags"),
("company", "system_tags"),
("company", "project", "tags"),
("company", "project", "system_tags"),
("company", "user"),
("company", "project", "user"),
("company", "framework"),
("company", "project", "framework"),
{
"name": "%s.model.main_text_index" % Database.backend,
"fields": ["$name", "$id", "$comment", "$parent", "$task", "$project"],

View File

@ -183,9 +183,8 @@ class Task(AttributedDocument):
"status_changed",
"models.input.model",
("company", "name"),
("company", "user"),
("company", "status", "type"),
("company", "system_tags", "last_update"),
("company", "last_update", "system_tags"),
("company", "type", "system_tags", "status"),
("company", "project", "type", "system_tags", "status"),
("status", "last_update"), # for maintenance tasks
@ -193,6 +192,17 @@ class Task(AttributedDocument):
"fields": ["company", "project"],
"collation": AttributedDocument._numeric_locale,
},
# distinct queries support
("company", "tags"),
("company", "system_tags"),
("company", "project", "tags"),
("company", "project", "system_tags"),
("company", "user"),
("company", "project", "user"),
("company", "parent"),
("company", "project", "parent"),
("company", "type"),
("company", "project", "type"),
{
"name": "%s.task.main_text_index" % Database.backend,
"fields": [

View File

@ -121,8 +121,8 @@ def init_cls_from_base(cls, instance):
)
def get_company_or_none_constraint(company=None):
return Q(company__in=(company, None, "")) | Q(company__exists=False)
def get_company_or_none_constraint(company=""):
return Q(company__in=list({company, ""}))
def field_does_not_exist(field: str, empty_value=None, is_list=False) -> Q:

View File

@ -188,7 +188,12 @@ def get_all(call: APICall, company_id, _):
def get_frameworks(call: APICall, company_id, request: GetFrameworksRequest):
call.result.data = {
"frameworks": sorted(
project_bll.get_model_frameworks(company_id, project_ids=request.projects)
filter(
None,
project_bll.get_model_frameworks(
company_id, project_ids=request.projects
),
)
)
}
@ -590,7 +595,10 @@ def _delete_model_events(
)
event_urls = delete_task_events_and_collect_urls(
company=company_id, task_ids=model_ids, model=True, wait_for_delete=sync_delete
company=company_id,
task_ids=model_ids,
model=True,
wait_for_delete=sync_delete,
)
if event_urls:
schedule_for_delete(
@ -601,7 +609,9 @@ def _delete_model_events(
can_delete_folders=False,
)
event_bll.delete_task_events(company_id, model_ids, model=True, wait_for_delete=sync_delete)
event_bll.delete_task_events(
company_id, model_ids, model=True, wait_for_delete=sync_delete
)
@endpoint("models.delete", request_data_model=DeleteModelRequest)