Add support for Dataset projects

allegroai 2022-07-08 17:45:03 +03:00
parent ee87778548
commit 3a60f00d93
8 changed files with 114 additions and 17 deletions

View File

@@ -18,3 +18,4 @@ class EntitiesCountRequest(models.Base):
tasks = DictField()
models = DictField()
pipelines = DictField()
datasets = DictField()

View File

@@ -57,6 +57,7 @@ class ProjectModelMetadataValuesRequest(MultiProjectRequest):
class ProjectsGetRequest(models.Base):
include_dataset_stats = fields.BoolField(default=False)
include_stats = fields.BoolField(default=False)
include_stats_filter = DictField()
stats_with_children = fields.BoolField(default=True)
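The new include_dataset_stats flag is opt-in, so existing callers of projects.get_all_ex are unaffected by default. A minimal sketch of a request body that opts in (only the fields relevant here are shown; the project id is a placeholder):

# Illustrative get_all_ex request body; field names follow the model above.
get_all_ex_body = {
    "id": ["<project-id>"],          # placeholder project id
    "include_dataset_stats": True,   # new in this commit, defaults to False
    "include_stats": False,
    "stats_with_children": True,
}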

View File

@@ -516,6 +516,50 @@ class ProjectBLL:
aggregated[pid] = reduce(func, relevant_data)
return aggregated
@classmethod
def get_dataset_stats(
cls,
company: str,
project_ids: Sequence[str],
users: Sequence[str] = None,
) -> Dict[str, dict]:
if not project_ids:
return {}
task_runtime_pipeline = [
{
"$match": {
**cls.get_match_conditions(
company=company,
project_ids=project_ids,
users=users,
filter_={"system_tags": [f"-{EntityVisibility.archived.value}"]}
),
"runtime": {"$exists": True, "$gt": {}},
}
},
{
"$project": {"project": 1, "runtime": 1, "last_update": 1}
},
{
"$sort": {"project": 1, "last_update": 1}
},
{
"$group": {
"_id": "$project",
"runtime": {"$last": "$runtime"},
}
},
]
return {
r["_id"]: {
"file_count": r["runtime"].get("ds_file_count", 0),
"total_size": r["runtime"].get("ds_total_size", 0),
}
for r in Task.aggregate(task_runtime_pipeline)
}
@classmethod
def get_project_stats(
cls,
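To make the intent of the get_dataset_stats aggregation above concrete: per project, the runtime of the most recently updated, non-archived task with a non-empty runtime wins, and its ds_file_count / ds_total_size counters are reported. A self-contained plain-Python sketch of the same computation (company and user scoping omitted; illustrative only, not the server code):

from datetime import datetime


def dataset_stats_from_tasks(tasks):
    # Keep, per project, the runtime of the last-updated qualifying task
    # (mirrors the $sort on last_update plus the $group/$last stages above).
    latest_runtime = {}
    for task in sorted(tasks, key=lambda t: t["last_update"]):
        if "archived" in task.get("system_tags", ()):  # excluded by the $match stage
            continue
        if not task.get("runtime"):  # runtime must exist and be non-empty
            continue
        latest_runtime[task["project"]] = task["runtime"]
    return {
        project: {
            "file_count": runtime.get("ds_file_count", 0),
            "total_size": runtime.get("ds_total_size", 0),
        }
        for project, runtime in latest_runtime.items()
    }


tasks = [
    {"project": "p1", "last_update": datetime(2022, 7, 1),
     "runtime": {"ds_file_count": 2, "ds_total_size": 1000}},
    {"project": "p1", "last_update": datetime(2022, 7, 8),
     "runtime": {"ds_file_count": 5, "ds_total_size": 4096}},
]
print(dataset_stats_from_tasks(tasks))  # {'p1': {'file_count': 5, 'total_size': 4096}}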

View File

@@ -129,6 +129,11 @@ get_entities_count {
additionalProperties: true
description: Search criteria for pipelines
}
datasets {
type: object
additionalProperties: true
description: Search criteria for datasets
}
}
}
response {
@@ -150,6 +155,10 @@ get_entities_count {
type: integer
description: The number of pipelines matching the criteria
}
datasets {
type: integer
description: The number of datasets matching the criteria
}
}
}
}
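The automated test at the end of this commit exercises the new field; an equivalent call, assuming an already initialized API client such as the test-suite APIClient, looks like this:

# Illustrative only; `api` is assumed to be an authenticated client instance.
res = api.organization.get_entities_count(
    datasets={"system_tags": ["dataset"]},  # search criteria for dataset projects
)
print(res.datasets)  # number of dataset projects matching the criteria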

View File

@@ -230,6 +230,20 @@ _definitions {
description: "The amount of models under this project (without children projects). Returned if 'check_own_contents' flag is set in the request"
type: integer
}
dataset_stats {
description: Project dataset statistics
type: object
properties {
file_count {
type: integer
description: The number of files stored in the dataset
}
total_size {
type: integer
description: The total dataset size in bytes
}
}
}
}
}
metric_variant_result {
@@ -595,6 +609,13 @@ get_all_ex {
additionalProperties: true
}
}
"999.0": ${get_all_ex."2.17"} {
request.properties.include_dataset_stats {
description: "If true, include project dataset statistic in response"
type: boolean
default: false
}
}
}
update {
"2.1" {

View File

@@ -56,6 +56,7 @@ def get_entities_count(call: APICall, company, _):
"tasks": Task,
"models": Model,
"pipelines": Project,
"datasets": Project,
}
ret = {}
for field, entity_cls in entity_classes.items():

View File

@@ -146,25 +146,32 @@ def get_all_ex(call: APICall, company_id: str, request: ProjectsGetRequest):
project.update(**contents.get(project["id"], {}))
conform_output_tags(call, projects)
-    if not request.include_stats:
-        call.result.data = {"projects": projects, **ret_params}
-        return
+    if request.include_stats:
+        project_ids = {project["id"] for project in projects}
+        stats, children = project_bll.get_project_stats(
+            company=company_id,
+            project_ids=list(project_ids),
+            specific_state=request.stats_for_state,
+            include_children=request.stats_with_children,
+            search_hidden=request.search_hidden,
+            filter_=request.include_stats_filter,
+            users=request.active_users,
+            user_active_project_ids=user_active_project_ids,
+        )
-    project_ids = {project["id"] for project in projects}
-    stats, children = project_bll.get_project_stats(
-        company=company_id,
-        project_ids=list(project_ids),
-        specific_state=request.stats_for_state,
-        include_children=request.stats_with_children,
-        search_hidden=request.search_hidden,
-        filter_=request.include_stats_filter,
-        users=request.active_users,
-        user_active_project_ids=user_active_project_ids,
-    )
+        for project in projects:
+            project["stats"] = stats[project["id"]]
+            project["sub_projects"] = children[project["id"]]
-    for project in projects:
-        project["stats"] = stats[project["id"]]
-        project["sub_projects"] = children[project["id"]]
+    if request.include_dataset_stats:
+        project_ids = {project["id"] for project in projects}
+        dataset_stats = project_bll.get_dataset_stats(
+            company=company_id,
+            project_ids=list(project_ids),
+            users=request.active_users,
+        )
+        for project in projects:
+            project["dataset_stats"] = dataset_stats.get(project["id"])
call.result.data = {"projects": projects, **ret_params}
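End to end, the handler only runs the dataset aggregation when the caller asks for it. A usage sketch matching the test below (reusing the `api` client assumed in the earlier example; `project_id` and `task` are assumed to already exist):

# Illustrative only: dataset_stats stays None until some task in the project
# reports ds_file_count / ds_total_size in its runtime.
api.tasks.edit(task=task, runtime={"ds_file_count": 2, "ds_total_size": 1000})
data = api.projects.get_all_ex(id=[project_id], include_dataset_stats=True).projects[0]
print(data.dataset_stats)  # e.g. {"file_count": 2, "total_size": 1000}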

View File

@@ -12,6 +12,19 @@ from apiserver.tests.automated import TestService
class TestSubProjects(TestService):
def test_dataset_stats(self):
project = self._temp_project(name="Dataset test", system_tags=["dataset"])
res = self.api.organization.get_entities_count(datasets={"system_tags": ["dataset"]})
self.assertEqual(res.datasets, 1)
task = self._temp_task(project=project)
data = self.api.projects.get_all_ex(id=[project], include_dataset_stats=True).projects[0]
self.assertIsNone(data.dataset_stats)
self.api.tasks.edit(task=task, runtime={"ds_file_count": 2, "ds_total_size": 1000})
data = self.api.projects.get_all_ex(id=[project], include_dataset_stats=True).projects[0]
self.assertEqual(data.dataset_stats, {"file_count": 2, "total_size": 1000})
def test_project_aggregations(self):
"""This test requires user with user_auth_only... credentials in db"""
user2_client = APIClient(