mirror of
https://github.com/clearml/clearml-server
synced 2025-05-29 09:28:48 +00:00
Add support for Dataset projects
This commit is contained in:
parent
ee87778548
commit
3a60f00d93
@ -18,3 +18,4 @@ class EntitiesCountRequest(models.Base):
|
||||
tasks = DictField()
|
||||
models = DictField()
|
||||
pipelines = DictField()
|
||||
datasets = DictField()
|
||||
|
@ -57,6 +57,7 @@ class ProjectModelMetadataValuesRequest(MultiProjectRequest):
|
||||
|
||||
|
||||
class ProjectsGetRequest(models.Base):
|
||||
include_dataset_stats = fields.BoolField(default=False)
|
||||
include_stats = fields.BoolField(default=False)
|
||||
include_stats_filter = DictField()
|
||||
stats_with_children = fields.BoolField(default=True)
|
||||
|
@ -516,6 +516,50 @@ class ProjectBLL:
|
||||
aggregated[pid] = reduce(func, relevant_data)
|
||||
return aggregated
|
||||
|
||||
@classmethod
def get_dataset_stats(
    cls,
    company: str,
    project_ids: Sequence[str],
    users: Sequence[str] = None,
) -> Dict[str, dict]:
    """
    Return dataset statistics per project id: the file count and total
    size (in bytes) taken from the ``runtime`` field of the most recently
    updated non-archived task in each of the given projects.
    Projects with no matching task are absent from the result.
    """
    if not project_ids:
        return {}

    # Restrict to non-archived tasks that actually carry a non-empty
    # runtime dict ($gt: {} matches any dict with at least one key).
    match_stage = {
        "$match": {
            **cls.get_match_conditions(
                company=company,
                project_ids=project_ids,
                users=users,
                filter_={"system_tags": [f"-{EntityVisibility.archived.value}"]}
            ),
            "runtime": {"$exists": True, "$gt": {}},
        }
    }
    # Sorting by last_update makes $last pick the newest runtime per project.
    pipeline = [
        match_stage,
        {"$project": {"project": 1, "runtime": 1, "last_update": 1}},
        {"$sort": {"project": 1, "last_update": 1}},
        {"$group": {"_id": "$project", "runtime": {"$last": "$runtime"}}},
    ]

    stats: Dict[str, dict] = {}
    for doc in Task.aggregate(pipeline):
        runtime = doc["runtime"]
        stats[doc["_id"]] = {
            "file_count": runtime.get("ds_file_count", 0),
            "total_size": runtime.get("ds_total_size", 0),
        }
    return stats
|
||||
|
||||
@classmethod
|
||||
def get_project_stats(
|
||||
cls,
|
||||
|
@ -129,6 +129,11 @@ get_entities_count {
|
||||
additionalProperties: true
|
||||
description: Search criteria for pipelines
|
||||
}
|
||||
datasets {
|
||||
type: object
|
||||
additionalProperties: true
|
||||
description: Search criteria for datasets
|
||||
}
|
||||
}
|
||||
}
|
||||
response {
|
||||
@ -150,6 +155,10 @@ get_entities_count {
|
||||
type: integer
|
||||
description: The number of pipelines matching the criteria
|
||||
}
|
||||
datasets {
|
||||
type: integer
|
||||
description: The number of datasets matching the criteria
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -230,6 +230,20 @@ _definitions {
|
||||
description: "The amount of models under this project (without children projects). Returned if 'check_own_contents' flag is set in the request"
|
||||
type: integer
|
||||
}
|
||||
dataset_stats {
|
||||
description: Project dataset statistics
|
||||
type: object
|
||||
properties {
|
||||
file_count {
|
||||
type: integer
|
||||
description: The number of files stored in the dataset
|
||||
}
|
||||
total_size {
|
||||
type: integer
|
||||
description: The total dataset size in bytes
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
metric_variant_result {
|
||||
@ -595,6 +609,13 @@ get_all_ex {
|
||||
additionalProperties: true
|
||||
}
|
||||
}
|
||||
"999.0": ${get_all_ex."2.17"} {
|
||||
request.properties.include_dataset_stats {
|
||||
description: "If true, include project dataset statistics in response"
|
||||
type: boolean
|
||||
default: false
|
||||
}
|
||||
}
|
||||
}
|
||||
update {
|
||||
"2.1" {
|
||||
|
@ -56,6 +56,7 @@ def get_entities_count(call: APICall, company, _):
|
||||
"tasks": Task,
|
||||
"models": Model,
|
||||
"pipelines": Project,
|
||||
"datasets": Project,
|
||||
}
|
||||
ret = {}
|
||||
for field, entity_cls in entity_classes.items():
|
||||
|
@ -146,25 +146,32 @@ def get_all_ex(call: APICall, company_id: str, request: ProjectsGetRequest):
|
||||
project.update(**contents.get(project["id"], {}))
|
||||
|
||||
conform_output_tags(call, projects)
|
||||
if not request.include_stats:
|
||||
call.result.data = {"projects": projects, **ret_params}
|
||||
return
|
||||
if request.include_stats:
|
||||
project_ids = {project["id"] for project in projects}
|
||||
stats, children = project_bll.get_project_stats(
|
||||
company=company_id,
|
||||
project_ids=list(project_ids),
|
||||
specific_state=request.stats_for_state,
|
||||
include_children=request.stats_with_children,
|
||||
search_hidden=request.search_hidden,
|
||||
filter_=request.include_stats_filter,
|
||||
users=request.active_users,
|
||||
user_active_project_ids=user_active_project_ids,
|
||||
)
|
||||
|
||||
project_ids = {project["id"] for project in projects}
|
||||
stats, children = project_bll.get_project_stats(
|
||||
company=company_id,
|
||||
project_ids=list(project_ids),
|
||||
specific_state=request.stats_for_state,
|
||||
include_children=request.stats_with_children,
|
||||
search_hidden=request.search_hidden,
|
||||
filter_=request.include_stats_filter,
|
||||
users=request.active_users,
|
||||
user_active_project_ids=user_active_project_ids,
|
||||
)
|
||||
for project in projects:
|
||||
project["stats"] = stats[project["id"]]
|
||||
project["sub_projects"] = children[project["id"]]
|
||||
|
||||
for project in projects:
|
||||
project["stats"] = stats[project["id"]]
|
||||
project["sub_projects"] = children[project["id"]]
|
||||
if request.include_dataset_stats:
|
||||
project_ids = {project["id"] for project in projects}
|
||||
dataset_stats = project_bll.get_dataset_stats(
|
||||
company=company_id,
|
||||
project_ids=list(project_ids),
|
||||
users=request.active_users,
|
||||
)
|
||||
for project in projects:
|
||||
project["dataset_stats"] = dataset_stats.get(project["id"])
|
||||
|
||||
call.result.data = {"projects": projects, **ret_params}
|
||||
|
||||
|
@ -12,6 +12,19 @@ from apiserver.tests.automated import TestService
|
||||
|
||||
|
||||
class TestSubProjects(TestService):
|
||||
def test_dataset_stats(self):
    """Dataset projects are countable and expose dataset_stats built from task runtime."""

    def read_project(project_id):
        # Refetch the project with dataset statistics included in the response.
        return self.api.projects.get_all_ex(
            id=[project_id], include_dataset_stats=True
        ).projects[0]

    dataset_project = self._temp_project(name="Dataset test", system_tags=["dataset"])
    counts = self.api.organization.get_entities_count(datasets={"system_tags": ["dataset"]})
    self.assertEqual(counts.datasets, 1)

    dataset_task = self._temp_task(project=dataset_project)
    # The task carries no runtime info yet, so no stats are reported.
    self.assertIsNone(read_project(dataset_project).dataset_stats)

    self.api.tasks.edit(
        task=dataset_task, runtime={"ds_file_count": 2, "ds_total_size": 1000}
    )
    self.assertEqual(
        read_project(dataset_project).dataset_stats,
        {"file_count": 2, "total_size": 1000},
    )
|
||||
|
||||
def test_project_aggregations(self):
|
||||
"""This test requires user with user_auth_only... credentials in db"""
|
||||
user2_client = APIClient(
|
||||
|
Loading…
Reference in New Issue
Block a user