mirror of
https://github.com/clearml/clearml-server
synced 2025-06-26 23:15:47 +00:00
Add support for Dataset projects
This commit is contained in:
parent
ee87778548
commit
3a60f00d93
@ -18,3 +18,4 @@ class EntitiesCountRequest(models.Base):
|
|||||||
tasks = DictField()
|
tasks = DictField()
|
||||||
models = DictField()
|
models = DictField()
|
||||||
pipelines = DictField()
|
pipelines = DictField()
|
||||||
|
datasets = DictField()
|
||||||
|
@ -57,6 +57,7 @@ class ProjectModelMetadataValuesRequest(MultiProjectRequest):
|
|||||||
|
|
||||||
|
|
||||||
class ProjectsGetRequest(models.Base):
|
class ProjectsGetRequest(models.Base):
|
||||||
|
include_dataset_stats = fields.BoolField(default=False)
|
||||||
include_stats = fields.BoolField(default=False)
|
include_stats = fields.BoolField(default=False)
|
||||||
include_stats_filter = DictField()
|
include_stats_filter = DictField()
|
||||||
stats_with_children = fields.BoolField(default=True)
|
stats_with_children = fields.BoolField(default=True)
|
||||||
|
@ -516,6 +516,50 @@ class ProjectBLL:
|
|||||||
aggregated[pid] = reduce(func, relevant_data)
|
aggregated[pid] = reduce(func, relevant_data)
|
||||||
return aggregated
|
return aggregated
|
||||||
|
|
||||||
|
@classmethod
def get_dataset_stats(
    cls,
    company: str,
    project_ids: Sequence[str],
    users: Sequence[str] = None,
) -> Dict[str, dict]:
    """
    Collect dataset statistics per project from task runtime data.

    For each requested project the tasks matching the standard match
    conditions (built with a ``-<archived>`` system tag filter, presumably
    excluding archived tasks) and having a ``runtime`` field greater than an
    empty document are sorted by ``last_update``; the ``runtime`` of the
    last one in that order is used as the source of the statistics.

    :param company: company id passed to the match conditions
    :param project_ids: ids of the projects to compute the statistics for
    :param users: optional user ids passed to the match conditions
    :return: mapping of project id to a dict with ``file_count`` and
        ``total_size`` (each defaults to 0 when the corresponding
        ``ds_file_count`` / ``ds_total_size`` runtime key is missing).
        Projects without any matching task are absent from the mapping.
        An empty mapping is returned when no project ids are passed.
    """
    if not project_ids:
        return {}

    # Select candidate tasks; "runtime" is added after the shared conditions
    # so that it takes precedence over any key of the same name
    match_stage = {
        "$match": {
            **cls.get_match_conditions(
                company=company,
                project_ids=project_ids,
                users=users,
                filter_={"system_tags": [f"-{EntityVisibility.archived.value}"]},
            ),
            "runtime": {"$exists": True, "$gt": {}},
        }
    }
    # Keep only the fields needed by the following stages
    project_stage = {"$project": {"project": 1, "runtime": 1, "last_update": 1}}
    # Order tasks inside each project by update time, oldest first
    sort_stage = {"$sort": {"project": 1, "last_update": 1}}
    # Take the runtime of the last (most recently updated) task per project
    group_stage = {
        "$group": {
            "_id": "$project",
            "runtime": {"$last": "$runtime"},
        }
    }
    pipeline = [match_stage, project_stage, sort_stage, group_stage]

    stats_by_project = {}
    for entry in Task.aggregate(pipeline):
        runtime = entry["runtime"]
        stats_by_project[entry["_id"]] = {
            "file_count": runtime.get("ds_file_count", 0),
            "total_size": runtime.get("ds_total_size", 0),
        }
    return stats_by_project
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_project_stats(
|
def get_project_stats(
|
||||||
cls,
|
cls,
|
||||||
|
@ -129,6 +129,11 @@ get_entities_count {
|
|||||||
additionalProperties: true
|
additionalProperties: true
|
||||||
description: Search criteria for pipelines
|
description: Search criteria for pipelines
|
||||||
}
|
}
|
||||||
|
datasets {
|
||||||
|
type: object
|
||||||
|
additionalProperties: true
|
||||||
|
description: Search criteria for datasets
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
response {
|
response {
|
||||||
@ -150,6 +155,10 @@ get_entities_count {
|
|||||||
type: integer
|
type: integer
|
||||||
description: The number of pipelines matching the criteria
|
description: The number of pipelines matching the criteria
|
||||||
}
|
}
|
||||||
|
datasets {
|
||||||
|
type: integer
|
||||||
|
description: The number of datasets matching the criteria
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -230,6 +230,20 @@ _definitions {
|
|||||||
description: "The amount of models under this project (without children projects). Returned if 'check_own_contents' flag is set in the request"
|
description: "The amount of models under this project (without children projects). Returned if 'check_own_contents' flag is set in the request"
|
||||||
type: integer
|
type: integer
|
||||||
}
|
}
|
||||||
|
dataset_stats {
|
||||||
|
description: Project dataset statistics
|
||||||
|
type: object
|
||||||
|
properties {
|
||||||
|
file_count {
|
||||||
|
type: integer
|
||||||
|
description: The number of files stored in the dataset
|
||||||
|
}
|
||||||
|
total_size {
|
||||||
|
type: integer
|
||||||
|
description: The total dataset size in bytes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
metric_variant_result {
|
metric_variant_result {
|
||||||
@ -595,6 +609,13 @@ get_all_ex {
|
|||||||
additionalProperties: true
|
additionalProperties: true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
"999.0": ${get_all_ex."2.17"} {
|
||||||
|
request.properties.include_dataset_stats {
|
||||||
|
description: "If true, include project dataset statistic in response"
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
update {
|
update {
|
||||||
"2.1" {
|
"2.1" {
|
||||||
|
@ -56,6 +56,7 @@ def get_entities_count(call: APICall, company, _):
|
|||||||
"tasks": Task,
|
"tasks": Task,
|
||||||
"models": Model,
|
"models": Model,
|
||||||
"pipelines": Project,
|
"pipelines": Project,
|
||||||
|
"datasets": Project,
|
||||||
}
|
}
|
||||||
ret = {}
|
ret = {}
|
||||||
for field, entity_cls in entity_classes.items():
|
for field, entity_cls in entity_classes.items():
|
||||||
|
@ -146,25 +146,32 @@ def get_all_ex(call: APICall, company_id: str, request: ProjectsGetRequest):
|
|||||||
project.update(**contents.get(project["id"], {}))
|
project.update(**contents.get(project["id"], {}))
|
||||||
|
|
||||||
conform_output_tags(call, projects)
|
conform_output_tags(call, projects)
|
||||||
if not request.include_stats:
|
if request.include_stats:
|
||||||
call.result.data = {"projects": projects, **ret_params}
|
project_ids = {project["id"] for project in projects}
|
||||||
return
|
stats, children = project_bll.get_project_stats(
|
||||||
|
company=company_id,
|
||||||
|
project_ids=list(project_ids),
|
||||||
|
specific_state=request.stats_for_state,
|
||||||
|
include_children=request.stats_with_children,
|
||||||
|
search_hidden=request.search_hidden,
|
||||||
|
filter_=request.include_stats_filter,
|
||||||
|
users=request.active_users,
|
||||||
|
user_active_project_ids=user_active_project_ids,
|
||||||
|
)
|
||||||
|
|
||||||
project_ids = {project["id"] for project in projects}
|
for project in projects:
|
||||||
stats, children = project_bll.get_project_stats(
|
project["stats"] = stats[project["id"]]
|
||||||
company=company_id,
|
project["sub_projects"] = children[project["id"]]
|
||||||
project_ids=list(project_ids),
|
|
||||||
specific_state=request.stats_for_state,
|
|
||||||
include_children=request.stats_with_children,
|
|
||||||
search_hidden=request.search_hidden,
|
|
||||||
filter_=request.include_stats_filter,
|
|
||||||
users=request.active_users,
|
|
||||||
user_active_project_ids=user_active_project_ids,
|
|
||||||
)
|
|
||||||
|
|
||||||
for project in projects:
|
if request.include_dataset_stats:
|
||||||
project["stats"] = stats[project["id"]]
|
project_ids = {project["id"] for project in projects}
|
||||||
project["sub_projects"] = children[project["id"]]
|
dataset_stats = project_bll.get_dataset_stats(
|
||||||
|
company=company_id,
|
||||||
|
project_ids=list(project_ids),
|
||||||
|
users=request.active_users,
|
||||||
|
)
|
||||||
|
for project in projects:
|
||||||
|
project["dataset_stats"] = dataset_stats.get(project["id"])
|
||||||
|
|
||||||
call.result.data = {"projects": projects, **ret_params}
|
call.result.data = {"projects": projects, **ret_params}
|
||||||
|
|
||||||
|
@ -12,6 +12,19 @@ from apiserver.tests.automated import TestService
|
|||||||
|
|
||||||
|
|
||||||
class TestSubProjects(TestService):
|
class TestSubProjects(TestService):
|
||||||
|
def test_dataset_stats(self):
    """
    Dataset projects are counted by get_entities_count and
    get_all_ex returns dataset_stats taken from the task runtime
    """
    dataset_tags = ["dataset"]
    project = self._temp_project(name="Dataset test", system_tags=dataset_tags)
    counts = self.api.organization.get_entities_count(
        datasets={"system_tags": ["dataset"]}
    )
    self.assertEqual(counts.datasets, 1)

    def fetch_project():
        # Re-read the project together with its dataset statistics
        response = self.api.projects.get_all_ex(
            id=[project], include_dataset_stats=True
        )
        return response.projects[0]

    # A task without runtime data does not produce dataset stats
    task = self._temp_task(project=project)
    self.assertIsNone(fetch_project().dataset_stats)

    # Once the task runtime carries the dataset fields they are reported
    self.api.tasks.edit(
        task=task, runtime={"ds_file_count": 2, "ds_total_size": 1000}
    )
    self.assertEqual(
        fetch_project().dataset_stats, {"file_count": 2, "total_size": 1000}
    )
|
||||||
|
|
||||||
def test_project_aggregations(self):
|
def test_project_aggregations(self):
|
||||||
"""This test requires user with user_auth_only... credentials in db"""
|
"""This test requires user with user_auth_only... credentials in db"""
|
||||||
user2_client = APIClient(
|
user2_client = APIClient(
|
||||||
|
Loading…
Reference in New Issue
Block a user