Mirror of https://github.com/clearml/clearml-server (synced 2025-03-12 06:51:37 +00:00)
Project delete and validate_delete now analyses and presents info for datasets and pipelines

commit 5c80336aa9 (parent 5cd59ea6e3)
@@ -76,12 +76,14 @@
 402: ["project_has_tasks", "project has associated tasks"]
 403: ["project_not_found", "project not found"]
 405: ["project_has_models", "project has associated models"]
+406: ["project_has_datasets", "project has associated non-empty datasets"]
 407: ["invalid_project_name", "invalid project name"]
 408: ["cannot_update_project_location", "Cannot update project location. Use projects.move instead"]
 409: ["project_path_exceeds_max", "Project path exceed the maximum allowed depth"]
 410: ["project_source_and_destination_are_the_same", "Project has the same source and destination paths"]
 411: ["project_cannot_be_moved_under_itself", "Project can not be moved under itself in the projects hierarchy"]
 412: ["project_cannot_be_merged_into_its_child", "Project can not be merged into its own child"]
+413: ["project_has_pipelines", "project has associated pipelines with active controllers"]
 
 # Queues
 701: ["invalid_queue_id", "invalid queue id"]
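The two added subcodes (406 project_has_datasets, 413 project_has_pipelines) reach API clients through the standard ClearML response envelope, where bad_request errors carry result_code 400 and the subcode above in meta.result_subcode. A minimal caller-side sketch, assuming a plain HTTP session against the apiserver (the URL, auth handling, and helper name are illustrative, not part of this change):

import requests

API_SERVER = "http://localhost:8008"  # assumed apiserver address

def delete_project_via_api(session: requests.Session, project_id: str, force: bool = False) -> dict:
    # projects.delete is a POST endpoint; force=True bypasses the new dataset/pipeline checks
    resp = session.post(
        f"{API_SERVER}/projects.delete", json={"project": project_id, "force": force}
    )
    body = resp.json()
    meta = body.get("meta", {})
    if meta.get("result_code") == 400 and meta.get("result_subcode") in (406, 413):
        # 406: project has associated non-empty datasets
        # 413: project has associated pipelines with active controllers
        raise RuntimeError(meta.get("result_msg", "project delete rejected"))
    resp.raise_for_status()
    return body.get("data", {})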
@@ -3,6 +3,7 @@ from datetime import datetime
 from typing import Tuple, Set, Sequence
 
 import attr
+from mongoengine import Q
 
 from apiserver.apierrors import errors
 from apiserver.bll.event import EventBLL
@@ -17,7 +18,14 @@ from apiserver.database.model import EntityVisibility
 from apiserver.database.model.model import Model
 from apiserver.database.model.project import Project
 from apiserver.database.model.task.task import Task, ArtifactModes, TaskType, TaskStatus
-from .project_bll import ProjectBLL
+from .project_bll import (
+    ProjectBLL,
+    pipeline_tag,
+    pipelines_project_name,
+    dataset_tag,
+    datasets_project_name,
+    reports_tag,
+)
 from .sub_projects import _ids_with_children
 
 log = config.logger(__file__)
@@ -34,30 +42,82 @@ class DeleteProjectResult:
     urls: TaskUrls = None
 
 
+def _get_child_project_ids(
+    project_id: str,
+) -> Tuple[Sequence[str], Sequence[str], Sequence[str]]:
+    project_ids = _ids_with_children([project_id])
+    pipeline_ids = list(
+        Project.objects(
+            id__in=project_ids,
+            system_tags__in=[pipeline_tag],
+            basename__ne=pipelines_project_name,
+        ).scalar("id")
+    )
+    dataset_ids = list(
+        Project.objects(
+            id__in=project_ids,
+            system_tags__in=[dataset_tag],
+            basename__ne=datasets_project_name,
+        ).scalar("id")
+    )
+    return project_ids, pipeline_ids, dataset_ids
+
+
 def validate_project_delete(company: str, project_id: str):
     project = Project.get_for_writing(
         company=company, id=project_id, _only=("id", "path", "system_tags")
     )
     if not project:
         raise errors.bad_request.InvalidProjectId(id=project_id)
-    is_pipeline = "pipeline" in (project.system_tags or [])
-    project_ids = _ids_with_children([project_id])
+
+    project_ids, pipeline_ids, dataset_ids = _get_child_project_ids(project_id)
     ret = {}
-    for cls in ProjectBLL.child_classes:
-        ret[f"{cls.__name__.lower()}s"] = cls.objects(project__in=project_ids).count()
-    for cls in ProjectBLL.child_classes:
-        query = dict(
-            project__in=project_ids, system_tags__nin=[EntityVisibility.archived.value]
-        )
-        name = f"non_archived_{cls.__name__.lower()}s"
-        if not is_pipeline:
-            ret[name] = cls.objects(**query).count()
-        else:
-            ret[name] = (
-                cls.objects(**query, type=TaskType.controller).count()
-                if cls == Task
-                else 0
-            )
+    if pipeline_ids:
+        pipelines_with_active_controllers = Task.objects(
+            project__in=pipeline_ids,
+            type=TaskType.controller,
+            system_tags__nin=[EntityVisibility.archived.value],
+        ).distinct("project")
+        ret["pipelines"] = len(pipelines_with_active_controllers)
+    else:
+        ret["pipelines"] = 0
+    if dataset_ids:
+        datasets_with_data = Task.objects(
+            project__in=dataset_ids, system_tags__nin=[EntityVisibility.archived.value],
+        ).distinct("project")
+        ret["datasets"] = len(datasets_with_data)
+    else:
+        ret["datasets"] = 0
+
+    project_ids = list(set(project_ids) - set(pipeline_ids) - set(dataset_ids))
+    if project_ids:
+        in_project_query = Q(project__in=project_ids)
+        for cls in (Task, Model):
+            query = (
+                in_project_query & Q(system_tags__nin=[reports_tag])
+                if cls is Task
+                else in_project_query
+            )
+            ret[f"{cls.__name__.lower()}s"] = cls.objects(query).count()
+            ret[f"non_archived_{cls.__name__.lower()}s"] = cls.objects(
+                query & Q(system_tags__nin=[EntityVisibility.archived.value])
+            ).count()
+        ret["reports"] = Task.objects(
+            in_project_query & Q(system_tags__in=[reports_tag])
+        ).count()
+        ret["non_archived_reports"] = Task.objects(
+            in_project_query
+            & Q(
+                system_tags__in=[reports_tag],
+                system_tags__nin=[EntityVisibility.archived.value],
+            )
+        ).count()
+    else:
+        for cls in (Task, Model):
+            ret[f"{cls.__name__.lower()}s"] = 0
+            ret[f"non_archived_{cls.__name__.lower()}s"] = 0
+        ret["reports"] = 0
+        ret["non_archived_reports"] = 0
+
     return ret
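The dict returned by validate_project_delete now reports dataset and pipeline counts alongside the task/model counters, so callers can explain why a delete would be refused. A small illustrative sketch of consuming that dict (the gating logic below is an assumption for demonstration, not taken from this diff):

# Hypothetical caller-side check over the counters returned above
counts = validate_project_delete(company=company_id, project_id=project_id)
blocking = {
    key: counts[key]
    for key in ("pipelines", "datasets", "non_archived_tasks", "non_archived_models")
    if counts.get(key)
}
if blocking:
    # e.g. {"pipelines": 2, "non_archived_tasks": 5}: archive/clean these up, or delete with force=true
    print(f"Project cannot be deleted safely: {blocking}")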
@@ -79,31 +139,49 @@ def delete_project(
     delete_external_artifacts = delete_external_artifacts and config.get(
         "services.async_urls_delete.enabled", True
     )
-    is_pipeline = "pipeline" in (project.system_tags or [])
-    project_ids = _ids_with_children([project_id])
+    project_ids, pipeline_ids, dataset_ids = _get_child_project_ids(project_id)
     if not force:
-        query = dict(
-            project__in=project_ids, system_tags__nin=[EntityVisibility.archived.value]
-        )
-        if not is_pipeline:
+        if pipeline_ids:
+            active_controllers = Task.objects(
+                project__in=pipeline_ids,
+                type=TaskType.controller,
+                system_tags__nin=[EntityVisibility.archived.value],
+            ).only("id")
+            if active_controllers:
+                raise errors.bad_request.ProjectHasPipelines(
+                    "please archive all the controllers or use force=true",
+                    id=project_id,
+                )
+        if dataset_ids:
+            datasets_with_data = Task.objects(
+                project__in=dataset_ids,
+                system_tags__nin=[EntityVisibility.archived.value],
+            ).only("id")
+            if datasets_with_data:
+                raise errors.bad_request.ProjectHasDatasets(
+                    "please delete all the dataset versions or use force=true",
+                    id=project_id,
+                )
+
+        regular_projects = list(set(project_ids) - set(pipeline_ids) - set(dataset_ids))
+        if regular_projects:
             for cls, error in (
                 (Task, errors.bad_request.ProjectHasTasks),
                 (Model, errors.bad_request.ProjectHasModels),
             ):
-                non_archived = cls.objects(**query).only("id")
+                non_archived = cls.objects(
+                    project__in=regular_projects,
+                    system_tags__nin=[EntityVisibility.archived.value],
+                ).only("id")
                 if non_archived:
-                    raise error("use force=true to delete", id=project_id)
-        else:
-            non_archived = Task.objects(**query, type=TaskType.controller).only("id")
-            if non_archived:
-                raise errors.bad_request.ProjectHasTasks(
-                    "please archive all the runs inside the project", id=project_id
-                )
+                    raise error("use force=true", id=project_id)
 
     if not delete_contents:
         disassociated = defaultdict(int)
         for cls in ProjectBLL.child_classes:
-            disassociated[cls] = cls.objects(project__in=project_ids).update(project=None)
+            disassociated[cls] = cls.objects(project__in=project_ids).update(
+                project=None
+            )
         res = DeleteProjectResult(disassociated_tasks=disassociated[Task])
     else:
         deleted_models, model_event_urls, model_urls = _delete_models(
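With this gate in place, an un-forced projects.delete now fails with ProjectHasPipelines while a child pipeline project still has non-archived controllers, and with ProjectHasDatasets while a child dataset still has non-archived tasks. A hedged sketch of the retry flow from a caller's point of view, using an API session object shaped like the one in the tests further down (the helper name and its simplified error handling are assumptions):

def delete_with_fallback(api, project_id: str):
    # Try a regular delete first; the server rejects it while pipelines/datasets remain
    try:
        return api.projects.delete(project=project_id)
    except Exception:
        # Expected rejections map to subcodes 413 (project_has_pipelines) and 406 (project_has_datasets);
        # either archive the controllers / delete the dataset versions, or force the delete as below
        return api.projects.delete(project=project_id, force=True)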
@@ -209,19 +287,14 @@ def _delete_models(
             "status": TaskStatus.published,
         },
         update={
-            "$set": {
-                "models.output.$[elem].model": deleted,
-                "last_change": now,
-            }
+            "$set": {"models.output.$[elem].model": deleted, "last_change": now,}
         },
         array_filters=[{"elem.model": {"$in": model_ids}}],
         upsert=False,
     )
     # update unpublished tasks
     Task.objects(
-        id__in=model_tasks,
-        project__nin=projects,
-        status__ne=TaskStatus.published,
+        id__in=model_tasks, project__nin=projects, status__ne=TaskStatus.published,
     ).update(pull__models__output__model__in=model_ids, set__last_change=now)
 
     event_urls, model_urls = set(), set()
@@ -808,6 +808,26 @@ validate_delete {
             }
         }
     }
+    "999.0": ${validate_delete."2.14"} {
+        response.properties {
+            reports {
+                description: "The total number of reports under the project and all its children"
+                type: integer
+            }
+            non_archived_reports {
+                description: "The total number of non-archived reports under the project and all its children"
+                type: integer
+            }
+            pipelines {
+                description: "The total number of pipelines with active controllers under the project and all its children"
+                type: integer
+            }
+            datasets {
+                description: "The total number of non-empty datasets under the project and all its children"
+                type: integer
+            }
+        }
+    }
 }
 delete {
     "2.1" {
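Since "999.0" inherits from the "2.14" definition, the new properties extend the existing validate_delete response rather than replacing it. An illustrative shape of the data section a client may now receive (field names come from the schema above and from validate_project_delete; the values are made up):

# Illustrative projects.validate_delete payload under the "999.0" schema (values fabricated)
example_validate_delete_data = {
    "tasks": 12,
    "non_archived_tasks": 3,
    "models": 4,
    "non_archived_models": 0,
    "reports": 2,
    "non_archived_reports": 1,
    "pipelines": 1,  # pipelines with active (non-archived) controllers
    "datasets": 0,   # non-empty datasets
}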
@@ -5,19 +5,16 @@ from apiserver.database.utils import id as db_id
 
 
 class TestProjectsDelete(TestService):
-    def setUp(self, version="2.14"):
-        super().setUp(version=version)
-
-    def new_task(self, **kwargs):
+    def new_task(self, type="testing", **kwargs):
         return self.create_temp(
-            "tasks", type="testing", name=db_id(), **kwargs
+            "tasks", type=type, name=db_id(), **kwargs
         )
 
     def new_model(self, **kwargs):
         return self.create_temp("models", uri="file:///a/b", name=db_id(), labels={}, **kwargs)
 
-    def new_project(self, **kwargs):
-        return self.create_temp("projects", name=db_id(), description="", **kwargs)
+    def new_project(self, name=None, **kwargs):
+        return self.create_temp("projects", name=name or db_id(), description="", **kwargs)
 
     def test_delete_fails_with_active_task(self):
         project = self.new_project()
@@ -52,3 +49,33 @@ class TestProjectsDelete(TestService):
         self.assertEqual(res.models, 1)
         self.assertEqual(res.non_archived_models, 0)
         self.api.projects.delete(project=project)
+
+    def test_delete_dataset(self):
+        name = "Test datasets delete"
+        project = self.new_project(name=name)
+        dataset = self.new_project(f"{name}/.datasets/test dataset", system_tags=["dataset"])
+        task = self.new_task(project=dataset, system_tags=["dataset"])
+        res = self.api.projects.validate_delete(project=project)
+        self.assertEqual(res.datasets, 1)
+        with self.api.raises(errors.bad_request.ProjectHasDatasets):
+            self.api.projects.delete(project=project)
+
+        self.api.tasks.delete(task=task)
+        res = self.api.projects.validate_delete(project=project)
+        self.assertEqual(res.datasets, 0)
+        self.api.projects.delete(project=project)
+
+    def test_delete_pipeline(self):
+        name = "Test pipelines delete"
+        project = self.new_project(name=name)
+        pipeline = self.new_project(f"{name}/.pipelines/test pipeline", system_tags=["pipeline"])
+        task = self.new_task(project=pipeline, type="controller", system_tags=["pipeline"])
+        res = self.api.projects.validate_delete(project=project)
+        self.assertEqual(res.pipelines, 1)
+        with self.api.raises(errors.bad_request.ProjectHasPipelines):
+            self.api.projects.delete(project=project)
+
+        self.api.tasks.edit(task=task, system_tags=[EntityVisibility.archived.value])
+        res = self.api.projects.validate_delete(project=project)
+        self.assertEqual(res.pipelines, 0)
+        self.api.projects.delete(project=project)