Project delete and validate_delete now analyze and present info for datasets and pipelines

This commit is contained in:
allegroai 2023-07-26 18:36:45 +03:00
parent 5cd59ea6e3
commit 5c80336aa9
4 changed files with 168 additions and 46 deletions

View File

@@ -76,12 +76,14 @@
402: ["project_has_tasks", "project has associated tasks"] 402: ["project_has_tasks", "project has associated tasks"]
403: ["project_not_found", "project not found"] 403: ["project_not_found", "project not found"]
405: ["project_has_models", "project has associated models"] 405: ["project_has_models", "project has associated models"]
406: ["project_has_datasets", "project has associated non-empty datasets"]
407: ["invalid_project_name", "invalid project name"] 407: ["invalid_project_name", "invalid project name"]
408: ["cannot_update_project_location", "Cannot update project location. Use projects.move instead"] 408: ["cannot_update_project_location", "Cannot update project location. Use projects.move instead"]
409: ["project_path_exceeds_max", "Project path exceed the maximum allowed depth"] 409: ["project_path_exceeds_max", "Project path exceed the maximum allowed depth"]
410: ["project_source_and_destination_are_the_same", "Project has the same source and destination paths"] 410: ["project_source_and_destination_are_the_same", "Project has the same source and destination paths"]
411: ["project_cannot_be_moved_under_itself", "Project can not be moved under itself in the projects hierarchy"] 411: ["project_cannot_be_moved_under_itself", "Project can not be moved under itself in the projects hierarchy"]
412: ["project_cannot_be_merged_into_its_child", "Project can not be merged into its own child"] 412: ["project_cannot_be_merged_into_its_child", "Project can not be merged into its own child"]
413: ["project_has_pipelines", "project has associated pipelines with active controllers"]
# Queues # Queues
701: ["invalid_queue_id", "invalid queue id"] 701: ["invalid_queue_id", "invalid queue id"]

View File

@@ -3,6 +3,7 @@ from datetime import datetime
from typing import Tuple, Set, Sequence from typing import Tuple, Set, Sequence
import attr import attr
from mongoengine import Q
from apiserver.apierrors import errors from apiserver.apierrors import errors
from apiserver.bll.event import EventBLL from apiserver.bll.event import EventBLL
@@ -17,7 +18,14 @@ from apiserver.database.model import EntityVisibility
from apiserver.database.model.model import Model from apiserver.database.model.model import Model
from apiserver.database.model.project import Project from apiserver.database.model.project import Project
from apiserver.database.model.task.task import Task, ArtifactModes, TaskType, TaskStatus from apiserver.database.model.task.task import Task, ArtifactModes, TaskType, TaskStatus
from .project_bll import ProjectBLL from .project_bll import (
ProjectBLL,
pipeline_tag,
pipelines_project_name,
dataset_tag,
datasets_project_name,
reports_tag,
)
from .sub_projects import _ids_with_children from .sub_projects import _ids_with_children
log = config.logger(__file__) log = config.logger(__file__)
@@ -34,30 +42,82 @@ class DeleteProjectResult:
urls: TaskUrls = None urls: TaskUrls = None
def _get_child_project_ids(
project_id: str,
) -> Tuple[Sequence[str], Sequence[str], Sequence[str]]:
project_ids = _ids_with_children([project_id])
pipeline_ids = list(
Project.objects(
id__in=project_ids,
system_tags__in=[pipeline_tag],
basename__ne=pipelines_project_name,
).scalar("id")
)
dataset_ids = list(
Project.objects(
id__in=project_ids,
system_tags__in=[dataset_tag],
basename__ne=datasets_project_name,
).scalar("id")
)
return project_ids, pipeline_ids, dataset_ids
def validate_project_delete(company: str, project_id: str): def validate_project_delete(company: str, project_id: str):
project = Project.get_for_writing( project = Project.get_for_writing(
company=company, id=project_id, _only=("id", "path", "system_tags") company=company, id=project_id, _only=("id", "path", "system_tags")
) )
if not project: if not project:
raise errors.bad_request.InvalidProjectId(id=project_id) raise errors.bad_request.InvalidProjectId(id=project_id)
is_pipeline = "pipeline" in (project.system_tags or [])
project_ids = _ids_with_children([project_id]) project_ids, pipeline_ids, dataset_ids = _get_child_project_ids(project_id)
ret = {} ret = {}
for cls in ProjectBLL.child_classes: if pipeline_ids:
ret[f"{cls.__name__.lower()}s"] = cls.objects(project__in=project_ids).count() pipelines_with_active_controllers = Task.objects(
for cls in ProjectBLL.child_classes: project__in=pipeline_ids,
query = dict( type=TaskType.controller,
project__in=project_ids, system_tags__nin=[EntityVisibility.archived.value] system_tags__nin=[EntityVisibility.archived.value],
) ).distinct("project")
name = f"non_archived_{cls.__name__.lower()}s" ret["pipelines"] = len(pipelines_with_active_controllers)
if not is_pipeline: else:
ret[name] = cls.objects(**query).count() ret["pipelines"] = 0
else: if dataset_ids:
ret[name] = ( datasets_with_data = Task.objects(
cls.objects(**query, type=TaskType.controller).count() project__in=dataset_ids, system_tags__nin=[EntityVisibility.archived.value],
if cls == Task ).distinct("project")
else 0 ret["datasets"] = len(datasets_with_data)
else:
ret["datasets"] = 0
project_ids = list(set(project_ids) - set(pipeline_ids) - set(dataset_ids))
if project_ids:
in_project_query = Q(project__in=project_ids)
for cls in (Task, Model):
query = (
in_project_query & Q(system_tags__nin=[reports_tag])
if cls is Task
else in_project_query
) )
ret[f"{cls.__name__.lower()}s"] = cls.objects(query).count()
ret[f"non_archived_{cls.__name__.lower()}s"] = cls.objects(
query & Q(system_tags__nin=[EntityVisibility.archived.value])
).count()
ret["reports"] = Task.objects(
in_project_query & Q(system_tags__in=[reports_tag])
).count()
ret["non_archived_reports"] = Task.objects(
in_project_query
& Q(
system_tags__in=[reports_tag],
system_tags__nin=[EntityVisibility.archived.value],
)
).count()
else:
for cls in (Task, Model):
ret[f"{cls.__name__.lower()}s"] = 0
ret[f"non_archived_{cls.__name__.lower()}s"] = 0
ret["reports"] = 0
ret["non_archived_reports"] = 0
return ret return ret
@@ -79,31 +139,49 @@ def delete_project(
delete_external_artifacts = delete_external_artifacts and config.get( delete_external_artifacts = delete_external_artifacts and config.get(
"services.async_urls_delete.enabled", True "services.async_urls_delete.enabled", True
) )
is_pipeline = "pipeline" in (project.system_tags or []) project_ids, pipeline_ids, dataset_ids = _get_child_project_ids(project_id)
project_ids = _ids_with_children([project_id])
if not force: if not force:
query = dict( if pipeline_ids:
project__in=project_ids, system_tags__nin=[EntityVisibility.archived.value] active_controllers = Task.objects(
) project__in=pipeline_ids,
if not is_pipeline: type=TaskType.controller,
system_tags__nin=[EntityVisibility.archived.value],
).only("id")
if active_controllers:
raise errors.bad_request.ProjectHasPipelines(
"please archive all the controllers or use force=true",
id=project_id,
)
if dataset_ids:
datasets_with_data = Task.objects(
project__in=dataset_ids,
system_tags__nin=[EntityVisibility.archived.value],
).only("id")
if datasets_with_data:
raise errors.bad_request.ProjectHasDatasets(
"please delete all the dataset versions or use force=true",
id=project_id,
)
regular_projects = list(set(project_ids) - set(pipeline_ids) - set(dataset_ids))
if regular_projects:
for cls, error in ( for cls, error in (
(Task, errors.bad_request.ProjectHasTasks), (Task, errors.bad_request.ProjectHasTasks),
(Model, errors.bad_request.ProjectHasModels), (Model, errors.bad_request.ProjectHasModels),
): ):
non_archived = cls.objects(**query).only("id") non_archived = cls.objects(
project__in=regular_projects,
system_tags__nin=[EntityVisibility.archived.value],
).only("id")
if non_archived: if non_archived:
raise error("use force=true to delete", id=project_id) raise error("use force=true", id=project_id)
else:
non_archived = Task.objects(**query, type=TaskType.controller).only("id")
if non_archived:
raise errors.bad_request.ProjectHasTasks(
"please archive all the runs inside the project", id=project_id
)
if not delete_contents: if not delete_contents:
disassociated = defaultdict(int) disassociated = defaultdict(int)
for cls in ProjectBLL.child_classes: for cls in ProjectBLL.child_classes:
disassociated[cls] = cls.objects(project__in=project_ids).update(project=None) disassociated[cls] = cls.objects(project__in=project_ids).update(
project=None
)
res = DeleteProjectResult(disassociated_tasks=disassociated[Task]) res = DeleteProjectResult(disassociated_tasks=disassociated[Task])
else: else:
deleted_models, model_event_urls, model_urls = _delete_models( deleted_models, model_event_urls, model_urls = _delete_models(
@@ -209,19 +287,14 @@ def _delete_models(
"status": TaskStatus.published, "status": TaskStatus.published,
}, },
update={ update={
"$set": { "$set": {"models.output.$[elem].model": deleted, "last_change": now,}
"models.output.$[elem].model": deleted,
"last_change": now,
}
}, },
array_filters=[{"elem.model": {"$in": model_ids}}], array_filters=[{"elem.model": {"$in": model_ids}}],
upsert=False, upsert=False,
) )
# update unpublished tasks # update unpublished tasks
Task.objects( Task.objects(
id__in=model_tasks, id__in=model_tasks, project__nin=projects, status__ne=TaskStatus.published,
project__nin=projects,
status__ne=TaskStatus.published,
).update(pull__models__output__model__in=model_ids, set__last_change=now) ).update(pull__models__output__model__in=model_ids, set__last_change=now)
event_urls, model_urls = set(), set() event_urls, model_urls = set(), set()

View File

@@ -808,6 +808,26 @@ validate_delete {
} }
} }
} }
"999.0": ${validate_delete."2.14"} {
response.properties {
reports {
description: "The total number of reports under the project and all its children"
type: integer
}
non_archived_reports {
description: "The total number of non-archived reports under the project and all its children"
type: integer
}
pipelines {
description: "The total number of pipelines with active controllers under the project and all its children"
type: integer
}
datasets {
description: "The total number of non-empty datasets under the project and all its children"
type: integer
}
}
}
} }
delete { delete {
"2.1" { "2.1" {

View File

@@ -5,19 +5,16 @@ from apiserver.database.utils import id as db_id
class TestProjectsDelete(TestService): class TestProjectsDelete(TestService):
def setUp(self, version="2.14"): def new_task(self, type="testing", **kwargs):
super().setUp(version=version)
def new_task(self, **kwargs):
return self.create_temp( return self.create_temp(
"tasks", type="testing", name=db_id(), **kwargs "tasks", type=type, name=db_id(), **kwargs
) )
def new_model(self, **kwargs): def new_model(self, **kwargs):
return self.create_temp("models", uri="file:///a/b", name=db_id(), labels={}, **kwargs) return self.create_temp("models", uri="file:///a/b", name=db_id(), labels={}, **kwargs)
def new_project(self, **kwargs): def new_project(self, name=None, **kwargs):
return self.create_temp("projects", name=db_id(), description="", **kwargs) return self.create_temp("projects", name=name or db_id(), description="", **kwargs)
def test_delete_fails_with_active_task(self): def test_delete_fails_with_active_task(self):
project = self.new_project() project = self.new_project()
@@ -52,3 +49,33 @@ class TestProjectsDelete(TestService):
self.assertEqual(res.models, 1) self.assertEqual(res.models, 1)
self.assertEqual(res.non_archived_models, 0) self.assertEqual(res.non_archived_models, 0)
self.api.projects.delete(project=project) self.api.projects.delete(project=project)
def test_delete_dataset(self):
name = "Test datasets delete"
project = self.new_project(name=name)
dataset = self.new_project(f"{name}/.datasets/test dataset", system_tags=["dataset"])
task = self.new_task(project=dataset, system_tags=["dataset"])
res = self.api.projects.validate_delete(project=project)
self.assertEqual(res.datasets, 1)
with self.api.raises(errors.bad_request.ProjectHasDatasets):
self.api.projects.delete(project=project)
self.api.tasks.delete(task=task)
res = self.api.projects.validate_delete(project=project)
self.assertEqual(res.datasets, 0)
self.api.projects.delete(project=project)
def test_delete_pipeline(self):
name = "Test pipelines delete"
project = self.new_project(name=name)
pipeline = self.new_project(f"{name}/.pipelines/test pipeline", system_tags=["pipeline"])
task = self.new_task(project=pipeline, type="controller", system_tags=["pipeline"])
res = self.api.projects.validate_delete(project=project)
self.assertEqual(res.pipelines, 1)
with self.api.raises(errors.bad_request.ProjectHasPipelines):
self.api.projects.delete(project=project)
self.api.tasks.edit(task=task, system_tags=[EntityVisibility.archived.value])
res = self.api.projects.validate_delete(project=project)
self.assertEqual(res.pipelines, 0)
self.api.projects.delete(project=project)