diff --git a/apiserver/apierrors/errors.conf b/apiserver/apierrors/errors.conf index c42814f..22dbb0d 100644 --- a/apiserver/apierrors/errors.conf +++ b/apiserver/apierrors/errors.conf @@ -76,12 +76,14 @@ 402: ["project_has_tasks", "project has associated tasks"] 403: ["project_not_found", "project not found"] 405: ["project_has_models", "project has associated models"] + 406: ["project_has_datasets", "project has associated non-empty datasets"] 407: ["invalid_project_name", "invalid project name"] 408: ["cannot_update_project_location", "Cannot update project location. Use projects.move instead"] 409: ["project_path_exceeds_max", "Project path exceed the maximum allowed depth"] 410: ["project_source_and_destination_are_the_same", "Project has the same source and destination paths"] 411: ["project_cannot_be_moved_under_itself", "Project can not be moved under itself in the projects hierarchy"] 412: ["project_cannot_be_merged_into_its_child", "Project can not be merged into its own child"] + 413: ["project_has_pipelines", "project has associated pipelines with active controllers"] # Queues 701: ["invalid_queue_id", "invalid queue id"] diff --git a/apiserver/bll/project/project_cleanup.py b/apiserver/bll/project/project_cleanup.py index cb3b4aa..b33b6f5 100644 --- a/apiserver/bll/project/project_cleanup.py +++ b/apiserver/bll/project/project_cleanup.py @@ -3,6 +3,7 @@ from datetime import datetime from typing import Tuple, Set, Sequence import attr +from mongoengine import Q from apiserver.apierrors import errors from apiserver.bll.event import EventBLL @@ -17,7 +18,14 @@ from apiserver.database.model import EntityVisibility from apiserver.database.model.model import Model from apiserver.database.model.project import Project from apiserver.database.model.task.task import Task, ArtifactModes, TaskType, TaskStatus -from .project_bll import ProjectBLL +from .project_bll import ( + ProjectBLL, + pipeline_tag, + pipelines_project_name, + dataset_tag, + datasets_project_name, + reports_tag, +) from .sub_projects import _ids_with_children log = config.logger(__file__) @@ -34,30 +42,82 @@ class DeleteProjectResult: urls: TaskUrls = None +def _get_child_project_ids( + project_id: str, +) -> Tuple[Sequence[str], Sequence[str], Sequence[str]]: + project_ids = _ids_with_children([project_id]) + pipeline_ids = list( + Project.objects( + id__in=project_ids, + system_tags__in=[pipeline_tag], + basename__ne=pipelines_project_name, + ).scalar("id") + ) + dataset_ids = list( + Project.objects( + id__in=project_ids, + system_tags__in=[dataset_tag], + basename__ne=datasets_project_name, + ).scalar("id") + ) + return project_ids, pipeline_ids, dataset_ids + + def validate_project_delete(company: str, project_id: str): project = Project.get_for_writing( company=company, id=project_id, _only=("id", "path", "system_tags") ) if not project: raise errors.bad_request.InvalidProjectId(id=project_id) - is_pipeline = "pipeline" in (project.system_tags or []) - project_ids = _ids_with_children([project_id]) + + project_ids, pipeline_ids, dataset_ids = _get_child_project_ids(project_id) ret = {} - for cls in ProjectBLL.child_classes: - ret[f"{cls.__name__.lower()}s"] = cls.objects(project__in=project_ids).count() - for cls in ProjectBLL.child_classes: - query = dict( - project__in=project_ids, system_tags__nin=[EntityVisibility.archived.value] - ) - name = f"non_archived_{cls.__name__.lower()}s" - if not is_pipeline: - ret[name] = cls.objects(**query).count() - else: - ret[name] = ( - cls.objects(**query, type=TaskType.controller).count() - if cls == Task - else 0 + if pipeline_ids: + pipelines_with_active_controllers = Task.objects( + project__in=pipeline_ids, + type=TaskType.controller, + system_tags__nin=[EntityVisibility.archived.value], + ).distinct("project") + ret["pipelines"] = len(pipelines_with_active_controllers) + else: + ret["pipelines"] = 0 + if dataset_ids: + datasets_with_data = Task.objects( + project__in=dataset_ids, system_tags__nin=[EntityVisibility.archived.value], + ).distinct("project") + ret["datasets"] = len(datasets_with_data) + else: + ret["datasets"] = 0 + + project_ids = list(set(project_ids) - set(pipeline_ids) - set(dataset_ids)) + if project_ids: + in_project_query = Q(project__in=project_ids) + for cls in (Task, Model): + query = ( + in_project_query & Q(system_tags__nin=[reports_tag]) + if cls is Task + else in_project_query ) + ret[f"{cls.__name__.lower()}s"] = cls.objects(query).count() + ret[f"non_archived_{cls.__name__.lower()}s"] = cls.objects( + query & Q(system_tags__nin=[EntityVisibility.archived.value]) + ).count() + ret["reports"] = Task.objects( + in_project_query & Q(system_tags__in=[reports_tag]) + ).count() + ret["non_archived_reports"] = Task.objects( + in_project_query + & Q( + system_tags__in=[reports_tag], + system_tags__nin=[EntityVisibility.archived.value], + ) + ).count() + else: + for cls in (Task, Model): + ret[f"{cls.__name__.lower()}s"] = 0 + ret[f"non_archived_{cls.__name__.lower()}s"] = 0 + ret["reports"] = 0 + ret["non_archived_reports"] = 0 return ret @@ -79,31 +139,49 @@ def delete_project( delete_external_artifacts = delete_external_artifacts and config.get( "services.async_urls_delete.enabled", True ) - is_pipeline = "pipeline" in (project.system_tags or []) - project_ids = _ids_with_children([project_id]) + project_ids, pipeline_ids, dataset_ids = _get_child_project_ids(project_id) if not force: - query = dict( - project__in=project_ids, system_tags__nin=[EntityVisibility.archived.value] - ) - if not is_pipeline: + if pipeline_ids: + active_controllers = Task.objects( + project__in=pipeline_ids, + type=TaskType.controller, + system_tags__nin=[EntityVisibility.archived.value], + ).only("id") + if active_controllers: + raise errors.bad_request.ProjectHasPipelines( + "please archive all the controllers or use force=true", + id=project_id, + ) + if dataset_ids: + datasets_with_data = Task.objects( + project__in=dataset_ids, + system_tags__nin=[EntityVisibility.archived.value], + ).only("id") + if datasets_with_data: + raise errors.bad_request.ProjectHasDatasets( + "please delete all the dataset versions or use force=true", + id=project_id, + ) + + regular_projects = list(set(project_ids) - set(pipeline_ids) - set(dataset_ids)) + if regular_projects: for cls, error in ( (Task, errors.bad_request.ProjectHasTasks), (Model, errors.bad_request.ProjectHasModels), ): - non_archived = cls.objects(**query).only("id") + non_archived = cls.objects( + project__in=regular_projects, + system_tags__nin=[EntityVisibility.archived.value], + ).only("id") if non_archived: - raise error("use force=true to delete", id=project_id) - else: - non_archived = Task.objects(**query, type=TaskType.controller).only("id") - if non_archived: - raise errors.bad_request.ProjectHasTasks( - "please archive all the runs inside the project", id=project_id - ) + raise error("use force=true", id=project_id) if not delete_contents: disassociated = defaultdict(int) for cls in ProjectBLL.child_classes: - disassociated[cls] = cls.objects(project__in=project_ids).update(project=None) + disassociated[cls] = cls.objects(project__in=project_ids).update( + project=None + ) res = DeleteProjectResult(disassociated_tasks=disassociated[Task]) else: deleted_models, model_event_urls, model_urls = _delete_models( @@ -209,19 +287,14 @@ def _delete_models( "status": TaskStatus.published, }, update={ - "$set": { - "models.output.$[elem].model": deleted, - "last_change": now, - } + "$set": {"models.output.$[elem].model": deleted, "last_change": now,} }, array_filters=[{"elem.model": {"$in": model_ids}}], upsert=False, ) # update unpublished tasks Task.objects( - id__in=model_tasks, - project__nin=projects, - status__ne=TaskStatus.published, + id__in=model_tasks, project__nin=projects, status__ne=TaskStatus.published, ).update(pull__models__output__model__in=model_ids, set__last_change=now) event_urls, model_urls = set(), set() diff --git a/apiserver/schema/services/projects.conf b/apiserver/schema/services/projects.conf index f852317..0bbd5d4 100644 --- a/apiserver/schema/services/projects.conf +++ b/apiserver/schema/services/projects.conf @@ -808,6 +808,26 @@ validate_delete { } } } + "999.0": ${validate_delete."2.14"} { + response.properties { + reports { + description: "The total number of reports under the project and all its children" + type: integer + } + non_archived_reports { + description: "The total number of non-archived reports under the project and all its children" + type: integer + } + pipelines { + description: "The total number of pipelines with active controllers under the project and all its children" + type: integer + } + datasets { + description: "The total number of non-empty datasets under the project and all its children" + type: integer + } + } + } } delete { "2.1" { diff --git a/apiserver/tests/automated/test_project_delete.py b/apiserver/tests/automated/test_project_delete.py index 698a0f3..4195d72 100644 --- a/apiserver/tests/automated/test_project_delete.py +++ b/apiserver/tests/automated/test_project_delete.py @@ -5,19 +5,16 @@ from apiserver.database.utils import id as db_id class TestProjectsDelete(TestService): - def setUp(self, version="2.14"): - super().setUp(version=version) - - def new_task(self, **kwargs): + def new_task(self, type="testing", **kwargs): return self.create_temp( - "tasks", type="testing", name=db_id(), **kwargs + "tasks", type=type, name=db_id(), **kwargs ) def new_model(self, **kwargs): return self.create_temp("models", uri="file:///a/b", name=db_id(), labels={}, **kwargs) - def new_project(self, **kwargs): - return self.create_temp("projects", name=db_id(), description="", **kwargs) + def new_project(self, name=None, **kwargs): + return self.create_temp("projects", name=name or db_id(), description="", **kwargs) def test_delete_fails_with_active_task(self): project = self.new_project() @@ -52,3 +49,33 @@ class TestProjectsDelete(TestService): self.assertEqual(res.models, 1) self.assertEqual(res.non_archived_models, 0) self.api.projects.delete(project=project) + + def test_delete_dataset(self): + name = "Test datasets delete" + project = self.new_project(name=name) + dataset = self.new_project(f"{name}/.datasets/test dataset", system_tags=["dataset"]) + task = self.new_task(project=dataset, system_tags=["dataset"]) + res = self.api.projects.validate_delete(project=project) + self.assertEqual(res.datasets, 1) + with self.api.raises(errors.bad_request.ProjectHasDatasets): + self.api.projects.delete(project=project) + + self.api.tasks.delete(task=task) + res = self.api.projects.validate_delete(project=project) + self.assertEqual(res.datasets, 0) + self.api.projects.delete(project=project) + + def test_delete_pipeline(self): + name = "Test pipelines delete" + project = self.new_project(name=name) + pipeline = self.new_project(f"{name}/.pipelines/test pipeline", system_tags=["pipeline"]) + task = self.new_task(project=pipeline, type="controller", system_tags=["pipeline"]) + res = self.api.projects.validate_delete(project=project) + self.assertEqual(res.pipelines, 1) + with self.api.raises(errors.bad_request.ProjectHasPipelines): + self.api.projects.delete(project=project) + + self.api.tasks.edit(task=task, system_tags=[EntityVisibility.archived.value]) + res = self.api.projects.validate_delete(project=project) + self.assertEqual(res.pipelines, 0) + self.api.projects.delete(project=project)