diff --git a/apiserver/bll/project/project_cleanup.py b/apiserver/bll/project/project_cleanup.py index ce1e87b..9906785 100644 --- a/apiserver/bll/project/project_cleanup.py +++ b/apiserver/bll/project/project_cleanup.py @@ -101,10 +101,13 @@ def delete_project( updated_count = cls.objects(project__in=project_ids).update(project=None) res = DeleteProjectResult(disassociated_tasks=updated_count) else: - deleted_models, model_urls = _delete_models(projects=project_ids) - deleted_tasks, event_urls, artifact_urls = _delete_tasks( + deleted_models, model_event_urls, model_urls = _delete_models( company=company, projects=project_ids ) + deleted_tasks, task_event_urls, artifact_urls = _delete_tasks( + company=company, projects=project_ids + ) + event_urls = task_event_urls | model_event_urls if delete_external_artifacts: scheduled = _schedule_for_delete( task_id=project_id, @@ -164,14 +167,16 @@ def _delete_tasks(company: str, projects: Sequence[str]) -> Tuple[int, Set, Set] return deleted, event_urls, artifact_urls -def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]: +def _delete_models( + company: str, projects: Sequence[str] +) -> Tuple[int, Set[str], Set[str]]: """ Delete project models and update the tasks from other projects that reference them to reference None. """ models = Model.objects(project__in=projects).only("task", "id", "uri") if not models: - return 0, set() + return 0, set(), set() model_ids = list({m.id for m in models}) @@ -198,6 +203,13 @@ def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]: upsert=False, ) - urls = {m.uri for m in models if m.uri} + event_urls, model_urls = set(), set() + for m in models: + event_urls.update(collect_debug_image_urls(company, m.id)) + event_urls.update(collect_plot_image_urls(company, m.id)) + if m.uri: + model_urls.add(m.uri) + + event_bll.delete_multi_task_events(company, model_ids) deleted = models.delete() - return deleted, urls + return deleted, event_urls, model_urls diff --git a/apiserver/bll/task/task_cleanup.py b/apiserver/bll/task/task_cleanup.py index 85321b9..0e106e9 100644 --- a/apiserver/bll/task/task_cleanup.py +++ b/apiserver/bll/task/task_cleanup.py @@ -23,6 +23,7 @@ from apiserver.database.model.url_to_delete import ( ) from apiserver.database.utils import id as db_id +log = config.logger(__file__) event_bll = EventBLL() @@ -66,12 +67,12 @@ class CleanupResult: ) -def collect_plot_image_urls(company: str, task: str) -> Set[str]: +def collect_plot_image_urls(company: str, task_or_model: str) -> Set[str]: urls = set() next_scroll_id = None while True: events, next_scroll_id = event_bll.get_plot_image_urls( - company_id=company, task_id=task, scroll_id=next_scroll_id + company_id=company, task_id=task_or_model, scroll_id=next_scroll_id ) if not events: break @@ -83,7 +84,7 @@ def collect_plot_image_urls(company: str, task: str) -> Set[str]: return urls -def collect_debug_image_urls(company: str, task: str) -> Set[str]: +def collect_debug_image_urls(company: str, task_or_model: str) -> Set[str]: """ Return the set of unique image urls Uses DebugImagesIterator to make sure that we do not retrieve recycled urls @@ -92,7 +93,7 @@ def collect_debug_image_urls(company: str, task: str) -> Set[str]: urls = set() while True: res, after_key = event_bll.get_debug_image_urls( - company_id=company, task_id=task, after_key=after_key, + company_id=company, task_id=task_or_model, after_key=after_key, ) urls.update(res) if not after_key: @@ -210,9 +211,21 @@ def cleanup_task( if not models: continue if delete_output_models and allow_delete: - deleted_models += Model.objects( - id__in=[m.id for m in models if m.id not in in_use_model_ids] - ).delete() + model_ids = set(m.id for m in models if m.id not in in_use_model_ids) + for m_id in model_ids: + if return_file_urls or delete_external_artifacts: + event_urls.update(collect_debug_image_urls(task.company, m_id)) + event_urls.update(collect_plot_image_urls(task.company, m_id)) + try: + event_bll.delete_task_events( + task.company, m_id, allow_locked=True, model=True + ) + except errors.bad_request.InvalidModelId as ex: + log.info( + f"Error deleting events for the model {m_id}: {str(ex)}" + ) + + deleted_models += Model.objects(id__in=list(model_ids)).delete() if in_use_model_ids: Model.objects(id__in=list(in_use_model_ids)).update(unset__task=1) continue diff --git a/apiserver/tests/automated/test_tasks_delete.py b/apiserver/tests/automated/test_tasks_delete.py index df16751..650b150 100644 --- a/apiserver/tests/automated/test_tasks_delete.py +++ b/apiserver/tests/automated/test_tasks_delete.py @@ -51,7 +51,6 @@ class TestTasksResetDelete(TestService): task = self.new_task() (_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task) - published_model_urls, draft_model_urls = self.create_task_models(task) artifact_urls = self.send_artifacts(task) event_urls = self.send_debug_image_events(task) event_urls.update(self.send_plot_events(task)) @@ -123,7 +122,6 @@ class TestTasksResetDelete(TestService): ) -> Tuple[str, Tuple[Set[str], Set[str]], Set[str], Set[str]]: task = self.new_task(**kwargs) (_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task, **kwargs) - published_model_urls, draft_model_urls = self.create_task_models(task, **kwargs) artifact_urls = self.send_artifacts(task) event_urls = self.send_debug_image_events(task) event_urls.update(self.send_plot_events(task))