Collect model event URLs during task and project cleanup

This commit is contained in:
allegroai 2022-11-29 17:38:03 +02:00
parent 97992b0d9e
commit 2d711e1500
3 changed files with 38 additions and 15 deletions

View File

@ -101,10 +101,13 @@ def delete_project(
updated_count = cls.objects(project__in=project_ids).update(project=None) updated_count = cls.objects(project__in=project_ids).update(project=None)
res = DeleteProjectResult(disassociated_tasks=updated_count) res = DeleteProjectResult(disassociated_tasks=updated_count)
else: else:
deleted_models, model_urls = _delete_models(projects=project_ids) deleted_models, model_event_urls, model_urls = _delete_models(
deleted_tasks, event_urls, artifact_urls = _delete_tasks(
company=company, projects=project_ids company=company, projects=project_ids
) )
deleted_tasks, task_event_urls, artifact_urls = _delete_tasks(
company=company, projects=project_ids
)
event_urls = task_event_urls | model_event_urls
if delete_external_artifacts: if delete_external_artifacts:
scheduled = _schedule_for_delete( scheduled = _schedule_for_delete(
task_id=project_id, task_id=project_id,
@ -164,14 +167,16 @@ def _delete_tasks(company: str, projects: Sequence[str]) -> Tuple[int, Set, Set]
return deleted, event_urls, artifact_urls return deleted, event_urls, artifact_urls
def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]: def _delete_models(
company: str, projects: Sequence[str]
) -> Tuple[int, Set[str], Set[str]]:
""" """
Delete project models and update the tasks from other projects Delete project models and update the tasks from other projects
that reference them to reference None. that reference them to reference None.
""" """
models = Model.objects(project__in=projects).only("task", "id", "uri") models = Model.objects(project__in=projects).only("task", "id", "uri")
if not models: if not models:
return 0, set() return 0, set(), set()
model_ids = list({m.id for m in models}) model_ids = list({m.id for m in models})
@ -198,6 +203,13 @@ def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]:
upsert=False, upsert=False,
) )
urls = {m.uri for m in models if m.uri} event_urls, model_urls = set(), set()
for m in models:
event_urls.update(collect_debug_image_urls(company, m.id))
event_urls.update(collect_plot_image_urls(company, m.id))
if m.uri:
model_urls.add(m.uri)
event_bll.delete_multi_task_events(company, model_ids)
deleted = models.delete() deleted = models.delete()
return deleted, urls return deleted, event_urls, model_urls

View File

@ -23,6 +23,7 @@ from apiserver.database.model.url_to_delete import (
) )
from apiserver.database.utils import id as db_id from apiserver.database.utils import id as db_id
log = config.logger(__file__)
event_bll = EventBLL() event_bll = EventBLL()
@ -66,12 +67,12 @@ class CleanupResult:
) )
def collect_plot_image_urls(company: str, task: str) -> Set[str]: def collect_plot_image_urls(company: str, task_or_model: str) -> Set[str]:
urls = set() urls = set()
next_scroll_id = None next_scroll_id = None
while True: while True:
events, next_scroll_id = event_bll.get_plot_image_urls( events, next_scroll_id = event_bll.get_plot_image_urls(
company_id=company, task_id=task, scroll_id=next_scroll_id company_id=company, task_id=task_or_model, scroll_id=next_scroll_id
) )
if not events: if not events:
break break
@ -83,7 +84,7 @@ def collect_plot_image_urls(company: str, task: str) -> Set[str]:
return urls return urls
def collect_debug_image_urls(company: str, task: str) -> Set[str]: def collect_debug_image_urls(company: str, task_or_model: str) -> Set[str]:
""" """
Return the set of unique image urls Return the set of unique image urls
Uses DebugImagesIterator to make sure that we do not retrieve recycled urls Uses DebugImagesIterator to make sure that we do not retrieve recycled urls
@ -92,7 +93,7 @@ def collect_debug_image_urls(company: str, task: str) -> Set[str]:
urls = set() urls = set()
while True: while True:
res, after_key = event_bll.get_debug_image_urls( res, after_key = event_bll.get_debug_image_urls(
company_id=company, task_id=task, after_key=after_key, company_id=company, task_id=task_or_model, after_key=after_key,
) )
urls.update(res) urls.update(res)
if not after_key: if not after_key:
@ -210,9 +211,21 @@ def cleanup_task(
if not models: if not models:
continue continue
if delete_output_models and allow_delete: if delete_output_models and allow_delete:
deleted_models += Model.objects( model_ids = set(m.id for m in models if m.id not in in_use_model_ids)
id__in=[m.id for m in models if m.id not in in_use_model_ids] for m_id in model_ids:
).delete() if return_file_urls or delete_external_artifacts:
event_urls.update(collect_debug_image_urls(task.company, m_id))
event_urls.update(collect_plot_image_urls(task.company, m_id))
try:
event_bll.delete_task_events(
task.company, m_id, allow_locked=True, model=True
)
except errors.bad_request.InvalidModelId as ex:
log.info(
f"Error deleting events for the model {m_id}: {str(ex)}"
)
deleted_models += Model.objects(id__in=list(model_ids)).delete()
if in_use_model_ids: if in_use_model_ids:
Model.objects(id__in=list(in_use_model_ids)).update(unset__task=1) Model.objects(id__in=list(in_use_model_ids)).update(unset__task=1)
continue continue

View File

@ -51,7 +51,6 @@ class TestTasksResetDelete(TestService):
task = self.new_task() task = self.new_task()
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task) (_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task)
published_model_urls, draft_model_urls = self.create_task_models(task)
artifact_urls = self.send_artifacts(task) artifact_urls = self.send_artifacts(task)
event_urls = self.send_debug_image_events(task) event_urls = self.send_debug_image_events(task)
event_urls.update(self.send_plot_events(task)) event_urls.update(self.send_plot_events(task))
@ -123,7 +122,6 @@ class TestTasksResetDelete(TestService):
) -> Tuple[str, Tuple[Set[str], Set[str]], Set[str], Set[str]]: ) -> Tuple[str, Tuple[Set[str], Set[str]], Set[str], Set[str]]:
task = self.new_task(**kwargs) task = self.new_task(**kwargs)
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task, **kwargs) (_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task, **kwargs)
published_model_urls, draft_model_urls = self.create_task_models(task, **kwargs)
artifact_urls = self.send_artifacts(task) artifact_urls = self.send_artifacts(task)
event_urls = self.send_debug_image_events(task) event_urls = self.send_debug_image_events(task)
event_urls.update(self.send_plot_events(task)) event_urls.update(self.send_plot_events(task))