Collect model event URLs during task and project cleanup

This commit is contained in:
allegroai 2022-11-29 17:38:03 +02:00
parent 97992b0d9e
commit 2d711e1500
3 changed files with 38 additions and 15 deletions

View File

@ -101,10 +101,13 @@ def delete_project(
updated_count = cls.objects(project__in=project_ids).update(project=None)
res = DeleteProjectResult(disassociated_tasks=updated_count)
else:
deleted_models, model_urls = _delete_models(projects=project_ids)
deleted_tasks, event_urls, artifact_urls = _delete_tasks(
deleted_models, model_event_urls, model_urls = _delete_models(
company=company, projects=project_ids
)
deleted_tasks, task_event_urls, artifact_urls = _delete_tasks(
company=company, projects=project_ids
)
event_urls = task_event_urls | model_event_urls
if delete_external_artifacts:
scheduled = _schedule_for_delete(
task_id=project_id,
@ -164,14 +167,16 @@ def _delete_tasks(company: str, projects: Sequence[str]) -> Tuple[int, Set, Set]
return deleted, event_urls, artifact_urls
def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]:
def _delete_models(
company: str, projects: Sequence[str]
) -> Tuple[int, Set[str], Set[str]]:
"""
Delete project models and update the tasks from other projects
that reference them to reference None.
"""
models = Model.objects(project__in=projects).only("task", "id", "uri")
if not models:
return 0, set()
return 0, set(), set()
model_ids = list({m.id for m in models})
@ -198,6 +203,13 @@ def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]:
upsert=False,
)
urls = {m.uri for m in models if m.uri}
event_urls, model_urls = set(), set()
for m in models:
event_urls.update(collect_debug_image_urls(company, m.id))
event_urls.update(collect_plot_image_urls(company, m.id))
if m.uri:
model_urls.add(m.uri)
event_bll.delete_multi_task_events(company, model_ids)
deleted = models.delete()
return deleted, urls
return deleted, event_urls, model_urls

View File

@ -23,6 +23,7 @@ from apiserver.database.model.url_to_delete import (
)
from apiserver.database.utils import id as db_id
log = config.logger(__file__)
event_bll = EventBLL()
@ -66,12 +67,12 @@ class CleanupResult:
)
def collect_plot_image_urls(company: str, task: str) -> Set[str]:
def collect_plot_image_urls(company: str, task_or_model: str) -> Set[str]:
urls = set()
next_scroll_id = None
while True:
events, next_scroll_id = event_bll.get_plot_image_urls(
company_id=company, task_id=task, scroll_id=next_scroll_id
company_id=company, task_id=task_or_model, scroll_id=next_scroll_id
)
if not events:
break
@ -83,7 +84,7 @@ def collect_plot_image_urls(company: str, task: str) -> Set[str]:
return urls
def collect_debug_image_urls(company: str, task: str) -> Set[str]:
def collect_debug_image_urls(company: str, task_or_model: str) -> Set[str]:
"""
Return the set of unique image urls
Uses DebugImagesIterator to make sure that we do not retrieve recycled urls
@ -92,7 +93,7 @@ def collect_debug_image_urls(company: str, task: str) -> Set[str]:
urls = set()
while True:
res, after_key = event_bll.get_debug_image_urls(
company_id=company, task_id=task, after_key=after_key,
company_id=company, task_id=task_or_model, after_key=after_key,
)
urls.update(res)
if not after_key:
@ -210,9 +211,21 @@ def cleanup_task(
if not models:
continue
if delete_output_models and allow_delete:
deleted_models += Model.objects(
id__in=[m.id for m in models if m.id not in in_use_model_ids]
).delete()
model_ids = set(m.id for m in models if m.id not in in_use_model_ids)
for m_id in model_ids:
if return_file_urls or delete_external_artifacts:
event_urls.update(collect_debug_image_urls(task.company, m_id))
event_urls.update(collect_plot_image_urls(task.company, m_id))
try:
event_bll.delete_task_events(
task.company, m_id, allow_locked=True, model=True
)
except errors.bad_request.InvalidModelId as ex:
log.info(
f"Error deleting events for the model {m_id}: {str(ex)}"
)
deleted_models += Model.objects(id__in=list(model_ids)).delete()
if in_use_model_ids:
Model.objects(id__in=list(in_use_model_ids)).update(unset__task=1)
continue

View File

@ -51,7 +51,6 @@ class TestTasksResetDelete(TestService):
task = self.new_task()
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task)
published_model_urls, draft_model_urls = self.create_task_models(task)
artifact_urls = self.send_artifacts(task)
event_urls = self.send_debug_image_events(task)
event_urls.update(self.send_plot_events(task))
@ -123,7 +122,6 @@ class TestTasksResetDelete(TestService):
) -> Tuple[str, Tuple[Set[str], Set[str]], Set[str], Set[str]]:
task = self.new_task(**kwargs)
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task, **kwargs)
published_model_urls, draft_model_urls = self.create_task_models(task, **kwargs)
artifact_urls = self.send_artifacts(task)
event_urls = self.send_debug_image_events(task)
event_urls.update(self.send_plot_events(task))