mirror of
https://github.com/clearml/clearml-server
synced 2025-03-09 21:51:54 +00:00
Collect model event URLs during task and project cleanup
This commit is contained in:
parent
97992b0d9e
commit
2d711e1500
@ -101,10 +101,13 @@ def delete_project(
|
||||
updated_count = cls.objects(project__in=project_ids).update(project=None)
|
||||
res = DeleteProjectResult(disassociated_tasks=updated_count)
|
||||
else:
|
||||
deleted_models, model_urls = _delete_models(projects=project_ids)
|
||||
deleted_tasks, event_urls, artifact_urls = _delete_tasks(
|
||||
deleted_models, model_event_urls, model_urls = _delete_models(
|
||||
company=company, projects=project_ids
|
||||
)
|
||||
deleted_tasks, task_event_urls, artifact_urls = _delete_tasks(
|
||||
company=company, projects=project_ids
|
||||
)
|
||||
event_urls = task_event_urls | model_event_urls
|
||||
if delete_external_artifacts:
|
||||
scheduled = _schedule_for_delete(
|
||||
task_id=project_id,
|
||||
@ -164,14 +167,16 @@ def _delete_tasks(company: str, projects: Sequence[str]) -> Tuple[int, Set, Set]
|
||||
return deleted, event_urls, artifact_urls
|
||||
|
||||
|
||||
def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]:
|
||||
def _delete_models(
|
||||
company: str, projects: Sequence[str]
|
||||
) -> Tuple[int, Set[str], Set[str]]:
|
||||
"""
|
||||
Delete project models and update the tasks from other projects
|
||||
that reference them to reference None.
|
||||
"""
|
||||
models = Model.objects(project__in=projects).only("task", "id", "uri")
|
||||
if not models:
|
||||
return 0, set()
|
||||
return 0, set(), set()
|
||||
|
||||
model_ids = list({m.id for m in models})
|
||||
|
||||
@ -198,6 +203,13 @@ def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]:
|
||||
upsert=False,
|
||||
)
|
||||
|
||||
urls = {m.uri for m in models if m.uri}
|
||||
event_urls, model_urls = set(), set()
|
||||
for m in models:
|
||||
event_urls.update(collect_debug_image_urls(company, m.id))
|
||||
event_urls.update(collect_plot_image_urls(company, m.id))
|
||||
if m.uri:
|
||||
model_urls.add(m.uri)
|
||||
|
||||
event_bll.delete_multi_task_events(company, model_ids)
|
||||
deleted = models.delete()
|
||||
return deleted, urls
|
||||
return deleted, event_urls, model_urls
|
||||
|
@ -23,6 +23,7 @@ from apiserver.database.model.url_to_delete import (
|
||||
)
|
||||
from apiserver.database.utils import id as db_id
|
||||
|
||||
log = config.logger(__file__)
|
||||
event_bll = EventBLL()
|
||||
|
||||
|
||||
@ -66,12 +67,12 @@ class CleanupResult:
|
||||
)
|
||||
|
||||
|
||||
def collect_plot_image_urls(company: str, task: str) -> Set[str]:
|
||||
def collect_plot_image_urls(company: str, task_or_model: str) -> Set[str]:
|
||||
urls = set()
|
||||
next_scroll_id = None
|
||||
while True:
|
||||
events, next_scroll_id = event_bll.get_plot_image_urls(
|
||||
company_id=company, task_id=task, scroll_id=next_scroll_id
|
||||
company_id=company, task_id=task_or_model, scroll_id=next_scroll_id
|
||||
)
|
||||
if not events:
|
||||
break
|
||||
@ -83,7 +84,7 @@ def collect_plot_image_urls(company: str, task: str) -> Set[str]:
|
||||
return urls
|
||||
|
||||
|
||||
def collect_debug_image_urls(company: str, task: str) -> Set[str]:
|
||||
def collect_debug_image_urls(company: str, task_or_model: str) -> Set[str]:
|
||||
"""
|
||||
Return the set of unique image urls
|
||||
Uses DebugImagesIterator to make sure that we do not retrieve recycled urls
|
||||
@ -92,7 +93,7 @@ def collect_debug_image_urls(company: str, task: str) -> Set[str]:
|
||||
urls = set()
|
||||
while True:
|
||||
res, after_key = event_bll.get_debug_image_urls(
|
||||
company_id=company, task_id=task, after_key=after_key,
|
||||
company_id=company, task_id=task_or_model, after_key=after_key,
|
||||
)
|
||||
urls.update(res)
|
||||
if not after_key:
|
||||
@ -210,9 +211,21 @@ def cleanup_task(
|
||||
if not models:
|
||||
continue
|
||||
if delete_output_models and allow_delete:
|
||||
deleted_models += Model.objects(
|
||||
id__in=[m.id for m in models if m.id not in in_use_model_ids]
|
||||
).delete()
|
||||
model_ids = set(m.id for m in models if m.id not in in_use_model_ids)
|
||||
for m_id in model_ids:
|
||||
if return_file_urls or delete_external_artifacts:
|
||||
event_urls.update(collect_debug_image_urls(task.company, m_id))
|
||||
event_urls.update(collect_plot_image_urls(task.company, m_id))
|
||||
try:
|
||||
event_bll.delete_task_events(
|
||||
task.company, m_id, allow_locked=True, model=True
|
||||
)
|
||||
except errors.bad_request.InvalidModelId as ex:
|
||||
log.info(
|
||||
f"Error deleting events for the model {m_id}: {str(ex)}"
|
||||
)
|
||||
|
||||
deleted_models += Model.objects(id__in=list(model_ids)).delete()
|
||||
if in_use_model_ids:
|
||||
Model.objects(id__in=list(in_use_model_ids)).update(unset__task=1)
|
||||
continue
|
||||
|
@ -51,7 +51,6 @@ class TestTasksResetDelete(TestService):
|
||||
|
||||
task = self.new_task()
|
||||
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task)
|
||||
published_model_urls, draft_model_urls = self.create_task_models(task)
|
||||
artifact_urls = self.send_artifacts(task)
|
||||
event_urls = self.send_debug_image_events(task)
|
||||
event_urls.update(self.send_plot_events(task))
|
||||
@ -123,7 +122,6 @@ class TestTasksResetDelete(TestService):
|
||||
) -> Tuple[str, Tuple[Set[str], Set[str]], Set[str], Set[str]]:
|
||||
task = self.new_task(**kwargs)
|
||||
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task, **kwargs)
|
||||
published_model_urls, draft_model_urls = self.create_task_models(task, **kwargs)
|
||||
artifact_urls = self.send_artifacts(task)
|
||||
event_urls = self.send_debug_image_events(task)
|
||||
event_urls.update(self.send_plot_events(task))
|
||||
|
Loading…
Reference in New Issue
Block a user