mirror of
https://github.com/clearml/clearml-server
synced 2025-05-19 10:51:22 +00:00
Collect model event URLs during task and project cleanup
This commit is contained in:
parent
97992b0d9e
commit
2d711e1500
@ -101,10 +101,13 @@ def delete_project(
|
|||||||
updated_count = cls.objects(project__in=project_ids).update(project=None)
|
updated_count = cls.objects(project__in=project_ids).update(project=None)
|
||||||
res = DeleteProjectResult(disassociated_tasks=updated_count)
|
res = DeleteProjectResult(disassociated_tasks=updated_count)
|
||||||
else:
|
else:
|
||||||
deleted_models, model_urls = _delete_models(projects=project_ids)
|
deleted_models, model_event_urls, model_urls = _delete_models(
|
||||||
deleted_tasks, event_urls, artifact_urls = _delete_tasks(
|
|
||||||
company=company, projects=project_ids
|
company=company, projects=project_ids
|
||||||
)
|
)
|
||||||
|
deleted_tasks, task_event_urls, artifact_urls = _delete_tasks(
|
||||||
|
company=company, projects=project_ids
|
||||||
|
)
|
||||||
|
event_urls = task_event_urls | model_event_urls
|
||||||
if delete_external_artifacts:
|
if delete_external_artifacts:
|
||||||
scheduled = _schedule_for_delete(
|
scheduled = _schedule_for_delete(
|
||||||
task_id=project_id,
|
task_id=project_id,
|
||||||
@ -164,14 +167,16 @@ def _delete_tasks(company: str, projects: Sequence[str]) -> Tuple[int, Set, Set]
|
|||||||
return deleted, event_urls, artifact_urls
|
return deleted, event_urls, artifact_urls
|
||||||
|
|
||||||
|
|
||||||
def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]:
|
def _delete_models(
|
||||||
|
company: str, projects: Sequence[str]
|
||||||
|
) -> Tuple[int, Set[str], Set[str]]:
|
||||||
"""
|
"""
|
||||||
Delete project models and update the tasks from other projects
|
Delete project models and update the tasks from other projects
|
||||||
that reference them to reference None.
|
that reference them to reference None.
|
||||||
"""
|
"""
|
||||||
models = Model.objects(project__in=projects).only("task", "id", "uri")
|
models = Model.objects(project__in=projects).only("task", "id", "uri")
|
||||||
if not models:
|
if not models:
|
||||||
return 0, set()
|
return 0, set(), set()
|
||||||
|
|
||||||
model_ids = list({m.id for m in models})
|
model_ids = list({m.id for m in models})
|
||||||
|
|
||||||
@ -198,6 +203,13 @@ def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]:
|
|||||||
upsert=False,
|
upsert=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
urls = {m.uri for m in models if m.uri}
|
event_urls, model_urls = set(), set()
|
||||||
|
for m in models:
|
||||||
|
event_urls.update(collect_debug_image_urls(company, m.id))
|
||||||
|
event_urls.update(collect_plot_image_urls(company, m.id))
|
||||||
|
if m.uri:
|
||||||
|
model_urls.add(m.uri)
|
||||||
|
|
||||||
|
event_bll.delete_multi_task_events(company, model_ids)
|
||||||
deleted = models.delete()
|
deleted = models.delete()
|
||||||
return deleted, urls
|
return deleted, event_urls, model_urls
|
||||||
|
@ -23,6 +23,7 @@ from apiserver.database.model.url_to_delete import (
|
|||||||
)
|
)
|
||||||
from apiserver.database.utils import id as db_id
|
from apiserver.database.utils import id as db_id
|
||||||
|
|
||||||
|
log = config.logger(__file__)
|
||||||
event_bll = EventBLL()
|
event_bll = EventBLL()
|
||||||
|
|
||||||
|
|
||||||
@ -66,12 +67,12 @@ class CleanupResult:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def collect_plot_image_urls(company: str, task: str) -> Set[str]:
|
def collect_plot_image_urls(company: str, task_or_model: str) -> Set[str]:
|
||||||
urls = set()
|
urls = set()
|
||||||
next_scroll_id = None
|
next_scroll_id = None
|
||||||
while True:
|
while True:
|
||||||
events, next_scroll_id = event_bll.get_plot_image_urls(
|
events, next_scroll_id = event_bll.get_plot_image_urls(
|
||||||
company_id=company, task_id=task, scroll_id=next_scroll_id
|
company_id=company, task_id=task_or_model, scroll_id=next_scroll_id
|
||||||
)
|
)
|
||||||
if not events:
|
if not events:
|
||||||
break
|
break
|
||||||
@ -83,7 +84,7 @@ def collect_plot_image_urls(company: str, task: str) -> Set[str]:
|
|||||||
return urls
|
return urls
|
||||||
|
|
||||||
|
|
||||||
def collect_debug_image_urls(company: str, task: str) -> Set[str]:
|
def collect_debug_image_urls(company: str, task_or_model: str) -> Set[str]:
|
||||||
"""
|
"""
|
||||||
Return the set of unique image urls
|
Return the set of unique image urls
|
||||||
Uses DebugImagesIterator to make sure that we do not retrieve recycled urls
|
Uses DebugImagesIterator to make sure that we do not retrieve recycled urls
|
||||||
@ -92,7 +93,7 @@ def collect_debug_image_urls(company: str, task: str) -> Set[str]:
|
|||||||
urls = set()
|
urls = set()
|
||||||
while True:
|
while True:
|
||||||
res, after_key = event_bll.get_debug_image_urls(
|
res, after_key = event_bll.get_debug_image_urls(
|
||||||
company_id=company, task_id=task, after_key=after_key,
|
company_id=company, task_id=task_or_model, after_key=after_key,
|
||||||
)
|
)
|
||||||
urls.update(res)
|
urls.update(res)
|
||||||
if not after_key:
|
if not after_key:
|
||||||
@ -210,9 +211,21 @@ def cleanup_task(
|
|||||||
if not models:
|
if not models:
|
||||||
continue
|
continue
|
||||||
if delete_output_models and allow_delete:
|
if delete_output_models and allow_delete:
|
||||||
deleted_models += Model.objects(
|
model_ids = set(m.id for m in models if m.id not in in_use_model_ids)
|
||||||
id__in=[m.id for m in models if m.id not in in_use_model_ids]
|
for m_id in model_ids:
|
||||||
).delete()
|
if return_file_urls or delete_external_artifacts:
|
||||||
|
event_urls.update(collect_debug_image_urls(task.company, m_id))
|
||||||
|
event_urls.update(collect_plot_image_urls(task.company, m_id))
|
||||||
|
try:
|
||||||
|
event_bll.delete_task_events(
|
||||||
|
task.company, m_id, allow_locked=True, model=True
|
||||||
|
)
|
||||||
|
except errors.bad_request.InvalidModelId as ex:
|
||||||
|
log.info(
|
||||||
|
f"Error deleting events for the model {m_id}: {str(ex)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
deleted_models += Model.objects(id__in=list(model_ids)).delete()
|
||||||
if in_use_model_ids:
|
if in_use_model_ids:
|
||||||
Model.objects(id__in=list(in_use_model_ids)).update(unset__task=1)
|
Model.objects(id__in=list(in_use_model_ids)).update(unset__task=1)
|
||||||
continue
|
continue
|
||||||
|
@ -51,7 +51,6 @@ class TestTasksResetDelete(TestService):
|
|||||||
|
|
||||||
task = self.new_task()
|
task = self.new_task()
|
||||||
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task)
|
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task)
|
||||||
published_model_urls, draft_model_urls = self.create_task_models(task)
|
|
||||||
artifact_urls = self.send_artifacts(task)
|
artifact_urls = self.send_artifacts(task)
|
||||||
event_urls = self.send_debug_image_events(task)
|
event_urls = self.send_debug_image_events(task)
|
||||||
event_urls.update(self.send_plot_events(task))
|
event_urls.update(self.send_plot_events(task))
|
||||||
@ -123,7 +122,6 @@ class TestTasksResetDelete(TestService):
|
|||||||
) -> Tuple[str, Tuple[Set[str], Set[str]], Set[str], Set[str]]:
|
) -> Tuple[str, Tuple[Set[str], Set[str]], Set[str], Set[str]]:
|
||||||
task = self.new_task(**kwargs)
|
task = self.new_task(**kwargs)
|
||||||
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task, **kwargs)
|
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task, **kwargs)
|
||||||
published_model_urls, draft_model_urls = self.create_task_models(task, **kwargs)
|
|
||||||
artifact_urls = self.send_artifacts(task)
|
artifact_urls = self.send_artifacts(task)
|
||||||
event_urls = self.send_debug_image_events(task)
|
event_urls = self.send_debug_image_events(task)
|
||||||
event_urls.update(self.send_plot_events(task))
|
event_urls.update(self.send_plot_events(task))
|
||||||
|
Loading…
Reference in New Issue
Block a user