mirror of
https://github.com/clearml/clearml-server
synced 2025-06-26 23:15:47 +00:00
Add model task cleanup on tasks.reset
This commit is contained in:
parent
d117a4f022
commit
52c0c4d438
@ -1,10 +1,9 @@
|
|||||||
from itertools import chain
|
from itertools import chain
|
||||||
from operator import attrgetter
|
from operator import attrgetter
|
||||||
from typing import Sequence, Generic, Callable, Type, Iterable, TypeVar, List, Set
|
from typing import Sequence, Set, Tuple
|
||||||
|
|
||||||
import attr
|
import attr
|
||||||
from boltons.iterutils import partition
|
from boltons.iterutils import partition
|
||||||
from mongoengine import QuerySet, Document
|
|
||||||
|
|
||||||
from apiserver.apierrors import errors
|
from apiserver.apierrors import errors
|
||||||
from apiserver.bll.event import EventBLL
|
from apiserver.bll.event import EventBLL
|
||||||
@ -16,49 +15,6 @@ from apiserver.database.model.task.task import Task, TaskStatus, ArtifactModes
|
|||||||
from apiserver.timing_context import TimingContext
|
from apiserver.timing_context import TimingContext
|
||||||
|
|
||||||
event_bll = EventBLL()
|
event_bll = EventBLL()
|
||||||
T = TypeVar("T", bound=Document)
|
|
||||||
|
|
||||||
|
|
||||||
class DocumentGroup(List[T]):
|
|
||||||
"""
|
|
||||||
Operate on a list of documents as if they were a query result
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, document_type: Type[T], documents: Iterable[T]):
|
|
||||||
super(DocumentGroup, self).__init__(documents)
|
|
||||||
self.type = document_type
|
|
||||||
|
|
||||||
@property
|
|
||||||
def ids(self) -> Set[str]:
|
|
||||||
return {obj.id for obj in self}
|
|
||||||
|
|
||||||
def objects(self, *args, **kwargs) -> QuerySet:
|
|
||||||
return self.type.objects(id__in=self.ids, *args, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
class TaskOutputs(Generic[T]):
|
|
||||||
"""
|
|
||||||
Split task outputs of the same type by the ready state
|
|
||||||
"""
|
|
||||||
|
|
||||||
published: DocumentGroup[T]
|
|
||||||
draft: DocumentGroup[T]
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
is_published: Callable[[T], bool],
|
|
||||||
document_type: Type[T],
|
|
||||||
children: Iterable[T],
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
:param is_published: predicate returning whether items is considered published
|
|
||||||
:param document_type: type of output
|
|
||||||
:param children: output documents
|
|
||||||
"""
|
|
||||||
self.published, self.draft = map(
|
|
||||||
lambda x: DocumentGroup(document_type, x),
|
|
||||||
partition(children, key=is_published),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@attr.s(auto_attribs=True)
|
@attr.s(auto_attribs=True)
|
||||||
@ -166,7 +122,9 @@ def cleanup_task(
|
|||||||
:param force: whether to delete task with published outputs
|
:param force: whether to delete task with published outputs
|
||||||
:return: count of delete and modified items
|
:return: count of delete and modified items
|
||||||
"""
|
"""
|
||||||
models = verify_task_children_and_ouptuts(task, force)
|
published_models, draft_models, in_use_model_ids = verify_task_children_and_ouptuts(
|
||||||
|
task, force
|
||||||
|
)
|
||||||
|
|
||||||
event_urls, artifact_urls, model_urls = set(), set(), set()
|
event_urls, artifact_urls, model_urls = set(), set(), set()
|
||||||
if return_file_urls:
|
if return_file_urls:
|
||||||
@ -178,7 +136,9 @@ def cleanup_task(
|
|||||||
for a in task.execution.artifacts.values()
|
for a in task.execution.artifacts.values()
|
||||||
if a.mode == ArtifactModes.output and a.uri
|
if a.mode == ArtifactModes.output and a.uri
|
||||||
}
|
}
|
||||||
model_urls = {m.uri for m in models.draft.objects().only("uri") if m.uri}
|
model_urls = {
|
||||||
|
m.uri for m in draft_models if m.uri and m.id not in in_use_model_ids
|
||||||
|
}
|
||||||
|
|
||||||
deleted_task_id = f"{deleted_prefix}{task.id}"
|
deleted_task_id = f"{deleted_prefix}{task.id}"
|
||||||
updated_children = 0
|
updated_children = 0
|
||||||
@ -190,16 +150,23 @@ def cleanup_task(
|
|||||||
|
|
||||||
deleted_models = 0
|
deleted_models = 0
|
||||||
updated_models = 0
|
updated_models = 0
|
||||||
if models.draft:
|
for models, allow_delete in ((draft_models, True), (published_models, False)):
|
||||||
if delete_output_models:
|
if not models:
|
||||||
deleted_models = models.draft.objects().delete()
|
continue
|
||||||
elif update_children:
|
if delete_output_models and allow_delete:
|
||||||
updated_models = models.draft.objects().update(task=deleted_task_id)
|
deleted_models += Model.objects(
|
||||||
else:
|
id__in=[m.id for m in models if m.id not in in_use_model_ids]
|
||||||
models.draft.objects().update(unset__task=1)
|
).delete()
|
||||||
|
if in_use_model_ids:
|
||||||
|
Model.objects(id__in=list(in_use_model_ids)).update(unset__task=1)
|
||||||
|
continue
|
||||||
|
|
||||||
if models.published and update_children:
|
if update_children:
|
||||||
updated_models += models.published.objects().update(task=deleted_task_id)
|
updated_models += Model.objects(id__in=[m.id for m in models]).update(
|
||||||
|
task=deleted_task_id
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
Model.objects(id__in=[m.id for m in models]).update(unset__task=1)
|
||||||
|
|
||||||
event_bll.delete_task_events(task.company, task.id, allow_locked=force)
|
event_bll.delete_task_events(task.company, task.id, allow_locked=force)
|
||||||
|
|
||||||
@ -217,7 +184,9 @@ def cleanup_task(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def verify_task_children_and_ouptuts(task: Task, force: bool) -> TaskOutputs[Model]:
|
def verify_task_children_and_ouptuts(
|
||||||
|
task, force: bool
|
||||||
|
) -> Tuple[Sequence[Model], Sequence[Model], Set[str]]:
|
||||||
if not force:
|
if not force:
|
||||||
with TimingContext("mongo", "count_published_children"):
|
with TimingContext("mongo", "count_published_children"):
|
||||||
published_children_count = Task.objects(
|
published_children_count = Task.objects(
|
||||||
@ -230,49 +199,42 @@ def verify_task_children_and_ouptuts(task: Task, force: bool) -> TaskOutputs[Mod
|
|||||||
children=published_children_count,
|
children=published_children_count,
|
||||||
)
|
)
|
||||||
|
|
||||||
with TimingContext("mongo", "get_task_models"):
|
model_fields = ["id", "ready", "uri"]
|
||||||
models = TaskOutputs(
|
published_models, draft_models = partition(
|
||||||
attrgetter("ready"),
|
Model.objects(task=task.id).only(*model_fields), key=attrgetter("ready"),
|
||||||
Model,
|
)
|
||||||
Model.objects(task=task.id).only("id", "task", "ready"),
|
if not force and published_models:
|
||||||
|
raise errors.bad_request.TaskCannotBeDeleted(
|
||||||
|
"has output models, use force=True",
|
||||||
|
task=task.id,
|
||||||
|
models=len(published_models),
|
||||||
)
|
)
|
||||||
if not force and models.published:
|
|
||||||
raise errors.bad_request.TaskCannotBeDeleted(
|
|
||||||
"has output models, use force=True",
|
|
||||||
task=task.id,
|
|
||||||
models=len(models.published),
|
|
||||||
)
|
|
||||||
|
|
||||||
if task.models and task.models.output:
|
if task.models and task.models.output:
|
||||||
with TimingContext("mongo", "get_task_output_model"):
|
model_ids = [m.model for m in task.models.output]
|
||||||
model_ids = [m.model for m in task.models.output]
|
for output_model in Model.objects(id__in=model_ids).only(*model_fields):
|
||||||
for output_model in Model.objects(id__in=model_ids):
|
if output_model.ready:
|
||||||
if output_model.ready:
|
if not force:
|
||||||
if not force:
|
raise errors.bad_request.TaskCannotBeDeleted(
|
||||||
raise errors.bad_request.TaskCannotBeDeleted(
|
"has output model, use force=True",
|
||||||
"has output model, use force=True",
|
task=task.id,
|
||||||
task=task.id,
|
model=output_model.id,
|
||||||
model=output_model.id,
|
)
|
||||||
)
|
published_models.append(output_model)
|
||||||
models.published.append(output_model)
|
else:
|
||||||
else:
|
draft_models.append(output_model)
|
||||||
models.draft.append(output_model)
|
|
||||||
|
|
||||||
if models.draft:
|
in_use_model_ids = {}
|
||||||
with TimingContext("mongo", "get_execution_models"):
|
if draft_models:
|
||||||
model_ids = models.draft.ids
|
model_ids = {m.id for m in draft_models}
|
||||||
dependent_tasks = Task.objects(models__input__model__in=model_ids).only(
|
dependent_tasks = Task.objects(models__input__model__in=list(model_ids)).only(
|
||||||
"id", "models"
|
"id", "models"
|
||||||
|
)
|
||||||
|
in_use_model_ids = model_ids & {
|
||||||
|
m.model
|
||||||
|
for m in chain.from_iterable(
|
||||||
|
t.models.input for t in dependent_tasks if t.models
|
||||||
)
|
)
|
||||||
input_models = {
|
}
|
||||||
m.model
|
|
||||||
for m in chain.from_iterable(
|
|
||||||
t.models.input for t in dependent_tasks if t.models
|
|
||||||
)
|
|
||||||
}
|
|
||||||
if input_models:
|
|
||||||
models.draft = DocumentGroup(
|
|
||||||
Model, (m for m in models.draft if m.id not in input_models)
|
|
||||||
)
|
|
||||||
|
|
||||||
return models
|
return published_models, draft_models, in_use_model_ids
|
||||||
|
Loading…
Reference in New Issue
Block a user