mirror of
https://github.com/clearml/clearml-server
synced 2025-04-22 23:24:24 +00:00
Optimize events deletion in tasks.delete_many/reset_many and models.delete_many operations
This commit is contained in:
parent
5a5f02cead
commit
7b5679fd70
@ -41,7 +41,7 @@ from apiserver.bll.event.event_metrics import EventMetrics
|
||||
from apiserver.bll.task import TaskBLL
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.errors import translate_errors_context
|
||||
from apiserver.database.model.task.task import Task, TaskStatus
|
||||
from apiserver.database.model.task.task import TaskStatus
|
||||
from apiserver.redis_manager import redman
|
||||
from apiserver.service_repo.auth import Identity
|
||||
from apiserver.utilities.dicts import nested_get
|
||||
@ -1149,34 +1149,6 @@ class EventBLL(object):
|
||||
for tb in es_res["aggregations"]["tasks"]["buckets"]
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _validate_model_state(
|
||||
company_id: str, model_id: str, allow_locked: bool = False
|
||||
):
|
||||
extra_msg = None
|
||||
query = Q(id=model_id, company=company_id)
|
||||
if not allow_locked:
|
||||
query &= Q(ready__ne=True)
|
||||
extra_msg = "or model published"
|
||||
res = Model.objects(query).only("id").first()
|
||||
if not res:
|
||||
raise errors.bad_request.InvalidModelId(
|
||||
extra_msg, company=company_id, id=model_id
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _validate_task_state(company_id: str, task_id: str, allow_locked: bool = False):
|
||||
extra_msg = None
|
||||
query = Q(id=task_id, company=company_id)
|
||||
if not allow_locked:
|
||||
query &= Q(status__nin=LOCKED_TASK_STATUSES)
|
||||
extra_msg = "or task published"
|
||||
res = Task.objects(query).only("id").first()
|
||||
if not res:
|
||||
raise errors.bad_request.InvalidTaskId(
|
||||
extra_msg, company=company_id, id=task_id
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _get_events_deletion_params(async_delete: bool) -> dict:
|
||||
if async_delete:
|
||||
@ -1189,28 +1161,26 @@ class EventBLL(object):
|
||||
|
||||
return {"refresh": True}
|
||||
|
||||
def delete_task_events(self, company_id, task_id, allow_locked=False, model=False):
|
||||
if model:
|
||||
self._validate_model_state(
|
||||
company_id=company_id,
|
||||
model_id=task_id,
|
||||
allow_locked=allow_locked,
|
||||
)
|
||||
else:
|
||||
self._validate_task_state(
|
||||
company_id=company_id, task_id=task_id, allow_locked=allow_locked
|
||||
)
|
||||
def delete_task_events(self, company_id, task_ids: Union[str, Sequence[str]], model=False):
|
||||
"""
|
||||
Delete task events. No check is done for tasks write access
|
||||
so it should be checked by the calling code
|
||||
"""
|
||||
if isinstance(task_ids, str):
|
||||
task_ids = [task_ids]
|
||||
deleted = 0
|
||||
with translate_errors_context():
|
||||
async_delete = async_task_events_delete
|
||||
if async_delete:
|
||||
if async_delete and len(task_ids) < 100:
|
||||
total = self.events_iterator.count_task_events(
|
||||
event_type=EventType.all,
|
||||
company_id=company_id,
|
||||
task_ids=[task_id],
|
||||
task_ids=task_ids,
|
||||
)
|
||||
if total <= async_delete_threshold:
|
||||
async_delete = False
|
||||
es_req = {"query": {"term": {"task": task_id}}}
|
||||
with translate_errors_context():
|
||||
for tasks in chunked_iter(task_ids, 100):
|
||||
es_req = {"query": {"terms": {"task": tasks}}}
|
||||
es_res = delete_company_events(
|
||||
es=self.es,
|
||||
company_id=company_id,
|
||||
@ -1218,22 +1188,20 @@ class EventBLL(object):
|
||||
body=es_req,
|
||||
**self._get_events_deletion_params(async_delete),
|
||||
)
|
||||
if not async_delete:
|
||||
deleted += es_res.get("deleted", 0)
|
||||
|
||||
if not async_delete:
|
||||
return es_res.get("deleted", 0)
|
||||
return deleted
|
||||
|
||||
def clear_task_log(
|
||||
self,
|
||||
company_id: str,
|
||||
task_id: str,
|
||||
allow_locked: bool = False,
|
||||
threshold_sec: int = None,
|
||||
include_metrics: Sequence[str] = None,
|
||||
exclude_metrics: Sequence[str] = None,
|
||||
):
|
||||
self._validate_task_state(
|
||||
company_id=company_id, task_id=task_id, allow_locked=allow_locked
|
||||
)
|
||||
if check_empty_data(
|
||||
self.es, company_id=company_id, event_type=EventType.task_log
|
||||
):
|
||||
@ -1275,39 +1243,6 @@ class EventBLL(object):
|
||||
)
|
||||
return es_res.get("deleted", 0)
|
||||
|
||||
def delete_multi_task_events(
|
||||
self, company_id: str, task_ids: Sequence[str], model=False
|
||||
):
|
||||
"""
|
||||
Delete multiple task events. No check is done for tasks write access
|
||||
so it should be checked by the calling code
|
||||
"""
|
||||
deleted = 0
|
||||
with translate_errors_context():
|
||||
async_delete = async_task_events_delete
|
||||
if async_delete and len(task_ids) < 100:
|
||||
total = self.events_iterator.count_task_events(
|
||||
event_type=EventType.all,
|
||||
company_id=company_id,
|
||||
task_ids=task_ids,
|
||||
)
|
||||
if total <= async_delete_threshold:
|
||||
async_delete = False
|
||||
for tasks in chunked_iter(task_ids, 100):
|
||||
es_req = {"query": {"terms": {"task": tasks}}}
|
||||
es_res = delete_company_events(
|
||||
es=self.es,
|
||||
company_id=company_id,
|
||||
event_type=EventType.all,
|
||||
body=es_req,
|
||||
**self._get_events_deletion_params(async_delete),
|
||||
)
|
||||
if not async_delete:
|
||||
deleted += es_res.get("deleted", 0)
|
||||
|
||||
if not async_delete:
|
||||
return deleted
|
||||
|
||||
def clear_scroll(self, scroll_id: str):
|
||||
if scroll_id == self.empty_scroll:
|
||||
return
|
||||
|
@ -6,7 +6,6 @@ from mongoengine import Q
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.apimodels.models import ModelTaskPublishResponse
|
||||
from apiserver.bll.task.utils import deleted_prefix, get_last_metric_updates
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.model import EntityVisibility
|
||||
from apiserver.database.model.model import Model
|
||||
from apiserver.database.model.task.task import Task, TaskStatus
|
||||
@ -15,8 +14,6 @@ from .metadata import Metadata
|
||||
|
||||
|
||||
class ModelBLL:
|
||||
event_bll = None
|
||||
|
||||
@classmethod
|
||||
def get_company_model_by_id(
|
||||
cls, company_id: str, model_id: str, only_fields=None
|
||||
@ -94,7 +91,7 @@ class ModelBLL:
|
||||
|
||||
@classmethod
|
||||
def delete_model(
|
||||
cls, model_id: str, company_id: str, user_id: str, force: bool, delete_external_artifacts: bool = True,
|
||||
cls, model_id: str, company_id: str, user_id: str, force: bool
|
||||
) -> Tuple[int, Model]:
|
||||
model = cls.get_company_model_by_id(
|
||||
company_id=company_id,
|
||||
@ -147,34 +144,6 @@ class ModelBLL:
|
||||
set__last_changed_by=user_id,
|
||||
)
|
||||
|
||||
delete_external_artifacts = delete_external_artifacts and config.get(
|
||||
"services.async_urls_delete.enabled", True
|
||||
)
|
||||
if delete_external_artifacts:
|
||||
from apiserver.bll.task.task_cleanup import (
|
||||
collect_debug_image_urls,
|
||||
collect_plot_image_urls,
|
||||
_schedule_for_delete,
|
||||
)
|
||||
urls = set()
|
||||
urls.update(collect_debug_image_urls(company_id, model_id))
|
||||
urls.update(collect_plot_image_urls(company_id, model_id))
|
||||
if model.uri:
|
||||
urls.add(model.uri)
|
||||
if urls:
|
||||
_schedule_for_delete(
|
||||
task_id=model_id,
|
||||
company=company_id,
|
||||
user=user_id,
|
||||
urls=urls,
|
||||
can_delete_folders=False,
|
||||
)
|
||||
|
||||
if not cls.event_bll:
|
||||
from apiserver.bll.event import EventBLL
|
||||
cls.event_bll = EventBLL()
|
||||
|
||||
cls.event_bll.delete_task_events(company_id, model_id, allow_locked=True, model=True)
|
||||
del_count = Model.objects(id=model_id, company=company_id).delete()
|
||||
return del_count, model
|
||||
|
||||
|
@ -8,10 +8,9 @@ from mongoengine import Q
|
||||
from apiserver.apierrors import errors
|
||||
from apiserver.bll.event import EventBLL
|
||||
from apiserver.bll.task.task_cleanup import (
|
||||
collect_debug_image_urls,
|
||||
collect_plot_image_urls,
|
||||
TaskUrls,
|
||||
_schedule_for_delete,
|
||||
schedule_for_delete,
|
||||
delete_task_events_and_collect_urls,
|
||||
)
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.model import EntityVisibility
|
||||
@ -192,7 +191,7 @@ def delete_project(
|
||||
)
|
||||
event_urls = task_event_urls | model_event_urls
|
||||
if delete_external_artifacts:
|
||||
scheduled = _schedule_for_delete(
|
||||
scheduled = schedule_for_delete(
|
||||
task_id=project_id,
|
||||
company=company,
|
||||
user=user,
|
||||
@ -206,7 +205,6 @@ def delete_project(
|
||||
deleted_models=deleted_models,
|
||||
urls=TaskUrls(
|
||||
model_urls=list(model_urls),
|
||||
event_urls=list(event_urls),
|
||||
artifact_urls=list(artifact_urls),
|
||||
),
|
||||
)
|
||||
@ -243,9 +241,6 @@ def _delete_tasks(
|
||||
last_changed_by=user,
|
||||
)
|
||||
|
||||
event_urls = collect_debug_image_urls(company, task_ids) | collect_plot_image_urls(
|
||||
company, task_ids
|
||||
)
|
||||
artifact_urls = set()
|
||||
for task in tasks:
|
||||
if task.execution and task.execution.artifacts:
|
||||
@ -257,8 +252,11 @@ def _delete_tasks(
|
||||
}
|
||||
)
|
||||
|
||||
event_bll.delete_multi_task_events(company, task_ids)
|
||||
event_urls = delete_task_events_and_collect_urls(
|
||||
company=company, task_ids=task_ids
|
||||
)
|
||||
deleted = tasks.delete()
|
||||
|
||||
return deleted, event_urls, artifact_urls
|
||||
|
||||
|
||||
@ -317,11 +315,10 @@ def _delete_models(
|
||||
set__last_changed_by=user,
|
||||
)
|
||||
|
||||
event_urls = collect_debug_image_urls(company, model_ids) | collect_plot_image_urls(
|
||||
company, model_ids
|
||||
)
|
||||
model_urls = {m.uri for m in models if m.uri}
|
||||
|
||||
event_bll.delete_multi_task_events(company, model_ids, model=True)
|
||||
event_urls = delete_task_events_and_collect_urls(
|
||||
company=company, task_ids=model_ids, model=True
|
||||
)
|
||||
deleted = models.delete()
|
||||
|
||||
return deleted, event_urls, model_urls
|
||||
|
@ -31,8 +31,8 @@ event_bll = EventBLL()
|
||||
@attr.s(auto_attribs=True)
|
||||
class TaskUrls:
|
||||
model_urls: Sequence[str]
|
||||
event_urls: Sequence[str]
|
||||
artifact_urls: Sequence[str]
|
||||
event_urls: Sequence[str] = [] # left here is in order not to break the api
|
||||
|
||||
def __add__(self, other: "TaskUrls"):
|
||||
if not other:
|
||||
@ -40,7 +40,6 @@ class TaskUrls:
|
||||
|
||||
return TaskUrls(
|
||||
model_urls=list(set(self.model_urls) | set(other.model_urls)),
|
||||
event_urls=list(set(self.event_urls) | set(other.event_urls)),
|
||||
artifact_urls=list(set(self.artifact_urls) | set(other.artifact_urls)),
|
||||
)
|
||||
|
||||
@ -54,8 +53,23 @@ class CleanupResult:
|
||||
updated_children: int
|
||||
updated_models: int
|
||||
deleted_models: int
|
||||
deleted_model_ids: Set[str]
|
||||
urls: TaskUrls = None
|
||||
|
||||
def to_res_dict(self, return_file_urls: bool) -> dict:
    """
    Convert this cleanup result into a plain dict for an API response.

    The ``deleted_model_ids`` field is never part of the result. When
    ``return_file_urls`` is false the ``urls`` field is not serialized
    and the ``urls`` key is reported as ``None`` instead
    """
    hidden = {"deleted_model_ids"}
    if not return_file_urls:
        hidden.add("urls")

    def _keep(attrib, _value) -> bool:
        # attr.asdict calls the filter once per attribute
        return attrib.name not in hidden

    # noinspection PyTypeChecker
    result = attr.asdict(self, filter=_keep)
    if not return_file_urls:
        # keep the key present in the response even though urls are withheld
        result["urls"] = None

    return result
|
||||
|
||||
def __add__(self, other: "CleanupResult"):
|
||||
if not other:
|
||||
return self
|
||||
@ -65,6 +79,16 @@ class CleanupResult:
|
||||
updated_models=self.updated_models + other.updated_models,
|
||||
deleted_models=self.deleted_models + other.deleted_models,
|
||||
urls=self.urls + other.urls if self.urls else other.urls,
|
||||
deleted_model_ids=self.deleted_model_ids | other.deleted_model_ids
|
||||
)
|
||||
|
||||
@staticmethod
def empty():
    """
    Build a CleanupResult with all counters set to zero and an empty set
    of deleted model ids. The ``urls`` field is left at its default
    """
    zeroed = dict.fromkeys(
        ("updated_children", "updated_models", "deleted_models"), 0
    )
    return CleanupResult(deleted_model_ids=set(), **zeroed)
|
||||
|
||||
|
||||
@ -130,7 +154,7 @@ supported_storage_types.update(
|
||||
)
|
||||
|
||||
|
||||
def _schedule_for_delete(
|
||||
def schedule_for_delete(
|
||||
company: str,
|
||||
user: str,
|
||||
task_id: str,
|
||||
@ -197,15 +221,25 @@ def _schedule_for_delete(
|
||||
return processed_urls
|
||||
|
||||
|
||||
def delete_task_events_and_collect_urls(
    company: str, task_ids: Sequence[str], model=False
) -> Set[str]:
    """
    Gather the debug image and plot image urls for the passed task
    (or model) ids and then delete the events of these ids.

    The urls are collected before the deletion call is issued.
    Returns the union of both url sets
    """
    debug_image_urls = collect_debug_image_urls(company, task_ids)
    plot_image_urls = collect_plot_image_urls(company, task_ids)
    collected_urls = debug_image_urls | plot_image_urls

    event_bll.delete_task_events(company, task_ids, model=model)

    return collected_urls
|
||||
|
||||
|
||||
def cleanup_task(
|
||||
company: str,
|
||||
user: str,
|
||||
task: Task,
|
||||
force: bool = False,
|
||||
update_children=True,
|
||||
return_file_urls=False,
|
||||
delete_output_models=True,
|
||||
delete_external_artifacts=True,
|
||||
) -> CleanupResult:
|
||||
"""
|
||||
Validate task deletion and delete/modify all its output.
|
||||
@ -216,19 +250,11 @@ def cleanup_task(
|
||||
published_models, draft_models, in_use_model_ids = verify_task_children_and_ouptuts(
|
||||
task, force
|
||||
)
|
||||
delete_external_artifacts = delete_external_artifacts and config.get(
|
||||
"services.async_urls_delete.enabled", True
|
||||
)
|
||||
event_urls, artifact_urls, model_urls = set(), set(), set()
|
||||
if return_file_urls or delete_external_artifacts:
|
||||
event_urls = collect_debug_image_urls(task.company, task.id)
|
||||
event_urls.update(collect_plot_image_urls(task.company, task.id))
|
||||
if task.execution and task.execution.artifacts:
|
||||
artifact_urls = {
|
||||
a.uri
|
||||
for a in task.execution.artifacts.values()
|
||||
if a.mode == ArtifactModes.output and a.uri
|
||||
}
|
||||
} if task.execution and task.execution.artifacts else {}
|
||||
model_urls = {
|
||||
m.uri for m in draft_models if m.uri and m.id not in in_use_model_ids
|
||||
}
|
||||
@ -245,22 +271,15 @@ def cleanup_task(
|
||||
|
||||
deleted_models = 0
|
||||
updated_models = 0
|
||||
deleted_model_ids = set()
|
||||
for models, allow_delete in ((draft_models, True), (published_models, False)):
|
||||
if not models:
|
||||
continue
|
||||
if delete_output_models and allow_delete:
|
||||
model_ids = list({m.id for m in models if m.id not in in_use_model_ids})
|
||||
if model_ids:
|
||||
if return_file_urls or delete_external_artifacts:
|
||||
event_urls.update(collect_debug_image_urls(task.company, model_ids))
|
||||
event_urls.update(collect_plot_image_urls(task.company, model_ids))
|
||||
|
||||
event_bll.delete_multi_task_events(
|
||||
task.company,
|
||||
model_ids,
|
||||
model=True,
|
||||
)
|
||||
deleted_models += Model.objects(id__in=model_ids).delete()
|
||||
deleted_model_ids.update(model_ids)
|
||||
|
||||
if in_use_model_ids:
|
||||
Model.objects(id__in=list(in_use_model_ids)).update(
|
||||
@ -283,30 +302,15 @@ def cleanup_task(
|
||||
set__last_changed_by=user,
|
||||
)
|
||||
|
||||
event_bll.delete_task_events(task.company, task.id, allow_locked=force)
|
||||
|
||||
if delete_external_artifacts:
|
||||
scheduled = _schedule_for_delete(
|
||||
task_id=task.id,
|
||||
company=company,
|
||||
user=user,
|
||||
urls=event_urls | model_urls | artifact_urls,
|
||||
can_delete_folders=not in_use_model_ids and not published_models,
|
||||
)
|
||||
for urls in (event_urls, model_urls, artifact_urls):
|
||||
urls.difference_update(scheduled)
|
||||
|
||||
return CleanupResult(
|
||||
deleted_models=deleted_models,
|
||||
updated_children=updated_children,
|
||||
updated_models=updated_models,
|
||||
urls=TaskUrls(
|
||||
event_urls=list(event_urls),
|
||||
artifact_urls=list(artifact_urls),
|
||||
model_urls=list(model_urls),
|
||||
)
|
||||
if return_file_urls
|
||||
else None,
|
||||
),
|
||||
deleted_model_ids=deleted_model_ids,
|
||||
)
|
||||
|
||||
|
||||
|
@ -295,11 +295,9 @@ def delete_task(
|
||||
identity: Identity,
|
||||
move_to_trash: bool,
|
||||
force: bool,
|
||||
return_file_urls: bool,
|
||||
delete_output_models: bool,
|
||||
status_message: str,
|
||||
status_reason: str,
|
||||
delete_external_artifacts: bool,
|
||||
include_pipeline_steps: bool,
|
||||
) -> Tuple[int, Task, CleanupResult]:
|
||||
user_id = identity.user
|
||||
@ -319,7 +317,7 @@ def delete_task(
|
||||
current=task.status,
|
||||
)
|
||||
|
||||
def delete_task_core(task_: Task, force_: bool):
|
||||
def delete_task_core(task_: Task, force_: bool) -> CleanupResult:
|
||||
try:
|
||||
TaskBLL.dequeue_and_change_status(
|
||||
task_,
|
||||
@ -338,9 +336,7 @@ def delete_task(
|
||||
user=user_id,
|
||||
task=task_,
|
||||
force=force_,
|
||||
return_file_urls=return_file_urls,
|
||||
delete_output_models=delete_output_models,
|
||||
delete_external_artifacts=delete_external_artifacts,
|
||||
)
|
||||
|
||||
if move_to_trash:
|
||||
@ -354,11 +350,12 @@ def delete_task(
|
||||
return res
|
||||
|
||||
task_ids = [task.id]
|
||||
cleanup_res = CleanupResult.empty()
|
||||
if include_pipeline_steps and (
|
||||
step_tasks := _get_pipeline_steps_for_controller_task(task, company_id)
|
||||
):
|
||||
for step in step_tasks:
|
||||
delete_task_core(step, True)
|
||||
cleanup_res += delete_task_core(step, True)
|
||||
task_ids.append(step.id)
|
||||
|
||||
cleanup_res = delete_task_core(task, force)
|
||||
@ -374,10 +371,8 @@ def reset_task(
|
||||
company_id: str,
|
||||
identity: Identity,
|
||||
force: bool,
|
||||
return_file_urls: bool,
|
||||
delete_output_models: bool,
|
||||
clear_all: bool,
|
||||
delete_external_artifacts: bool,
|
||||
) -> Tuple[dict, CleanupResult, dict]:
|
||||
user_id = identity.user
|
||||
task = get_task_with_write_access(
|
||||
@ -404,9 +399,7 @@ def reset_task(
|
||||
task=task,
|
||||
force=force,
|
||||
update_children=False,
|
||||
return_file_urls=return_file_urls,
|
||||
delete_output_models=delete_output_models,
|
||||
delete_external_artifacts=delete_external_artifacts,
|
||||
)
|
||||
|
||||
updates.update(
|
||||
|
@ -42,6 +42,7 @@ from apiserver.apimodels.events import (
|
||||
LegacyMultiTaskEventsRequest,
|
||||
)
|
||||
from apiserver.bll.event import EventBLL
|
||||
from apiserver.bll.event.event_bll import LOCKED_TASK_STATUSES
|
||||
from apiserver.bll.event.event_common import EventType, MetricVariants, TaskCompanies
|
||||
from apiserver.bll.event.events_iterator import Scroll
|
||||
from apiserver.bll.event.scalar_key import ScalarKeyEnum, ScalarKey
|
||||
@ -52,6 +53,7 @@ from apiserver.config_repo import config
|
||||
from apiserver.database.model.model import Model
|
||||
from apiserver.database.model.task.task import Task
|
||||
from apiserver.service_repo import APICall, endpoint
|
||||
from apiserver.service_repo.auth import Identity
|
||||
from apiserver.utilities import json, extract_properties_to_lists
|
||||
|
||||
task_bll = TaskBLL()
|
||||
@ -1001,18 +1003,49 @@ def get_multi_task_metrics(call: APICall, company_id, request: MultiTaskMetricsR
|
||||
call.result.data = {"metrics": sorted(res, key=itemgetter("metric"))}
|
||||
|
||||
|
||||
def _validate_task_for_events_update(
    company_id: str, task_id: str, identity: Identity, allow_locked: bool
):
    """
    Make sure that the events of the task can be updated by the caller.

    The task is fetched through get_task_with_write_access. Unless
    ``allow_locked`` is set, a task whose status is one of
    LOCKED_TASK_STATUSES is rejected with InvalidTaskId
    """
    task = get_task_with_write_access(
        task_id=task_id,
        company_id=company_id,
        identity=identity,
        only=("id", "status"),
    )
    if allow_locked or task.status not in LOCKED_TASK_STATUSES:
        return

    raise errors.bad_request.InvalidTaskId(
        replacement_msg="Cannot update events for a published task",
        company=company_id,
        id=task_id,
    )
|
||||
|
||||
|
||||
@endpoint("events.delete_for_task")
|
||||
def delete_for_task(call, company_id, request: TaskRequest):
|
||||
task_id = request.task
|
||||
allow_locked = call.data.get("allow_locked", False)
|
||||
|
||||
get_task_with_write_access(
|
||||
task_id=task_id, company_id=company_id, identity=call.identity, only=("id",)
|
||||
_validate_task_for_events_update(
|
||||
company_id=company_id,
|
||||
task_id=task_id,
|
||||
identity=call.identity,
|
||||
allow_locked=allow_locked,
|
||||
)
|
||||
|
||||
call.result.data = dict(
|
||||
deleted=event_bll.delete_task_events(
|
||||
company_id, task_id, allow_locked=allow_locked
|
||||
deleted=event_bll.delete_task_events(company_id, task_id)
|
||||
)
|
||||
|
||||
|
||||
def _validate_model_for_events_update(
    company_id: str, model_id: str, allow_locked: bool
):
    """
    Make sure that the events of the model can be updated.

    The model is looked up with model_bll.assert_exists. Unless
    ``allow_locked`` is set, a model with a truthy ``ready`` flag is
    rejected with InvalidModelId
    """
    found = model_bll.assert_exists(company_id, model_id, only=("id", "ready"))
    model = found[0]
    if allow_locked or not model.ready:
        return

    raise errors.bad_request.InvalidModelId(
        replacement_msg="Cannot update events for a published model",
        company=company_id,
        id=model_id,
    )
|
||||
|
||||
|
||||
@ -1021,10 +1054,13 @@ def delete_for_model(call: APICall, company_id: str, request: ModelRequest):
|
||||
model_id = request.model
|
||||
allow_locked = call.data.get("allow_locked", False)
|
||||
|
||||
model_bll.assert_exists(company_id, model_id, return_models=False)
|
||||
_validate_model_for_events_update(
|
||||
company_id=company_id, model_id=model_id, allow_locked=allow_locked
|
||||
)
|
||||
|
||||
call.result.data = dict(
|
||||
deleted=event_bll.delete_task_events(
|
||||
company_id, model_id, allow_locked=allow_locked, model=True
|
||||
company_id, model_id, model=True
|
||||
)
|
||||
)
|
||||
|
||||
@ -1033,14 +1069,17 @@ def delete_for_model(call: APICall, company_id: str, request: ModelRequest):
|
||||
def clear_task_log(call: APICall, company_id: str, request: ClearTaskLogRequest):
|
||||
task_id = request.task
|
||||
|
||||
get_task_with_write_access(
|
||||
task_id=task_id, company_id=company_id, identity=call.identity, only=("id",)
|
||||
_validate_task_for_events_update(
|
||||
company_id=company_id,
|
||||
task_id=task_id,
|
||||
identity=call.identity,
|
||||
allow_locked=request.allow_locked,
|
||||
)
|
||||
|
||||
call.result.data = dict(
|
||||
deleted=event_bll.clear_task_log(
|
||||
company_id=company_id,
|
||||
task_id=task_id,
|
||||
allow_locked=request.allow_locked,
|
||||
threshold_sec=request.threshold_sec,
|
||||
exclude_metrics=request.exclude_metrics,
|
||||
include_metrics=request.include_metrics,
|
||||
|
@ -27,10 +27,15 @@ from apiserver.apimodels.models import (
|
||||
UpdateModelRequest,
|
||||
)
|
||||
from apiserver.apimodels.tasks import UpdateTagsRequest
|
||||
from apiserver.bll.event import EventBLL
|
||||
from apiserver.bll.model import ModelBLL, Metadata
|
||||
from apiserver.bll.organization import OrgBLL, Tags
|
||||
from apiserver.bll.project import ProjectBLL
|
||||
from apiserver.bll.task import TaskBLL
|
||||
from apiserver.bll.task.task_cleanup import (
|
||||
schedule_for_delete,
|
||||
delete_task_events_and_collect_urls,
|
||||
)
|
||||
from apiserver.bll.task.task_operations import publish_task
|
||||
from apiserver.bll.task.utils import get_task_with_write_access
|
||||
from apiserver.bll.util import run_batch_operation
|
||||
@ -64,6 +69,7 @@ from apiserver.services.utils import (
|
||||
log = config.logger(__file__)
|
||||
org_bll = OrgBLL()
|
||||
project_bll = ProjectBLL()
|
||||
event_bll = EventBLL()
|
||||
|
||||
|
||||
def conform_model_data(call: APICall, model_data: Union[Sequence[dict], dict]):
|
||||
@ -555,16 +561,59 @@ def publish_many(call: APICall, company_id, request: ModelsPublishManyRequest):
|
||||
)
|
||||
|
||||
|
||||
def _delete_model_events(
|
||||
company_id: str,
|
||||
user_id: str,
|
||||
models: Sequence[Model],
|
||||
delete_external_artifacts: bool,
|
||||
):
|
||||
model_ids = [m.id for m in models]
|
||||
delete_external_artifacts = delete_external_artifacts and config.get(
|
||||
"services.async_urls_delete.enabled", True
|
||||
)
|
||||
if delete_external_artifacts:
|
||||
for m in models:
|
||||
if not m.uri:
|
||||
continue
|
||||
schedule_for_delete(
|
||||
task_id=m.id,
|
||||
company=company_id,
|
||||
user=user_id,
|
||||
urls=m.uri,
|
||||
can_delete_folders=False,
|
||||
)
|
||||
|
||||
event_urls = delete_task_events_and_collect_urls(
|
||||
company=company_id, task_ids=model_ids, model=True
|
||||
)
|
||||
if event_urls:
|
||||
schedule_for_delete(
|
||||
task_id=model_ids[0],
|
||||
company=company_id,
|
||||
user=user_id,
|
||||
urls=event_urls,
|
||||
can_delete_folders=False,
|
||||
)
|
||||
|
||||
event_bll.delete_task_events(company_id, model_ids, model=True)
|
||||
|
||||
|
||||
@endpoint("models.delete", request_data_model=DeleteModelRequest)
|
||||
def delete(call: APICall, company_id, request: DeleteModelRequest):
|
||||
user_id = call.identity.user
|
||||
del_count, model = ModelBLL.delete_model(
|
||||
model_id=request.model,
|
||||
company_id=company_id,
|
||||
user_id=call.identity.user,
|
||||
user_id=user_id,
|
||||
force=request.force,
|
||||
delete_external_artifacts=request.delete_external_artifacts,
|
||||
)
|
||||
if del_count:
|
||||
_delete_model_events(
|
||||
company_id=company_id,
|
||||
user_id=user_id,
|
||||
models=[model],
|
||||
delete_external_artifacts=request.delete_external_artifacts,
|
||||
)
|
||||
_reset_cached_tags(
|
||||
company_id, projects=[model.project] if model.project else []
|
||||
)
|
||||
@ -578,26 +627,36 @@ def delete(call: APICall, company_id, request: DeleteModelRequest):
|
||||
response_data_model=BatchResponse,
|
||||
)
|
||||
def delete(call: APICall, company_id, request: ModelsDeleteManyRequest):
|
||||
user_id = call.identity.user
|
||||
|
||||
results, failures = run_batch_operation(
|
||||
func=partial(
|
||||
ModelBLL.delete_model,
|
||||
company_id=company_id,
|
||||
user_id=call.identity.user,
|
||||
force=request.force,
|
||||
delete_external_artifacts=request.delete_external_artifacts,
|
||||
),
|
||||
ids=request.ids,
|
||||
)
|
||||
|
||||
if results:
|
||||
projects = set(model.project for _, (_, model) in results)
|
||||
succeeded = []
|
||||
deleted_models = []
|
||||
for _id, (deleted, model) in results:
|
||||
succeeded.append(dict(id=_id, deleted=bool(deleted), url=model.uri))
|
||||
deleted_models.append(model)
|
||||
|
||||
if deleted_models:
|
||||
_delete_model_events(
|
||||
company_id=company_id,
|
||||
user_id=user_id,
|
||||
models=deleted_models,
|
||||
delete_external_artifacts=request.delete_external_artifacts,
|
||||
)
|
||||
projects = set(model.project for model in deleted_models)
|
||||
_reset_cached_tags(company_id, projects=list(projects))
|
||||
|
||||
call.result.data_model = BatchResponse(
|
||||
succeeded=[
|
||||
dict(id=_id, deleted=bool(deleted), url=model.uri)
|
||||
for _id, (deleted, model) in results
|
||||
],
|
||||
succeeded=succeeded,
|
||||
failed=failures,
|
||||
)
|
||||
|
||||
|
@ -1,8 +1,6 @@
|
||||
import re
|
||||
from functools import partial
|
||||
|
||||
import attr
|
||||
|
||||
from apiserver.apierrors.errors.bad_request import CannotRemoveAllRuns
|
||||
from apiserver.apimodels.pipelines import (
|
||||
StartPipelineRequest,
|
||||
@ -18,6 +16,7 @@ from apiserver.database.model.project import Project
|
||||
from apiserver.database.model.task.task import Task, TaskType
|
||||
from apiserver.service_repo import APICall, endpoint
|
||||
from apiserver.utilities.dicts import nested_get
|
||||
from .tasks import _delete_task_events
|
||||
|
||||
org_bll = OrgBLL()
|
||||
project_bll = ProjectBLL()
|
||||
@ -62,21 +61,30 @@ def delete_runs(call: APICall, company_id: str, request: DeleteRunsRequest):
|
||||
identity=call.identity,
|
||||
move_to_trash=False,
|
||||
force=True,
|
||||
return_file_urls=False,
|
||||
delete_output_models=True,
|
||||
status_message="",
|
||||
status_reason="Pipeline run deleted",
|
||||
delete_external_artifacts=True,
|
||||
include_pipeline_steps=True,
|
||||
),
|
||||
ids=list(ids),
|
||||
)
|
||||
|
||||
succeeded = []
|
||||
tasks = {}
|
||||
if results:
|
||||
for _id, (deleted, task, cleanup_res) in results:
|
||||
if deleted:
|
||||
tasks[_id] = cleanup_res
|
||||
succeeded.append(
|
||||
dict(id=_id, deleted=bool(deleted), **attr.asdict(cleanup_res))
|
||||
dict(id=_id, deleted=bool(deleted), **cleanup_res.to_res_dict(False))
|
||||
)
|
||||
|
||||
if tasks:
|
||||
_delete_task_events(
|
||||
company_id=company_id,
|
||||
user_id=call.identity.user,
|
||||
tasks=tasks,
|
||||
delete_external_artifacts=True
|
||||
)
|
||||
|
||||
call.result.data = dict(succeeded=succeeded, failed=failures)
|
||||
|
@ -370,6 +370,7 @@ def delete(call: APICall, company_id: str, request: DeleteRequest):
|
||||
delete_external_artifacts=request.delete_external_artifacts,
|
||||
)
|
||||
_reset_cached_tags(company_id, projects=list(affected_projects))
|
||||
# noinspection PyTypeChecker
|
||||
call.result.data = {**attr.asdict(res)}
|
||||
|
||||
|
||||
|
@ -1,9 +1,9 @@
|
||||
import itertools
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
from functools import partial
|
||||
from typing import Sequence, Union, Tuple
|
||||
from typing import Sequence, Union, Tuple, Mapping
|
||||
|
||||
import attr
|
||||
from mongoengine import EmbeddedDocument, Q
|
||||
from mongoengine.queryset.transform import COMPARISON_OPERATORS
|
||||
from pymongo import UpdateOne
|
||||
@ -80,6 +80,11 @@ from apiserver.bll.task import (
|
||||
TaskBLL,
|
||||
ChangeStatusRequest,
|
||||
)
|
||||
from apiserver.bll.task.task_cleanup import (
|
||||
delete_task_events_and_collect_urls,
|
||||
schedule_for_delete,
|
||||
CleanupResult,
|
||||
)
|
||||
from apiserver.bll.task.artifacts import (
|
||||
artifacts_prepare_for_save,
|
||||
artifacts_unprepare_from_saved,
|
||||
@ -109,6 +114,7 @@ from apiserver.bll.task.utils import (
|
||||
get_task_with_write_access,
|
||||
)
|
||||
from apiserver.bll.util import run_batch_operation, update_project_time
|
||||
from apiserver.config_repo import config
|
||||
from apiserver.database.errors import translate_errors_context
|
||||
from apiserver.database.model import EntityVisibility
|
||||
from apiserver.database.model.project import Project
|
||||
@ -295,9 +301,7 @@ def get_types(call: APICall, company_id, request: GetTypesRequest):
|
||||
}
|
||||
|
||||
|
||||
@endpoint(
|
||||
"tasks.stop", response_data_model=UpdateResponse
|
||||
)
|
||||
@endpoint("tasks.stop", response_data_model=UpdateResponse)
|
||||
def stop(call: APICall, company_id, request: StopRequest):
|
||||
"""
|
||||
stop
|
||||
@ -1016,21 +1020,88 @@ def dequeue_many(call: APICall, company_id, request: DequeueManyRequest):
|
||||
)
|
||||
|
||||
|
||||
def _delete_task_events(
|
||||
company_id: str,
|
||||
user_id: str,
|
||||
tasks: Mapping[str, CleanupResult],
|
||||
delete_external_artifacts: bool,
|
||||
):
|
||||
task_ids = list(tasks)
|
||||
deleted_model_ids = set(
|
||||
itertools.chain.from_iterable(
|
||||
cr.deleted_model_ids for cr in tasks.values() if cr.deleted_model_ids
|
||||
)
|
||||
)
|
||||
|
||||
delete_external_artifacts = delete_external_artifacts and config.get(
|
||||
"services.async_urls_delete.enabled", True
|
||||
)
|
||||
if delete_external_artifacts:
|
||||
for t_id, cleanup_res in tasks.items():
|
||||
urls = set(cleanup_res.urls.model_urls) | set(
|
||||
cleanup_res.urls.artifact_urls
|
||||
)
|
||||
if urls:
|
||||
schedule_for_delete(
|
||||
task_id=t_id,
|
||||
company=company_id,
|
||||
user=user_id,
|
||||
urls=urls,
|
||||
can_delete_folders=False,
|
||||
)
|
||||
|
||||
event_urls = delete_task_events_and_collect_urls(
|
||||
company=company_id, task_ids=task_ids
|
||||
)
|
||||
if deleted_model_ids:
|
||||
event_urls.update(
|
||||
delete_task_events_and_collect_urls(
|
||||
company=company_id,
|
||||
task_ids=list(deleted_model_ids),
|
||||
model=True,
|
||||
)
|
||||
)
|
||||
|
||||
if event_urls:
|
||||
schedule_for_delete(
|
||||
task_id=task_ids[0],
|
||||
company=company_id,
|
||||
user=user_id,
|
||||
urls=event_urls,
|
||||
can_delete_folders=False,
|
||||
)
|
||||
else:
|
||||
event_bll.delete_task_events(company_id, task_ids)
|
||||
if deleted_model_ids:
|
||||
event_bll.delete_task_events(
|
||||
company_id, list(deleted_model_ids), model=True
|
||||
)
|
||||
|
||||
|
||||
@endpoint(
    "tasks.reset", request_data_model=ResetRequest, response_data_model=ResetResponse
)
def reset(call: APICall, company_id, request: ResetRequest):
    """
    Reset a single task and delete its events.

    The task is reset through ``reset_task``, its events (and those of any
    models deleted with it) are removed through ``_delete_task_events``, and
    the call result is set to a ``ResetResponse`` built from the task updates,
    the cleanup result and the dequeue result.
    """
    task_id = request.task
    # NOTE(review): URL filtering for the response is done below by
    # to_res_dict(request.return_file_urls); confirm that reset_task still
    # needs/accepts return_file_urls.
    dequeued, cleanup_res, updates = reset_task(
        task_id=task_id,
        company_id=company_id,
        identity=call.identity,
        force=request.force,
        return_file_urls=request.return_file_urls,
        delete_output_models=request.delete_output_models,
        clear_all=request.clear_all,
    )
    _delete_task_events(
        company_id=company_id,
        user_id=call.identity.user,
        tasks={task_id: cleanup_res},
        delete_external_artifacts=request.delete_external_artifacts,
    )
    res = ResetResponse(
        **updates,
        **cleanup_res.to_res_dict(request.return_file_urls),
        dequeued=dequeued,
    )
    call.result.data_model = res
|
||||
|
||||
|
||||
@ -1046,25 +1117,32 @@ def reset_many(call: APICall, company_id, request: ResetManyRequest):
|
||||
company_id=company_id,
|
||||
identity=call.identity,
|
||||
force=request.force,
|
||||
return_file_urls=request.return_file_urls,
|
||||
delete_output_models=request.delete_output_models,
|
||||
clear_all=request.clear_all,
|
||||
delete_external_artifacts=request.delete_external_artifacts,
|
||||
),
|
||||
ids=request.ids,
|
||||
)
|
||||
|
||||
succeeded = []
|
||||
tasks = {}
|
||||
for _id, (dequeued, cleanup, res) in results:
|
||||
tasks[_id] = cleanup
|
||||
succeeded.append(
|
||||
ResetBatchItem(
|
||||
id=_id,
|
||||
dequeued=bool(dequeued.get("removed")) if dequeued else False,
|
||||
**attr.asdict(cleanup),
|
||||
**cleanup.to_res_dict(request.return_file_urls),
|
||||
**res,
|
||||
)
|
||||
)
|
||||
|
||||
_delete_task_events(
|
||||
company_id=company_id,
|
||||
user_id=call.identity.user,
|
||||
tasks=tasks,
|
||||
delete_external_artifacts=request.delete_external_artifacts,
|
||||
)
|
||||
|
||||
call.result.data_model = ResetManyResponse(
|
||||
succeeded=succeeded,
|
||||
failed=failures,
|
||||
@ -1160,16 +1238,22 @@ def delete(call: APICall, company_id, request: DeleteRequest):
|
||||
identity=call.identity,
|
||||
move_to_trash=request.move_to_trash,
|
||||
force=request.force,
|
||||
return_file_urls=request.return_file_urls,
|
||||
delete_output_models=request.delete_output_models,
|
||||
status_message=request.status_message,
|
||||
status_reason=request.status_reason,
|
||||
delete_external_artifacts=request.delete_external_artifacts,
|
||||
include_pipeline_steps=request.include_pipeline_steps,
|
||||
)
|
||||
if deleted:
|
||||
_delete_task_events(
|
||||
company_id=company_id,
|
||||
user_id=call.identity.user,
|
||||
tasks={request.task: cleanup_res},
|
||||
delete_external_artifacts=request.delete_external_artifacts,
|
||||
)
|
||||
_reset_cached_tags(company_id, projects=[task.project] if task.project else [])
|
||||
call.result.data = dict(deleted=bool(deleted), **attr.asdict(cleanup_res))
|
||||
call.result.data = dict(
|
||||
deleted=bool(deleted), **cleanup_res.to_res_dict(request.return_file_urls)
|
||||
)
|
||||
|
||||
|
||||
@endpoint("tasks.delete_many", request_data_model=DeleteManyRequest)
|
||||
@ -1181,25 +1265,41 @@ def delete_many(call: APICall, company_id, request: DeleteManyRequest):
|
||||
identity=call.identity,
|
||||
move_to_trash=request.move_to_trash,
|
||||
force=request.force,
|
||||
return_file_urls=request.return_file_urls,
|
||||
delete_output_models=request.delete_output_models,
|
||||
status_message=request.status_message,
|
||||
status_reason=request.status_reason,
|
||||
delete_external_artifacts=request.delete_external_artifacts,
|
||||
include_pipeline_steps=request.include_pipeline_steps,
|
||||
),
|
||||
ids=request.ids,
|
||||
)
|
||||
|
||||
succeeded = []
|
||||
tasks = {}
|
||||
if results:
|
||||
projects = set(task.project for _, (_, task, _) in results)
|
||||
projects = set()
|
||||
for _id, (deleted, task, cleanup_res) in results:
|
||||
if deleted:
|
||||
projects.add(task.project)
|
||||
tasks[_id] = cleanup_res
|
||||
succeeded.append(
|
||||
dict(
|
||||
id=_id,
|
||||
deleted=bool(deleted),
|
||||
**cleanup_res.to_res_dict(request.return_file_urls),
|
||||
)
|
||||
)
|
||||
|
||||
if tasks:
|
||||
_delete_task_events(
|
||||
company_id=company_id,
|
||||
user_id=call.identity.user,
|
||||
tasks=tasks,
|
||||
delete_external_artifacts=request.delete_external_artifacts,
|
||||
)
|
||||
_reset_cached_tags(company_id, projects=list(projects))
|
||||
|
||||
call.result.data = dict(
|
||||
succeeded=[
|
||||
dict(id=_id, deleted=bool(deleted), **attr.asdict(cleanup_res))
|
||||
for _id, (deleted, _, cleanup_res) in results
|
||||
],
|
||||
succeeded=succeeded,
|
||||
failed=failures,
|
||||
)
|
||||
|
||||
|
@ -59,7 +59,7 @@ class TestTasksResetDelete(TestService):
|
||||
event_urls.update(self.send_model_events(model))
|
||||
res = self.assert_delete_task(task, force=True, return_file_urls=True)
|
||||
self.assertEqual(set(res.urls.model_urls), draft_model_urls)
|
||||
self.assertEqual(set(res.urls.event_urls), event_urls)
|
||||
self.assertFalse(set(res.urls.event_urls)) # event urls are not returned anymore
|
||||
self.assertEqual(set(res.urls.artifact_urls), artifact_urls)
|
||||
|
||||
def test_reset(self):
|
||||
@ -84,7 +84,7 @@ class TestTasksResetDelete(TestService):
|
||||
) = self.create_task_with_data()
|
||||
res = self.api.tasks.reset(task=task, force=True, return_file_urls=True)
|
||||
self.assertEqual(set(res.urls.model_urls), draft_model_urls)
|
||||
self.assertEqual(set(res.urls.event_urls), event_urls)
|
||||
self.assertFalse(res.urls.event_urls) # event urls are not returned anymore
|
||||
self.assertEqual(set(res.urls.artifact_urls), artifact_urls)
|
||||
|
||||
def test_model_delete(self):
|
||||
@ -124,7 +124,7 @@ class TestTasksResetDelete(TestService):
|
||||
self.assertEqual(res.disassociated_tasks, 0)
|
||||
self.assertEqual(res.deleted_tasks, 1)
|
||||
self.assertEqual(res.deleted_models, 2)
|
||||
self.assertEqual(set(res.urls.event_urls), event_urls)
|
||||
self.assertFalse(set(res.urls.event_urls)) # event urls are not returned anymore
|
||||
self.assertEqual(set(res.urls.artifact_urls), artifact_urls)
|
||||
with self.api.raises(errors.bad_request.InvalidTaskId):
|
||||
self.api.tasks.get_by_id(task=task)
|
||||
|
Loading…
Reference in New Issue
Block a user