mirror of
https://github.com/clearml/clearml-server
synced 2025-05-09 14:21:16 +00:00
Schedule external file URLs for deletion on models deletion
This commit is contained in:
parent
0c37ced2a1
commit
9206a7c57d
@ -44,10 +44,12 @@ class ModelRequest(models.Base):
|
|||||||
|
|
||||||
class DeleteModelRequest(ModelRequest):
|
class DeleteModelRequest(ModelRequest):
|
||||||
force = fields.BoolField(default=False)
|
force = fields.BoolField(default=False)
|
||||||
|
delete_external_artifacts = fields.BoolField(default=True)
|
||||||
|
|
||||||
|
|
||||||
class ModelsDeleteManyRequest(BatchRequest):
|
class ModelsDeleteManyRequest(BatchRequest):
|
||||||
force = fields.BoolField(default=False)
|
force = fields.BoolField(default=False)
|
||||||
|
delete_external_artifacts = fields.BoolField(default=True)
|
||||||
|
|
||||||
|
|
||||||
class PublishModelRequest(ModelRequest):
|
class PublishModelRequest(ModelRequest):
|
||||||
|
@ -6,6 +6,7 @@ from mongoengine import Q
|
|||||||
from apiserver.apierrors import errors
|
from apiserver.apierrors import errors
|
||||||
from apiserver.apimodels.models import ModelTaskPublishResponse
|
from apiserver.apimodels.models import ModelTaskPublishResponse
|
||||||
from apiserver.bll.task.utils import deleted_prefix, get_last_metric_updates
|
from apiserver.bll.task.utils import deleted_prefix, get_last_metric_updates
|
||||||
|
from apiserver.config_repo import config
|
||||||
from apiserver.database.model import EntityVisibility
|
from apiserver.database.model import EntityVisibility
|
||||||
from apiserver.database.model.model import Model
|
from apiserver.database.model.model import Model
|
||||||
from apiserver.database.model.task.task import Task, TaskStatus
|
from apiserver.database.model.task.task import Task, TaskStatus
|
||||||
@ -82,7 +83,7 @@ class ModelBLL:
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def delete_model(
|
def delete_model(
|
||||||
cls, model_id: str, company_id: str, force: bool
|
cls, model_id: str, company_id: str, user_id: str, force: bool, delete_external_artifacts: bool = True,
|
||||||
) -> Tuple[int, Model]:
|
) -> Tuple[int, Model]:
|
||||||
model = cls.get_company_model_by_id(
|
model = cls.get_company_model_by_id(
|
||||||
company_id=company_id,
|
company_id=company_id,
|
||||||
@ -128,7 +129,32 @@ class ModelBLL:
|
|||||||
upsert=False,
|
upsert=False,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
task.update(pull__models__output__model=model_id, set__last_change=now)
|
task.update(
|
||||||
|
pull__models__output__model=model_id, set__last_change=now
|
||||||
|
)
|
||||||
|
|
||||||
|
delete_external_artifacts = delete_external_artifacts and config.get(
|
||||||
|
"services.async_urls_delete.enabled", True
|
||||||
|
)
|
||||||
|
if delete_external_artifacts:
|
||||||
|
from apiserver.bll.task.task_cleanup import (
|
||||||
|
collect_debug_image_urls,
|
||||||
|
collect_plot_image_urls,
|
||||||
|
_schedule_for_delete,
|
||||||
|
)
|
||||||
|
urls = set()
|
||||||
|
urls.update(collect_debug_image_urls(company_id, model_id))
|
||||||
|
urls.update(collect_plot_image_urls(company_id, model_id))
|
||||||
|
if model.uri:
|
||||||
|
urls.add(model.uri)
|
||||||
|
if urls:
|
||||||
|
_schedule_for_delete(
|
||||||
|
task_id=model_id,
|
||||||
|
company=company_id,
|
||||||
|
user=user_id,
|
||||||
|
urls=urls,
|
||||||
|
can_delete_folders=False,
|
||||||
|
)
|
||||||
|
|
||||||
del_count = Model.objects(id=model_id, company=company_id).delete()
|
del_count = Model.objects(id=model_id, company=company_id).delete()
|
||||||
return del_count, model
|
return del_count, model
|
||||||
|
@ -77,7 +77,7 @@ def delete_project(
|
|||||||
raise errors.bad_request.InvalidProjectId(id=project_id)
|
raise errors.bad_request.InvalidProjectId(id=project_id)
|
||||||
|
|
||||||
delete_external_artifacts = delete_external_artifacts and config.get(
|
delete_external_artifacts = delete_external_artifacts and config.get(
|
||||||
"services.async_urls_delete.enabled", False
|
"services.async_urls_delete.enabled", True
|
||||||
)
|
)
|
||||||
is_pipeline = "pipeline" in (project.system_tags or [])
|
is_pipeline = "pipeline" in (project.system_tags or [])
|
||||||
project_ids = _ids_with_children([project_id])
|
project_ids = _ids_with_children([project_id])
|
||||||
|
@ -105,13 +105,21 @@ def collect_debug_image_urls(company: str, task_or_model: str) -> Set[str]:
|
|||||||
|
|
||||||
|
|
||||||
supported_storage_types = {
|
supported_storage_types = {
|
||||||
"https://": StorageType.fileserver,
|
|
||||||
"http://": StorageType.fileserver,
|
|
||||||
"s3://": StorageType.s3,
|
"s3://": StorageType.s3,
|
||||||
"azure://": StorageType.azure,
|
"azure://": StorageType.azure,
|
||||||
"gs://": StorageType.gs,
|
"gs://": StorageType.gs,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
supported_storage_types.update(
|
||||||
|
{
|
||||||
|
p: StorageType.fileserver
|
||||||
|
for p in config.get(
|
||||||
|
"services.async_urls_delete.fileserver.url_prefixes",
|
||||||
|
["https://", "http://"],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _schedule_for_delete(
|
def _schedule_for_delete(
|
||||||
company: str, user: str, task_id: str, urls: Set[str], can_delete_folders: bool,
|
company: str, user: str, task_id: str, urls: Set[str], can_delete_folders: bool,
|
||||||
@ -196,7 +204,7 @@ def cleanup_task(
|
|||||||
task, force
|
task, force
|
||||||
)
|
)
|
||||||
delete_external_artifacts = delete_external_artifacts and config.get(
|
delete_external_artifacts = delete_external_artifacts and config.get(
|
||||||
"services.async_urls_delete.enabled", False
|
"services.async_urls_delete.enabled", True
|
||||||
)
|
)
|
||||||
event_urls, artifact_urls, model_urls = set(), set(), set()
|
event_urls, artifact_urls, model_urls = set(), set(), set()
|
||||||
if return_file_urls or delete_external_artifacts:
|
if return_file_urls or delete_external_artifacts:
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
# if set to True then on task delete/reset external file urls for know storage types are scheduled for async delete
|
# if set to true then on task delete/reset external file urls for known storage types are scheduled for async delete
|
||||||
# otherwise they are returned to a client for the client side delete
|
# otherwise they are returned to a client for the client side delete
|
||||||
enabled: true
|
enabled: true
|
||||||
max_retries: 3
|
max_retries: 3
|
||||||
|
@ -522,7 +522,11 @@ def publish_many(call: APICall, company_id, request: ModelsPublishManyRequest):
|
|||||||
@endpoint("models.delete", request_data_model=DeleteModelRequest)
|
@endpoint("models.delete", request_data_model=DeleteModelRequest)
|
||||||
def delete(call: APICall, company_id, request: DeleteModelRequest):
|
def delete(call: APICall, company_id, request: DeleteModelRequest):
|
||||||
del_count, model = ModelBLL.delete_model(
|
del_count, model = ModelBLL.delete_model(
|
||||||
model_id=request.model, company_id=company_id, force=request.force
|
model_id=request.model,
|
||||||
|
company_id=company_id,
|
||||||
|
user_id=call.identity.user,
|
||||||
|
force=request.force,
|
||||||
|
delete_external_artifacts=request.delete_external_artifacts,
|
||||||
)
|
)
|
||||||
if del_count:
|
if del_count:
|
||||||
_reset_cached_tags(
|
_reset_cached_tags(
|
||||||
@ -539,7 +543,13 @@ def delete(call: APICall, company_id, request: DeleteModelRequest):
|
|||||||
)
|
)
|
||||||
def delete(call: APICall, company_id, request: ModelsDeleteManyRequest):
|
def delete(call: APICall, company_id, request: ModelsDeleteManyRequest):
|
||||||
results, failures = run_batch_operation(
|
results, failures = run_batch_operation(
|
||||||
func=partial(ModelBLL.delete_model, company_id=company_id, force=request.force),
|
func=partial(
|
||||||
|
ModelBLL.delete_model,
|
||||||
|
company_id=company_id,
|
||||||
|
user_id=call.identity.user,
|
||||||
|
force=request.force,
|
||||||
|
delete_external_artifacts=request.delete_external_artifacts,
|
||||||
|
),
|
||||||
ids=request.ids,
|
ids=request.ids,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -134,9 +134,7 @@ class TestModelsService(TestService):
|
|||||||
self._assert_model_ready(model_id, True)
|
self._assert_model_ready(model_id, True)
|
||||||
|
|
||||||
def test_publish_task_no_output_model(self):
|
def test_publish_task_no_output_model(self):
|
||||||
task_id = self.create_temp(
|
task_id = self.create_temp(service="tasks", type="testing", name="server-test")
|
||||||
service="tasks", type="testing", name="server-test"
|
|
||||||
)
|
|
||||||
self.api.tasks.started(task=task_id)
|
self.api.tasks.started(task=task_id)
|
||||||
self.api.tasks.stopped(task=task_id)
|
self.api.tasks.stopped(task=task_id)
|
||||||
|
|
||||||
@ -296,7 +294,10 @@ class TestModelsService(TestService):
|
|||||||
|
|
||||||
def _create_task_and_model(self):
|
def _create_task_and_model(self):
|
||||||
execution_model_id = self.create_temp(
|
execution_model_id = self.create_temp(
|
||||||
service="models", name="test", uri="file:///a", labels={}
|
service="models",
|
||||||
|
name="test",
|
||||||
|
uri="https://files.trains-master.hosted.allegro.ai/a.jpg",
|
||||||
|
labels={},
|
||||||
)
|
)
|
||||||
task_id = self.create_temp(
|
task_id = self.create_temp(
|
||||||
service="tasks",
|
service="tasks",
|
||||||
|
Loading…
Reference in New Issue
Block a user