Schedule external file URLs for deletion on models deletion

This commit is contained in:
allegroai 2023-05-25 19:36:28 +03:00
parent 0c37ced2a1
commit 9206a7c57d
7 changed files with 60 additions and 13 deletions

View File

@ -44,10 +44,12 @@ class ModelRequest(models.Base):
class DeleteModelRequest(ModelRequest): class DeleteModelRequest(ModelRequest):
force = fields.BoolField(default=False) force = fields.BoolField(default=False)
delete_external_artifacts = fields.BoolField(default=True)
class ModelsDeleteManyRequest(BatchRequest): class ModelsDeleteManyRequest(BatchRequest):
force = fields.BoolField(default=False) force = fields.BoolField(default=False)
delete_external_artifacts = fields.BoolField(default=True)
class PublishModelRequest(ModelRequest): class PublishModelRequest(ModelRequest):

View File

@ -6,6 +6,7 @@ from mongoengine import Q
from apiserver.apierrors import errors from apiserver.apierrors import errors
from apiserver.apimodels.models import ModelTaskPublishResponse from apiserver.apimodels.models import ModelTaskPublishResponse
from apiserver.bll.task.utils import deleted_prefix, get_last_metric_updates from apiserver.bll.task.utils import deleted_prefix, get_last_metric_updates
from apiserver.config_repo import config
from apiserver.database.model import EntityVisibility from apiserver.database.model import EntityVisibility
from apiserver.database.model.model import Model from apiserver.database.model.model import Model
from apiserver.database.model.task.task import Task, TaskStatus from apiserver.database.model.task.task import Task, TaskStatus
@ -82,7 +83,7 @@ class ModelBLL:
@classmethod @classmethod
def delete_model( def delete_model(
cls, model_id: str, company_id: str, force: bool cls, model_id: str, company_id: str, user_id: str, force: bool, delete_external_artifacts: bool = True,
) -> Tuple[int, Model]: ) -> Tuple[int, Model]:
model = cls.get_company_model_by_id( model = cls.get_company_model_by_id(
company_id=company_id, company_id=company_id,
@ -128,7 +129,32 @@ class ModelBLL:
upsert=False, upsert=False,
) )
else: else:
task.update(pull__models__output__model=model_id, set__last_change=now) task.update(
pull__models__output__model=model_id, set__last_change=now
)
delete_external_artifacts = delete_external_artifacts and config.get(
"services.async_urls_delete.enabled", True
)
if delete_external_artifacts:
from apiserver.bll.task.task_cleanup import (
collect_debug_image_urls,
collect_plot_image_urls,
_schedule_for_delete,
)
urls = set()
urls.update(collect_debug_image_urls(company_id, model_id))
urls.update(collect_plot_image_urls(company_id, model_id))
if model.uri:
urls.add(model.uri)
if urls:
_schedule_for_delete(
task_id=model_id,
company=company_id,
user=user_id,
urls=urls,
can_delete_folders=False,
)
del_count = Model.objects(id=model_id, company=company_id).delete() del_count = Model.objects(id=model_id, company=company_id).delete()
return del_count, model return del_count, model

View File

@ -77,7 +77,7 @@ def delete_project(
raise errors.bad_request.InvalidProjectId(id=project_id) raise errors.bad_request.InvalidProjectId(id=project_id)
delete_external_artifacts = delete_external_artifacts and config.get( delete_external_artifacts = delete_external_artifacts and config.get(
"services.async_urls_delete.enabled", False "services.async_urls_delete.enabled", True
) )
is_pipeline = "pipeline" in (project.system_tags or []) is_pipeline = "pipeline" in (project.system_tags or [])
project_ids = _ids_with_children([project_id]) project_ids = _ids_with_children([project_id])

View File

@ -105,13 +105,21 @@ def collect_debug_image_urls(company: str, task_or_model: str) -> Set[str]:
supported_storage_types = { supported_storage_types = {
"https://": StorageType.fileserver,
"http://": StorageType.fileserver,
"s3://": StorageType.s3, "s3://": StorageType.s3,
"azure://": StorageType.azure, "azure://": StorageType.azure,
"gs://": StorageType.gs, "gs://": StorageType.gs,
} }
supported_storage_types.update(
{
p: StorageType.fileserver
for p in config.get(
"services.async_urls_delete.fileserver.url_prefixes",
["https://", "http://"],
)
}
)
def _schedule_for_delete( def _schedule_for_delete(
company: str, user: str, task_id: str, urls: Set[str], can_delete_folders: bool, company: str, user: str, task_id: str, urls: Set[str], can_delete_folders: bool,
@ -196,7 +204,7 @@ def cleanup_task(
task, force task, force
) )
delete_external_artifacts = delete_external_artifacts and config.get( delete_external_artifacts = delete_external_artifacts and config.get(
"services.async_urls_delete.enabled", False "services.async_urls_delete.enabled", True
) )
event_urls, artifact_urls, model_urls = set(), set(), set() event_urls, artifact_urls, model_urls = set(), set(), set()
if return_file_urls or delete_external_artifacts: if return_file_urls or delete_external_artifacts:

View File

@ -1,4 +1,4 @@
# if set to True then on task delete/reset external file urls for know storage types are scheduled for async delete # if set to true then on task delete/reset external file urls for known storage types are scheduled for async delete
# otherwise they are returned to a client for the client side delete # otherwise they are returned to a client for the client side delete
enabled: true enabled: true
max_retries: 3 max_retries: 3

View File

@ -522,7 +522,11 @@ def publish_many(call: APICall, company_id, request: ModelsPublishManyRequest):
@endpoint("models.delete", request_data_model=DeleteModelRequest) @endpoint("models.delete", request_data_model=DeleteModelRequest)
def delete(call: APICall, company_id, request: DeleteModelRequest): def delete(call: APICall, company_id, request: DeleteModelRequest):
del_count, model = ModelBLL.delete_model( del_count, model = ModelBLL.delete_model(
model_id=request.model, company_id=company_id, force=request.force model_id=request.model,
company_id=company_id,
user_id=call.identity.user,
force=request.force,
delete_external_artifacts=request.delete_external_artifacts,
) )
if del_count: if del_count:
_reset_cached_tags( _reset_cached_tags(
@ -539,7 +543,13 @@ def delete(call: APICall, company_id, request: DeleteModelRequest):
) )
def delete(call: APICall, company_id, request: ModelsDeleteManyRequest): def delete(call: APICall, company_id, request: ModelsDeleteManyRequest):
results, failures = run_batch_operation( results, failures = run_batch_operation(
func=partial(ModelBLL.delete_model, company_id=company_id, force=request.force), func=partial(
ModelBLL.delete_model,
company_id=company_id,
user_id=call.identity.user,
force=request.force,
delete_external_artifacts=request.delete_external_artifacts,
),
ids=request.ids, ids=request.ids,
) )

View File

@ -134,9 +134,7 @@ class TestModelsService(TestService):
self._assert_model_ready(model_id, True) self._assert_model_ready(model_id, True)
def test_publish_task_no_output_model(self): def test_publish_task_no_output_model(self):
task_id = self.create_temp( task_id = self.create_temp(service="tasks", type="testing", name="server-test")
service="tasks", type="testing", name="server-test"
)
self.api.tasks.started(task=task_id) self.api.tasks.started(task=task_id)
self.api.tasks.stopped(task=task_id) self.api.tasks.stopped(task=task_id)
@ -296,7 +294,10 @@ class TestModelsService(TestService):
def _create_task_and_model(self): def _create_task_and_model(self):
execution_model_id = self.create_temp( execution_model_id = self.create_temp(
service="models", name="test", uri="file:///a", labels={} service="models",
name="test",
uri="https://files.trains-master.hosted.allegro.ai/a.jpg",
labels={},
) )
task_id = self.create_temp( task_id = self.create_temp(
service="tasks", service="tasks",