diff --git a/apiserver/apimodels/models.py b/apiserver/apimodels/models.py index 1844e8a..be48464 100644 --- a/apiserver/apimodels/models.py +++ b/apiserver/apimodels/models.py @@ -44,10 +44,12 @@ class ModelRequest(models.Base): class DeleteModelRequest(ModelRequest): force = fields.BoolField(default=False) + delete_external_artifacts = fields.BoolField(default=True) class ModelsDeleteManyRequest(BatchRequest): force = fields.BoolField(default=False) + delete_external_artifacts = fields.BoolField(default=True) class PublishModelRequest(ModelRequest): diff --git a/apiserver/bll/model/__init__.py b/apiserver/bll/model/__init__.py index 92e19b3..e8bd658 100644 --- a/apiserver/bll/model/__init__.py +++ b/apiserver/bll/model/__init__.py @@ -6,6 +6,7 @@ from mongoengine import Q from apiserver.apierrors import errors from apiserver.apimodels.models import ModelTaskPublishResponse from apiserver.bll.task.utils import deleted_prefix, get_last_metric_updates +from apiserver.config_repo import config from apiserver.database.model import EntityVisibility from apiserver.database.model.model import Model from apiserver.database.model.task.task import Task, TaskStatus @@ -82,7 +83,7 @@ class ModelBLL: @classmethod def delete_model( - cls, model_id: str, company_id: str, force: bool + cls, model_id: str, company_id: str, user_id: str, force: bool, delete_external_artifacts: bool = True, ) -> Tuple[int, Model]: model = cls.get_company_model_by_id( company_id=company_id, @@ -128,7 +129,32 @@ class ModelBLL: upsert=False, ) else: - task.update(pull__models__output__model=model_id, set__last_change=now) + task.update( + pull__models__output__model=model_id, set__last_change=now + ) + + delete_external_artifacts = delete_external_artifacts and config.get( + "services.async_urls_delete.enabled", True + ) + if delete_external_artifacts: + from apiserver.bll.task.task_cleanup import ( + collect_debug_image_urls, + collect_plot_image_urls, + _schedule_for_delete, + ) + urls = set() + urls.update(collect_debug_image_urls(company_id, model_id)) + urls.update(collect_plot_image_urls(company_id, model_id)) + if model.uri: + urls.add(model.uri) + if urls: + _schedule_for_delete( + task_id=model_id, + company=company_id, + user=user_id, + urls=urls, + can_delete_folders=False, + ) del_count = Model.objects(id=model_id, company=company_id).delete() return del_count, model diff --git a/apiserver/bll/project/project_cleanup.py b/apiserver/bll/project/project_cleanup.py index b93f0aa..cb3b4aa 100644 --- a/apiserver/bll/project/project_cleanup.py +++ b/apiserver/bll/project/project_cleanup.py @@ -77,7 +77,7 @@ def delete_project( raise errors.bad_request.InvalidProjectId(id=project_id) delete_external_artifacts = delete_external_artifacts and config.get( - "services.async_urls_delete.enabled", False + "services.async_urls_delete.enabled", True ) is_pipeline = "pipeline" in (project.system_tags or []) project_ids = _ids_with_children([project_id]) diff --git a/apiserver/bll/task/task_cleanup.py b/apiserver/bll/task/task_cleanup.py index e060fce..83f84f4 100644 --- a/apiserver/bll/task/task_cleanup.py +++ b/apiserver/bll/task/task_cleanup.py @@ -105,13 +105,21 @@ def collect_debug_image_urls(company: str, task_or_model: str) -> Set[str]: supported_storage_types = { - "https://": StorageType.fileserver, - "http://": StorageType.fileserver, "s3://": StorageType.s3, "azure://": StorageType.azure, "gs://": StorageType.gs, } +supported_storage_types.update( + { + p: StorageType.fileserver + for p in config.get( + "services.async_urls_delete.fileserver.url_prefixes", + ["https://", "http://"], + ) + } +) + def _schedule_for_delete( company: str, user: str, task_id: str, urls: Set[str], can_delete_folders: bool, @@ -196,7 +204,7 @@ def cleanup_task( task, force ) delete_external_artifacts = delete_external_artifacts and config.get( - "services.async_urls_delete.enabled", False + "services.async_urls_delete.enabled", True ) event_urls, artifact_urls, model_urls = set(), set(), set() if return_file_urls or delete_external_artifacts: diff --git a/apiserver/config/default/services/async_urls_delete.conf b/apiserver/config/default/services/async_urls_delete.conf index 26dc5e4..ce78dc3 100644 --- a/apiserver/config/default/services/async_urls_delete.conf +++ b/apiserver/config/default/services/async_urls_delete.conf @@ -1,4 +1,4 @@ -# if set to True then on task delete/reset external file urls for know storage types are scheduled for async delete +# if set to true then on task delete/reset external file urls for known storage types are scheduled for async delete # otherwise they are returned to a client for the client side delete enabled: true max_retries: 3 diff --git a/apiserver/services/models.py b/apiserver/services/models.py index f570419..a51aca5 100644 --- a/apiserver/services/models.py +++ b/apiserver/services/models.py @@ -522,7 +522,11 @@ def publish_many(call: APICall, company_id, request: ModelsPublishManyRequest): @endpoint("models.delete", request_data_model=DeleteModelRequest) def delete(call: APICall, company_id, request: DeleteModelRequest): del_count, model = ModelBLL.delete_model( - model_id=request.model, company_id=company_id, force=request.force + model_id=request.model, + company_id=company_id, + user_id=call.identity.user, + force=request.force, + delete_external_artifacts=request.delete_external_artifacts, ) if del_count: _reset_cached_tags( @@ -539,7 +543,13 @@ def delete(call: APICall, company_id, request: DeleteModelRequest): ) def delete(call: APICall, company_id, request: ModelsDeleteManyRequest): results, failures = run_batch_operation( - func=partial(ModelBLL.delete_model, company_id=company_id, force=request.force), + func=partial( + ModelBLL.delete_model, + company_id=company_id, + user_id=call.identity.user, + force=request.force, + delete_external_artifacts=request.delete_external_artifacts, + ), ids=request.ids, ) diff --git a/apiserver/tests/automated/test_models.py b/apiserver/tests/automated/test_models.py index c4b97bd..36dd7c5 100644 --- a/apiserver/tests/automated/test_models.py +++ b/apiserver/tests/automated/test_models.py @@ -134,9 +134,7 @@ class TestModelsService(TestService): self._assert_model_ready(model_id, True) def test_publish_task_no_output_model(self): - task_id = self.create_temp( - service="tasks", type="testing", name="server-test" - ) + task_id = self.create_temp(service="tasks", type="testing", name="server-test") self.api.tasks.started(task=task_id) self.api.tasks.stopped(task=task_id) @@ -296,7 +294,10 @@ class TestModelsService(TestService): def _create_task_and_model(self): execution_model_id = self.create_temp( - service="models", name="test", uri="file:///a", labels={} + service="models", + name="test", + uri="https://files.trains-master.hosted.allegro.ai/a.jpg", + labels={}, ) task_id = self.create_temp( service="tasks",