mirror of
https://github.com/clearml/clearml-server
synced 2025-04-28 01:41:07 +00:00
Add server-side support for deleting files from fileserver on task delete
This commit is contained in:
parent 6c49e96ff0
commit 0c9e2f92ee
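The change threads a new delete_external_artifacts flag through the task delete/reset request models, the task cleanup logic, a new background job, and the fileserver itself. For orientation, a hypothetical client call exercising the flag could look like this (apiserver address, task id, and auth header are placeholders, not part of this commit):

    import requests

    # tasks.delete with the new flag: when async deletion is enabled server-side,
    # known external file URLs are scheduled for deletion instead of being
    # returned to the client
    resp = requests.post(
        "http://localhost:8008/tasks.delete",  # assumed default apiserver address
        json={"task": "<task-id>", "delete_external_artifacts": True},
        headers={"Authorization": "Bearer <token>"},  # hypothetical credentials
    )
    resp.raise_for_status()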
@@ -103,6 +103,7 @@ class DeleteRequest(UpdateRequest):
     move_to_trash = BoolField(default=True)
     return_file_urls = BoolField(default=False)
     delete_output_models = BoolField(default=True)
+    delete_external_artifacts = BoolField(default=True)


 class SetRequirementsRequest(TaskRequest):
@@ -180,6 +181,7 @@ class ResetRequest(UpdateRequest):
     clear_all = BoolField(default=False)
     return_file_urls = BoolField(default=False)
     delete_output_models = BoolField(default=True)
+    delete_external_artifacts = BoolField(default=True)


 class MultiTaskRequest(models.Base):
@@ -280,6 +282,7 @@ class DeleteManyRequest(TaskBatchRequest):
     return_file_urls = BoolField(default=False)
     delete_output_models = BoolField(default=True)
     force = BoolField(default=False)
+    delete_external_artifacts = BoolField(default=True)


 class ResetManyRequest(TaskBatchRequest):
@@ -287,6 +290,7 @@ class ResetManyRequest(TaskBatchRequest):
     return_file_urls = BoolField(default=False)
     delete_output_models = BoolField(default=True)
     force = BoolField(default=False)
+    delete_external_artifacts = BoolField(default=True)


 class PublishManyRequest(TaskBatchRequest):
@@ -1,17 +1,23 @@
+from datetime import datetime
 from itertools import chain
 from operator import attrgetter
 from typing import Sequence, Set, Tuple

 import attr
-from boltons.iterutils import partition
+from boltons.iterutils import partition, bucketize, first
+from mongoengine import NotUniqueError
+from pymongo.errors import DuplicateKeyError

 from apiserver.apierrors import errors
 from apiserver.bll.event import EventBLL
 from apiserver.bll.event.event_bll import PlotFields
 from apiserver.bll.task.utils import deleted_prefix
+from apiserver.config_repo import config
 from apiserver.database.model.model import Model
 from apiserver.database.model.task.task import Task, TaskStatus, ArtifactModes
+from apiserver.database.model.url_to_delete import StorageType, UrlToDelete, FileType, DeletionStatus
 from apiserver.timing_context import TimingContext
+from apiserver.database.utils import id as db_id

 event_bll = EventBLL()
@@ -94,12 +100,76 @@ def collect_debug_image_urls(company: str, task: str) -> Set[str]:
     return urls


+supported_storage_types = {
+    "https://files": StorageType.fileserver,
+}
+
+
+def _schedule_for_delete(
+    company: str, user: str, task_id: str, urls: Set[str], can_delete_folders: bool,
+) -> Set[str]:
+    urls_per_storage = bucketize(
+        urls,
+        key=lambda u: first(
+            type_
+            for prefix, type_ in supported_storage_types.items()
+            if u.startswith(prefix)
+        ),
+    )
+    urls_per_storage.pop(None, None)
+
+    processed_urls = set()
+    for storage_type, storage_urls in urls_per_storage.items():
+        delete_folders = (storage_type == StorageType.fileserver) and can_delete_folders
+        scheduled_to_delete = set()
+        for url in storage_urls:
+            folder = None
+            if delete_folders:
+                folder, _, _ = url.rpartition("/")
+
+            to_delete = folder or url
+            if to_delete in scheduled_to_delete:
+                processed_urls.add(url)
+                continue
+
+            try:
+                UrlToDelete(
+                    id=db_id(),
+                    company=company,
+                    user=user,
+                    url=to_delete,
+                    task=task_id,
+                    created=datetime.utcnow(),
+                    storage_type=storage_type,
+                    type=FileType.folder if folder else FileType.file,
+                ).save()
+            except (DuplicateKeyError, NotUniqueError):
+                existing = UrlToDelete.objects(company=company, url=to_delete).first()
+                if existing:
+                    existing.update(
+                        user=user,
+                        task=task_id,
+                        created=datetime.utcnow(),
+                        retry_count=0,
+                        unset__last_failure_time=1,
+                        unset__last_failure_reason=1,
+                        status=DeletionStatus.created,
+                    )
+            processed_urls.add(url)
+            scheduled_to_delete.add(to_delete)
+
+    return processed_urls
+
+
 def cleanup_task(
+    company: str,
+    user: str,
     task: Task,
     force: bool = False,
     update_children=True,
     return_file_urls=False,
     delete_output_models=True,
+    delete_external_artifacts=True,
 ) -> CleanupResult:
     """
     Validate task deletion and delete/modify all its output.
@@ -155,6 +225,19 @@ def cleanup_task(

     event_bll.delete_task_events(task.company, task.id, allow_locked=force)

+    if delete_external_artifacts and config.get(
+        "services.async_urls_delete.enabled", False
+    ):
+        scheduled = _schedule_for_delete(
+            task_id=task.id,
+            company=company,
+            user=user,
+            urls=event_urls | model_urls | artifact_urls,
+            can_delete_folders=not in_use_model_ids and not published_models,
+        )
+        for urls in (event_urls, model_urls, artifact_urls):
+            urls.difference_update(scheduled)
+
     return CleanupResult(
         deleted_models=deleted_models,
         updated_children=updated_children,
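A note on the grouping above: boltons' bucketize/first combination buckets each URL under the first storage type whose prefix matches, and unmatched URLs land in the None bucket, which is dropped and therefore left for client-side deletion. A minimal standalone sketch (sample URLs invented):

    from boltons.iterutils import bucketize, first

    supported = {"https://files": "fileserver"}
    urls = ["https://files.local/co/proj/task/model.bin", "s3://bucket/model.bin"]

    by_type = bucketize(
        urls,
        key=lambda u: first(t for p, t in supported.items() if u.startswith(p)),
    )
    by_type.pop(None, None)  # unmatched URLs stay with the client
    print(by_type)  # {'fileserver': ['https://files.local/co/proj/task/model.bin']}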
@@ -191,12 +191,14 @@ def move_tasks_to_trash(tasks: Sequence[str]) -> int:
 def delete_task(
     task_id: str,
     company_id: str,
+    user_id: str,
     move_to_trash: bool,
     force: bool,
     return_file_urls: bool,
     delete_output_models: bool,
     status_message: str,
     status_reason: str,
+    delete_external_artifacts: bool,
 ) -> Tuple[int, Task, CleanupResult]:
     task = TaskBLL.get_task_with_access(
         task_id, company_id=company_id, requires_write_access=True
@@ -226,10 +228,13 @@ def delete_task(
         pass

     cleanup_res = cleanup_task(
-        task,
+        company=company_id,
+        user=user_id,
+        task=task,
         force=force,
         return_file_urls=return_file_urls,
         delete_output_models=delete_output_models,
+        delete_external_artifacts=delete_external_artifacts,
     )

     if move_to_trash:
@@ -246,10 +251,12 @@ def delete_task(
 def reset_task(
     task_id: str,
     company_id: str,
+    user_id: str,
     force: bool,
     return_file_urls: bool,
     delete_output_models: bool,
     clear_all: bool,
+    delete_external_artifacts: bool,
 ) -> Tuple[dict, CleanupResult, dict]:
     task = TaskBLL.get_task_with_access(
         task_id, company_id=company_id, requires_write_access=True
@@ -268,11 +275,14 @@ def reset_task(
         pass

     cleaned_up = cleanup_task(
-        task,
+        company=company_id,
+        user=user_id,
+        task=task,
         force=force,
         update_children=False,
         return_file_urls=return_file_urls,
         delete_output_models=delete_output_models,
+        delete_external_artifacts=delete_external_artifacts,
     )

     updates.update(
@@ -1,3 +1,5 @@
+fileserver = "http://localhost:8081"
+
 elastic {
     events {
         hosts: [{host: "127.0.0.1", port: 9200}]
apiserver/config/default/services/async_urls_delete.conf (Normal file, 5 lines)
@@ -0,0 +1,5 @@
+# If set to true then on task delete/reset, external file URLs for known storage types are scheduled for async delete;
+# otherwise they are returned to the client for client-side delete
+enabled: false
+max_retries: 3
+retry_timeout_sec: 60
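Since enabled defaults to false, the scheduling path in cleanup_task stays inert until a deployment opts in, presumably with a config override along these lines (override location depends on how the deployment mounts custom config):

    # override for services/async_urls_delete.conf
    enabled: true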
apiserver/database/model/url_to_delete.py (Normal file, 51 lines)
@@ -0,0 +1,51 @@
+from enum import Enum
+
+from mongoengine import StringField, DateTimeField, IntField, EnumField
+
+from apiserver.database import Database, strict
+from apiserver.database.model import AttributedDocument
+
+
+class StorageType(str, Enum):
+    fileserver = "fileserver"
+    unknown = "unknown"
+
+
+class FileType(str, Enum):
+    file = "file"
+    folder = "folder"
+
+
+class DeletionStatus(str, Enum):
+    created = "created"
+    retrying = "retrying"
+    failed = "failed"
+
+
+class UrlToDelete(AttributedDocument):
+    _field_collation_overrides = {
+        "url": AttributedDocument._numeric_locale,
+    }
+
+    meta = {
+        "db_alias": Database.backend,
+        "strict": strict,
+        "indexes": [
+            ("company", "user", "task"),
+            "storage_type",
+            "created",
+            "retry_count",
+            "type",
+        ],
+    }
+
+    id = StringField(primary_key=True)
+    url = StringField(required=True, unique_with="company")
+    task = StringField(required=True)
+    created = DateTimeField(required=True)
+    storage_type = EnumField(StorageType, default=StorageType.unknown)
+    type = EnumField(FileType, default=FileType.file)
+    retry_count = IntField(default=0)
+    last_failure_time = DateTimeField()
+    last_failure_reason = StringField()
+    status = EnumField(DeletionStatus, default=DeletionStatus.created)
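Given this schema, the deletion backlog can be inspected with an ordinary mongoengine query; a hedged sketch, assuming an initialized backend DB connection (e.g. via apiserver.database.db.initialize()) and a placeholder company id:

    from apiserver.database.model.url_to_delete import (
        DeletionStatus, StorageType, UrlToDelete,
    )

    # pending fileserver deletions for one company, least-retried first
    pending = UrlToDelete.objects(
        company="<company-id>",
        storage_type=StorageType.fileserver,
        status__ne=DeletionStatus.failed,
    ).order_by("retry_count")
    print(pending.count())

Note that url is declared unique_with="company", so re-scheduling the same URL for a company updates the existing document (the DuplicateKeyError/NotUniqueError branch in _schedule_for_delete) rather than creating a duplicate.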
apiserver/jobs/async_urls_delete.py (Normal file, 174 lines)
@@ -0,0 +1,174 @@
+from argparse import ArgumentParser
+from collections import defaultdict
+from datetime import datetime, timedelta
+from functools import partial
+from itertools import chain
+from pathlib import Path
+from time import sleep
+from typing import Sequence
+
+import requests
+from furl import furl
+from mongoengine import Q
+
+from apiserver.config_repo import config
+from apiserver.database import db
+from apiserver.database.model.url_to_delete import UrlToDelete, DeletionStatus, StorageType
+
+log = config.logger(f"JOB-{Path(__file__).name}")
+conf = config.get("services.async_urls_delete")
+max_retries = conf.get("max_retries", 3)
+retry_timeout = timedelta(seconds=conf.get("retry_timeout_sec", 60))
+token_expiration_sec = 600
+
+
+def validate_fileserver_access(fileserver_host: str) -> str:
+    fileserver_host = fileserver_host or config.get("hosts.fileserver", None)
+    if not fileserver_host:
+        log.error("Fileserver host not configured")
+        exit(1)
+
+    res = requests.get(
+        url=fileserver_host
+    )
+    res.raise_for_status()
+
+    return fileserver_host
+
+
+def mark_retry_failed(ids: Sequence[str], reason: str):
+    UrlToDelete.objects(id__in=ids).update(
+        last_failure_time=datetime.utcnow(),
+        last_failure_reason=reason,
+        inc__retry_count=1,
+    )
+    UrlToDelete.objects(id__in=ids, retry_count__gte=max_retries).update(
+        status=DeletionStatus.failed
+    )
+
+
+def mark_failed(query: Q, reason: str):
+    UrlToDelete.objects(query).update(
+        status=DeletionStatus.failed,
+        last_failure_time=datetime.utcnow(),
+        last_failure_reason=reason,
+    )
+
+
+def delete_fileserver_urls(urls_query: Q, fileserver_host: str):
+    to_delete = list(UrlToDelete.objects(urls_query).limit(10000))
+    if not to_delete:
+        return
+
+    paths = set()
+    path_to_id_mapping = defaultdict(list)
+    for url in to_delete:
+        try:
+            path = str(furl(url.url).path)
+            path = path.strip("/")
+            if not path:
+                raise ValueError("Empty path")
+        except Exception as ex:
+            err = str(ex)
+            log.warn(f"Error getting path for {url.url}: {err}")
+            mark_failed(Q(id=url.id), err)
+            continue
+
+        paths.add(path)
+        path_to_id_mapping[path].append(url.id)
+
+    if not paths:
+        return
+
+    ids_to_delete = set(chain.from_iterable(path_to_id_mapping.values()))
+    try:
+        res = requests.post(
+            url=furl(fileserver_host).add(path="delete_many").url,
+            json={"files": list(paths)},
+        )
+        res.raise_for_status()
+    except Exception as ex:
+        err = str(ex)
+        log.warn(f"Error deleting {len(paths)} files from fileserver: {err}")
+        mark_failed(Q(id__in=list(ids_to_delete)), err)
+        return
+
+    res_data = res.json()
+    deleted_ids = set(
+        chain.from_iterable(
+            path_to_id_mapping.get(path, []) for path in list(res_data.get("deleted", {}))
+        )
+    )
+    if deleted_ids:
+        UrlToDelete.objects(id__in=list(deleted_ids)).delete()
+        log.info(f"{len(deleted_ids)} files deleted from the fileserver")
+
+    failed_ids = set()
+    for err, error_ids in res_data.get("errors", {}).items():
+        error_ids = list(
+            chain.from_iterable(path_to_id_mapping.get(path, []) for path in error_ids)
+        )
+        mark_retry_failed(error_ids, err)
+        log.warning(
+            f"Failed to delete {len(error_ids)} files from the fileserver due to: {err}"
+        )
+        failed_ids.update(error_ids)
+
+    missing_ids = ids_to_delete - deleted_ids - failed_ids
+    if missing_ids:
+        mark_retry_failed(list(missing_ids), "Not succeeded")
+
+
+def run_delete_loop(fileserver_host: str):
+    fileserver_host = validate_fileserver_access(fileserver_host)
+    storage_delete_funcs = {
+        StorageType.fileserver: partial(
+            delete_fileserver_urls, fileserver_host=fileserver_host
+        ),
+    }
+    while True:
+        now = datetime.utcnow()
+        urls_query = (
+            Q(status__ne=DeletionStatus.failed)
+            & Q(retry_count__lt=max_retries)
+            & (
+                Q(last_failure_time__exists=False)
+                | Q(last_failure_time__lt=now - retry_timeout)
+            )
+        )
+
+        url_to_delete: UrlToDelete = UrlToDelete.objects(
+            urls_query & Q(storage_type__in=list(storage_delete_funcs))
+        ).order_by("retry_count").limit(1).first()
+        if not url_to_delete:
+            sleep(10)
+            continue
+
+        company = url_to_delete.company
+        user = url_to_delete.user
+        storage_type = url_to_delete.storage_type
+        log.info(
+            f"Deleting {storage_type} objects for company: {company}, user: {user}"
+        )
+        company_storage_urls_query = urls_query & Q(
+            company=company, storage_type=storage_type,
+        )
+        storage_delete_funcs[storage_type](
+            urls_query=company_storage_urls_query
+        )
+
+
+def main():
+    parser = ArgumentParser(description=__doc__)
+
+    parser.add_argument(
+        "--fileserver-host", "-fh", help="Fileserver host address", type=str,
+    )
+    args = parser.parse_args()
+
+    db.initialize()
+    run_delete_loop(args.fileserver_host)
+
+
+if __name__ == "__main__":
+    main()
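The job is a standalone process rather than part of the apiserver, so it presumably gets launched along these lines (how the packaged server images wire it up is not shown in this commit):

    python -m apiserver.jobs.async_urls_delete --fileserver-host http://localhost:8081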
@@ -1489,6 +1489,13 @@ reset {
            }
        }
    }
+    "2.21": ${reset."2.13"} {
+        request.properties.delete_external_artifacts {
+            description: "If set to 'true' then BE will try to delete the external artifacts associated with the task from the fileserver (if configured to do so)"
+            type: boolean
+            default: true
+        }
+    }
 }
 reset_many {
     "2.13": ${_definitions.batch_operation} {
@@ -1541,6 +1548,13 @@ reset_many {
            }
        }
    }
+    "2.21": ${reset_many."2.13"} {
+        request.properties.delete_external_artifacts {
+            description: "If set to 'true' then BE will try to delete the external artifacts associated with the tasks from the fileserver (if configured to do so)"
+            type: boolean
+            default: true
+        }
+    }
 }
 delete_many {
     "2.13": ${_definitions.batch_operation} {
@@ -1591,6 +1605,13 @@ delete_many {
            }
        }
    }
+    "2.21": ${delete_many."2.13"} {
+        request.properties.delete_external_artifacts {
+            description: "If set to 'true' then BE will try to delete the external artifacts associated with the tasks from the fileserver (if configured to do so)"
+            type: boolean
+            default: true
+        }
+    }
 }
 delete {
     "2.1" {
@@ -1655,6 +1676,13 @@ delete {
            }
        }
    }
+    "2.21": ${delete."2.13"} {
+        request.properties.delete_external_artifacts {
+            description: "If set to 'true' then BE will try to delete the external artifacts associated with the task from the fileserver (if configured to do so)"
+            type: boolean
+            default: true
+        }
+    }
 }
 archive {
     "2.12" {
@@ -81,7 +81,6 @@ from apiserver.bll.task.artifacts import (
     Artifacts,
 )
 from apiserver.bll.task.hyperparams import HyperParams
 from apiserver.bll.task.non_responsive_tasks_watchdog import NonResponsiveTasksWatchdog
 from apiserver.bll.task.param_utils import (
     params_prepare_for_save,
     params_unprepare_from_saved,
@@ -942,10 +941,12 @@ def reset(call: APICall, company_id, request: ResetRequest):
     dequeued, cleanup_res, updates = reset_task(
         task_id=request.task,
         company_id=company_id,
+        user_id=call.identity.user,
         force=request.force,
         return_file_urls=request.return_file_urls,
         delete_output_models=request.delete_output_models,
         clear_all=request.clear_all,
+        delete_external_artifacts=request.delete_external_artifacts,
     )

     res = ResetResponse(**updates, dequeued=dequeued)
@@ -968,10 +969,12 @@ def reset_many(call: APICall, company_id, request: ResetManyRequest):
         func=partial(
             reset_task,
             company_id=company_id,
+            user_id=call.identity.user,
             force=request.force,
             return_file_urls=request.return_file_urls,
             delete_output_models=request.delete_output_models,
             clear_all=request.clear_all,
+            delete_external_artifacts=request.delete_external_artifacts,
         ),
         ids=request.ids,
     )
@@ -1069,12 +1072,14 @@ def delete(call: APICall, company_id, request: DeleteRequest):
     deleted, task, cleanup_res = delete_task(
         task_id=request.task,
         company_id=company_id,
+        user_id=call.identity.user,
         move_to_trash=request.move_to_trash,
         force=request.force,
         return_file_urls=request.return_file_urls,
         delete_output_models=request.delete_output_models,
         status_message=request.status_message,
         status_reason=request.status_reason,
+        delete_external_artifacts=request.delete_external_artifacts,
     )
     if deleted:
         if request.move_to_trash:
@@ -1089,12 +1094,14 @@ def delete_many(call: APICall, company_id, request: DeleteManyRequest):
         func=partial(
             delete_task,
             company_id=company_id,
+            user_id=call.identity.user,
             move_to_trash=request.move_to_trash,
             force=request.force,
             return_file_urls=request.return_file_urls,
             delete_output_models=request.delete_output_models,
             status_message=request.status_message,
             status_reason=request.status_reason,
+            delete_external_artifacts=request.delete_external_artifacts,
         ),
         ids=request.ids,
     )
@@ -6,6 +6,10 @@ download {
     cache_timeout_sec: 300
 }

+delete {
+    allow_batch: false
+}
+
 cors {
     origins: "*"
 }
@@ -2,7 +2,9 @@
 import json
 import mimetypes
 import os
+import shutil
 from argparse import ArgumentParser
+from collections import defaultdict
 from pathlib import Path

 from boltons.iterutils import first
@@ -15,6 +17,7 @@ from werkzeug.security import safe_join
 from config import config
 from utils import get_env_bool

+log = config.logger(__file__)
 DEFAULT_UPLOAD_FOLDER = "/mnt/fileserver"

 app = Flask(__name__)
@@ -32,6 +35,11 @@ app.config["SEND_FILE_MAX_AGE_DEFAULT"] = config.get(
 )


+@app.route("/", methods=["GET"])
+def ping():
+    return "OK", 200
+
+
 @app.before_request
 def before_request():
     if request.content_encoding:
@@ -60,6 +68,8 @@ def upload():
         target.parent.mkdir(parents=True, exist_ok=True)
         file.save(str(target))
         results.append(file_path)
+
+    log.info(f"Uploaded {len(results)} files")
     return json.dumps(results), 200


@@ -83,19 +93,84 @@ def download(path):
         headers["Cache-control"] = "no-cache"
         headers["Pragma"] = "no-cache"
         headers["Expires"] = "0"
+
+    log.info(f"Downloaded file {str(path)}")
     return response


+def _get_full_path(path: str) -> Path:
+    return Path(safe_join(os.fspath(app.config["UPLOAD_FOLDER"]), os.fspath(path)))
+
+
 @app.route("/<path:path>", methods=["DELETE"])
 def delete(path):
-    real_path = Path(safe_join(os.fspath(app.config["UPLOAD_FOLDER"]), os.fspath(path)))
+    real_path = _get_full_path(path)
     if not real_path.exists() or not real_path.is_file():
-        abort(Response(f"File {str(path)} not found", 404))
+        log.error(f"Error deleting file {str(real_path)}. Not found or not a file")
+        abort(Response(f"File {str(real_path)} not found", 404))

     real_path.unlink()
+
+    log.info(f"Deleted file {str(real_path)}")
     return json.dumps(str(path)), 200


+def batch_delete():
+    body = request.get_json(force=True, silent=False)
+    if not body:
+        abort(Response("Json payload is missing", 400))
+    files = body.get("files")
+    if not files:
+        abort(Response("files are missing", 400))
+
+    deleted = {}
+    errors = defaultdict(list)
+    log_errors = defaultdict(list)
+
+    def record_error(msg: str, file_: str, path_: Path):
+        errors[msg].append(file_)
+        log_errors[msg].append(str(path_))
+
+    for file in files:
+        if not file or not file.strip("/"):
+            # An empty path may result in deleting all company data. Too dangerous
+            record_error("Empty path not allowed", file, file)
+            continue
+
+        path = _get_full_path(file)
+
+        if not path.exists():
+            record_error("Not found", file, path)
+            continue
+
+        try:
+            if path.is_file():
+                path.unlink()
+            elif path.is_dir():
+                shutil.rmtree(path)
+            else:
+                record_error("Not a file or folder", file, path)
+                continue
+        except OSError as ex:
+            record_error(ex.strerror, file, path)
+            continue
+        except Exception as ex:
+            record_error(str(ex).replace(str(path), ""), file, path)
+            continue
+
+        deleted[file] = str(path)
+
+    for error, paths in log_errors.items():
+        log.error(f"{len(paths)} files/folders could not be deleted due to: {error}")
+
+    log.info(f"Deleted {len(deleted)} files/folders")
+    return json.dumps({"deleted": deleted, "errors": errors}), 200
+
+
+if config.get("fileserver.delete.allow_batch"):
+    app.route("/delete_many", methods=["POST"])(batch_delete)
+
+
 def main():
     parser = ArgumentParser(description=__doc__)
     parser.add_argument(
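For completeness, once fileserver.delete.allow_batch is enabled, the batch endpoint can be exercised directly; a hedged sketch with placeholder host and file path, matching the request/response contract of batch_delete above:

    import requests

    res = requests.post(
        "http://localhost:8081/delete_many",
        json={"files": ["<company-id>/<project>/<task-id>/model.bin"]},
    )
    res.raise_for_status()
    # response: {"deleted": {<file>: <full path>}, "errors": {<reason>: [<files>]}}
    print(res.json())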