clearml-server/fileserver/fileserver.py

220 lines
6.3 KiB
Python
Raw Normal View History

2019-06-10 21:24:35 +00:00
""" A Simple file server for uploading and downloading files """
import json
import mimetypes
2019-06-10 21:24:35 +00:00
import os
import shutil
2023-07-26 15:42:26 +00:00
import urllib.parse
2019-06-10 21:24:35 +00:00
from argparse import ArgumentParser
from collections import defaultdict
2019-06-10 21:24:35 +00:00
from pathlib import Path
2021-05-03 14:00:38 +00:00
from boltons.iterutils import first
from flask import Flask, request, send_from_directory, abort, Response
2019-07-17 15:16:43 +00:00
from flask_compress import Compress
from flask_cors import CORS
from werkzeug.exceptions import NotFound
from werkzeug.security import safe_join
2019-06-10 21:24:35 +00:00
2024-06-20 14:48:54 +00:00
from auth import AuthHandler
2019-07-17 15:16:43 +00:00
from config import config
from utils import get_env_bool
2019-06-10 21:24:35 +00:00
# Module-level logger obtained from the project configuration object.
log = config.logger(__file__)

# Storage root used when neither the environment nor the command line names one.
DEFAULT_UPLOAD_FOLDER = "/mnt/fileserver"

app = Flask(__name__)
CORS(app, **config.get("fileserver.cors"))

# Compression is enabled when get_env_bool reports CLEARML_COMPRESS_RESP as
# true; the lookup defaults to True.
if get_env_bool("CLEARML_COMPRESS_RESP", default=True):
    Compress(app)

# CLEARML_UPLOAD_FOLDER is checked first, then TRAINS_UPLOAD_FOLDER; an unset
# or empty variable falls through to the next candidate.
app.config["UPLOAD_FOLDER"] = (
    os.environ.get("CLEARML_UPLOAD_FOLDER")
    or os.environ.get("TRAINS_UPLOAD_FOLDER")
    or DEFAULT_UPLOAD_FOLDER
)

# Cache lifetime for served files, in seconds (five minutes unless configured).
app.config["SEND_FILE_MAX_AGE_DEFAULT"] = config.get(
    "fileserver.download.cache_timeout_sec", 5 * 60
)

# The upload size limit is configured in megabytes and only applied when set
# to a truthy value.
max_upload_size = config.get("fileserver.upload.max_upload_size_mb", None)
if max_upload_size:
    app.config["MAX_CONTENT_LENGTH"] = max_upload_size * 1024 * 1024

auth_handler = AuthHandler.instance()
2019-06-10 21:24:35 +00:00
@app.route("/", methods=["GET"])
def ping():
    """Answer ``OK`` with status 200.

    When an auth handler is configured and it finds a token on the request,
    the request is passed to the handler's ``validate`` first; requests
    without a token are answered without validation.
    """
    if auth_handler:
        if auth_handler.get_token(request):
            auth_handler.validate(request)
    return "OK", 200
@app.before_request
def before_request():
    """Reject any request that declares a content encoding.

    Returns a 415 response tuple when the request carries a content encoding,
    otherwise ``None`` so that normal request handling continues.
    """
    if not request.content_encoding:
        return None
    return f"Content encoding is not supported ({request.content_encoding})", 415
2022-03-15 14:21:52 +00:00
@app.after_request
def after_request(response):
    """Stamp the configured ``server`` header on every outgoing response.

    The value is read from ``fileserver.response.headers.server`` and falls
    back to ``clearml``. Returns the same response object.
    """
    server_name = config.get("fileserver.response.headers.server", "clearml")
    response.headers["server"] = server_name
    return response
2019-06-10 21:24:35 +00:00
@app.route("/", methods=["POST"])
def upload():
    """Save every file of a POST request under the upload folder.

    Each key of ``request.files`` is treated as the destination path relative
    to the upload folder (leading path separators are stripped). Missing
    parent directories are created.

    Returns:
        A JSON-encoded list of the stored relative paths and status 200.

    Raises:
        NotFound: when ``safe_join`` rejects a destination path.
    """
    if auth_handler:
        auth_handler.validate(request)

    upload_root = app.config["UPLOAD_FOLDER"]
    results = []
    for filename, file in request.files.items():
        file_path = filename.lstrip(os.sep) if filename else ""
        if not file_path:
            # An empty name, or one made only of separators, would resolve to
            # the upload folder itself and make file.save() fail on a directory.
            continue

        safe_path = safe_join(upload_root, file_path)
        if safe_path is None:
            raise NotFound()

        target = Path(safe_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        file.save(str(target))
        results.append(file_path)

    log.info(f"Uploaded {len(results)} files")
    return json.dumps(results), 200
2019-06-10 21:24:35 +00:00
@app.route("/<path:path>", methods=["GET"])
def download(path):
    """Serve the file stored at ``path`` below the upload folder.

    A ``download`` query argument (any value) makes the file an attachment.
    When ``fileserver.download.disable_browser_caching`` is enabled, no-cache
    headers are added to the response.

    Returns:
        The response produced by ``send_from_directory``.
    """
    if auth_handler:
        auth_handler.validate(request)

    # Files whose name implies gzip encoding get a generic binary mimetype;
    # for everything else the mimetype is left to send_from_directory.
    content_encoding = mimetypes.guess_type(os.path.basename(path))[1]
    if content_encoding == "gzip":
        mimetype = "application/octet-stream"
    else:
        mimetype = None

    response = send_from_directory(
        app.config["UPLOAD_FOLDER"],
        path,
        as_attachment="download" in request.args,
        mimetype=mimetype,
    )

    if config.get("fileserver.download.disable_browser_caching", False):
        for header_name in (
            "Pragma-directive",
            "Cache-directive",
            "Cache-control",
            "Pragma",
        ):
            response.headers[header_name] = "no-cache"
        response.headers["Expires"] = "0"

    log.info(f"Downloaded file {str(path)}")
    return response
2019-06-10 21:24:35 +00:00
def _get_full_path(path: str) -> Path:
    """Resolve ``path`` to a location below the upload folder.

    Args:
        path: Path relative to the upload folder.

    Returns:
        The joined path as a ``Path``.

    Raises:
        NotFound: when ``safe_join`` rejects the path (it returns ``None``
            instead of a string in that case, which ``Path`` cannot accept).
    """
    joined = safe_join(os.fspath(app.config["UPLOAD_FOLDER"]), os.fspath(path))
    if joined is None:
        raise NotFound()
    return Path(joined)
@app.route("/<path:path>", methods=["DELETE"])
def delete(path):
    """Delete the single file stored at ``path`` below the upload folder.

    Returns:
        The JSON-encoded path and status 200.

    Aborts with a 404 response when the target does not exist or is not a
    regular file.
    """
    if auth_handler:
        auth_handler.validate(request)

    full_path = _get_full_path(path)
    is_existing_file = full_path.exists() and full_path.is_file()
    if not is_existing_file:
        log.error(f"Error deleting file {str(full_path)}. Not found or not a file")
        abort(Response(f"File {str(path)} not found", 404))

    full_path.unlink()
    log.info(f"Deleted file {str(full_path)}")
    return json.dumps(str(path)), 200
def batch_delete():
    """Delete several files and/or folders named in a JSON request body.

    The body must be a JSON object whose ``files`` entry is a list of
    URL-quoted paths relative to the upload folder. Each entry is processed
    independently: a failure is recorded for that entry and the remaining
    entries are still handled.

    Returns:
        A JSON document ``{"deleted": {...}, "errors": {...}}`` and status
        200. ``deleted`` maps each requested entry to its unquoted path;
        ``errors`` maps an error message to the entries that failed with it.

    Aborts with a 400 response when the payload or its ``files`` entry is
    missing or has the wrong type.
    """
    if auth_handler:
        auth_handler.validate(request)

    body = request.get_json(force=True, silent=False)
    if not body:
        abort(Response("Json payload is missing", 400))
    if not isinstance(body, dict):
        # A JSON array or scalar has no "files" entry to read.
        abort(Response("Json payload should be an object", 400))

    files = body.get("files")
    if not files:
        abort(Response("files are missing", 400))
    if not isinstance(files, list):
        # A bare string would otherwise be iterated character by character and
        # each character treated as a path to delete.
        abort(Response("files should be a list", 400))

    upload_root = os.fspath(app.config["UPLOAD_FOLDER"])
    root_path = Path(upload_root)

    deleted = {}
    errors = defaultdict(list)
    log_errors = defaultdict(list)

    def record_error(msg: str, file_, path_):
        errors[msg].append(str(file_))
        log_errors[msg].append(str(path_))

    for file in files:
        if not isinstance(file, str):
            record_error("Invalid path", file, file)
            continue

        path = urllib.parse.unquote_plus(file)
        if not path or not path.strip("/"):
            # empty path may result in deleting all company data. Too dangerous
            record_error("Empty path not allowed", file, path)
            continue

        # Paths rejected by safe_join are reported per entry so that one bad
        # entry does not abort a batch that has already deleted other entries.
        safe_path = safe_join(upload_root, path)
        if safe_path is None:
            record_error("Not found", file, path)
            continue

        full_path = Path(safe_path)
        if full_path == root_path:
            # Paths such as "." or "a/.." normalise to the upload folder
            # itself; deleting it is as dangerous as an empty path.
            record_error("Empty path not allowed", file, path)
            continue

        if not full_path.exists():
            record_error("Not found", file, path)
            continue

        try:
            if full_path.is_file():
                full_path.unlink()
            elif full_path.is_dir():
                shutil.rmtree(full_path)
            else:
                record_error("Not a file or folder", file, path)
                continue
        except OSError as ex:
            # strerror may be None for some OSError instances.
            record_error(ex.strerror or str(ex), file, path)
            continue
        except Exception as ex:
            # Strip the absolute server path from the reported message.
            record_error(str(ex).replace(str(full_path), ""), file, path)
            continue

        deleted[file] = str(path)

    for error, paths in log_errors.items():
        log.error(f"{len(paths)} files/folders cannot be deleted due to the {error}")

    log.info(f"Deleted {len(deleted)} files/folders")
    return json.dumps({"deleted": deleted, "errors": errors}), 200
# The batch delete endpoint is only exposed when the configuration allows it.
if config.get("fileserver.delete.allow_batch"):
    app.add_url_rule("/delete_many", view_func=batch_delete, methods=["POST"])
2019-06-10 21:24:35 +00:00
def main():
    """Parse the command line and run the Flask application.

    ``--upload-folder`` overrides the configured upload folder only when it
    is given on the command line.
    """
    cli_options = (
        (
            ("--port", "-p"),
            dict(type=int, default=8081, help="Port (default %(default)d)"),
        ),
        (
            ("--ip", "-i"),
            dict(type=str, default="0.0.0.0", help="Address (default %(default)s)"),
        ),
        (
            ("--debug",),
            dict(action="store_true", default=False),
        ),
        (
            ("--upload-folder", "-u"),
            dict(help=f"Upload folder (default {DEFAULT_UPLOAD_FOLDER})"),
        ),
    )

    parser = ArgumentParser(description=__doc__)
    for flags, options in cli_options:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()

    upload_folder = args.upload_folder
    if upload_folder is not None:
        app.config["UPLOAD_FOLDER"] = upload_folder

    app.run(host=args.ip, port=args.port, debug=args.debug, threaded=True)
# Start the server through main() only when this file is executed as a script.
if __name__ == "__main__":
    main()