From 7f822e7626a531f55d4f05eb2bf21938514f5b22 Mon Sep 17 00:00:00 2001 From: vignesh Date: Fri, 18 Apr 2025 18:41:47 +0530 Subject: [PATCH] Fix local file uploads without scheme (#1326) * handle scenarios of local file upload without file:// scheme. Fetch the absolute path for the file in such cases and use it as source url * Update README gifs (#1327) * skip adding scheme to the path if the scheme is already provided * handle scenarios of local file upload without file:// scheme. Fetch the absolute path for the file in such cases and use it as source url * skip adding scheme to the path if the scheme is already provided --------- Co-authored-by: Vignesh D Co-authored-by: pollfly <75068813+pollfly@users.noreply.github.com> --- clearml/datasets/dataset.py | 4 +++- clearml/storage/helper.py | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/clearml/datasets/dataset.py b/clearml/datasets/dataset.py index b8ea6317..e6d91068 100644 --- a/clearml/datasets/dataset.py +++ b/clearml/datasets/dataset.py @@ -3639,6 +3639,8 @@ class Dataset(object): # noinspection PyBroadException try: if StorageManager.exists_file(source_url): + # handle local path provided without scheme + source_url = StorageHelper.sanitize_url(source_url) remote_objects = [StorageManager.get_metadata(source_url, return_full_path=True)] elif not source_url.startswith(("http://", "https://")): if source_url[-1] != "/": @@ -3655,7 +3657,7 @@ class Dataset(object): link = remote_object.get("name") relative_path = link[len(source_url) :] if not relative_path: - relative_path = source_url.split("/")[-1] + relative_path = os.path.basename(source_url) if not matches_any_wildcard(relative_path, wildcard, recursive=recursive): continue try: diff --git a/clearml/storage/helper.py b/clearml/storage/helper.py index e08173c8..9e0d234b 100644 --- a/clearml/storage/helper.py +++ b/clearml/storage/helper.py @@ -3507,6 +3507,14 @@ class StorageHelper(object): container_name=self._container.name if self._container else "", object_name=object_name, ) + + @classmethod + def sanitize_url(cls, remote_url): + base_url = cls._resolve_base_url(remote_url) + if base_url != 'file://' or remote_url.startswith("file://"): + return remote_url + absoulte_path = os.path.abspath(remote_url) + return base_url + absoulte_path def normalize_local_path(local_path: str) -> Path: