From 03d2f808b3b321e2c0b4e74e9c21b8caa98618f4 Mon Sep 17 00:00:00 2001 From: Vignesh D Date: Sun, 15 Sep 2024 21:21:37 +0530 Subject: [PATCH 1/2] handle scenarios of local file upload without file:// scheme. Fetch the absolute path for the file in such cases and use it as source url --- clearml/datasets/dataset.py | 4 +++- clearml/storage/helper.py | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/clearml/datasets/dataset.py b/clearml/datasets/dataset.py index 4ac0e001..c646b749 100644 --- a/clearml/datasets/dataset.py +++ b/clearml/datasets/dataset.py @@ -3350,6 +3350,8 @@ class Dataset(object): # noinspection PyBroadException try: if StorageManager.exists_file(source_url): + # handle local path provided without scheme + source_url = StorageHelper.sanitize_url(source_url) remote_objects = [StorageManager.get_metadata(source_url, return_full_path=True)] elif not source_url.startswith(("http://", "https://")): if source_url[-1] != "/": @@ -3368,7 +3370,7 @@ class Dataset(object): link = remote_object.get("name") relative_path = link[len(source_url):] if not relative_path: - relative_path = source_url.split("/")[-1] + relative_path = os.path.basename(source_url) if not matches_any_wildcard(relative_path, wildcard, recursive=recursive): continue try: diff --git a/clearml/storage/helper.py b/clearml/storage/helper.py index 564342d6..8d10669c 100644 --- a/clearml/storage/helper.py +++ b/clearml/storage/helper.py @@ -3067,6 +3067,14 @@ class StorageHelper(object): return self._driver.exists_file( container_name=self._container.name if self._container else "", object_name=object_name ) + + @classmethod + def sanitize_url(cls, remote_url): + base_url = cls._resolve_base_url(remote_url) + if base_url != 'file://': + return remote_url + absoulte_path = os.path.abspath(remote_url) + return base_url + absoulte_path def normalize_local_path(local_path): From 2b609464fda3150406e238b9c45e763ddb3f45d6 Mon Sep 17 00:00:00 2001 From: Vignesh D Date: Wed, 18 Sep 2024 20:55:57 +0530 Subject: [PATCH 2/2] skip adding scheme to the path if the scheme is already provided --- clearml/storage/helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml/storage/helper.py b/clearml/storage/helper.py index 8d10669c..0877cf79 100644 --- a/clearml/storage/helper.py +++ b/clearml/storage/helper.py @@ -3071,7 +3071,7 @@ class StorageHelper(object): @classmethod def sanitize_url(cls, remote_url): base_url = cls._resolve_base_url(remote_url) - if base_url != 'file://': + if base_url != 'file://' or remote_url.startswith("file://"): return remote_url absoulte_path = os.path.abspath(remote_url) return base_url + absoulte_path