mirror of
				https://github.com/clearml/clearml
				synced 2025-06-26 18:16:07 +00:00 
			
		
		
		
	Support providing list of links in clearml-data
This commit is contained in:
		
							parent
							
								
									a6104347f2
								
							
						
					
					
						commit
						c2b4f728f4
					
				| @ -373,7 +373,7 @@ class Dataset(object): | ||||
| 
 | ||||
|     def add_external_files( | ||||
|         self, | ||||
|         source_url,  # type: str | ||||
|         source_url,  # type: Union[str, Sequence[str]] | ||||
|         wildcard=None,  # type: Optional[Union[str, Sequence[str]]] | ||||
|         dataset_path=None,  # type: Optional[str] | ||||
|         recursive=True,  # type: bool | ||||
| @ -396,8 +396,8 @@ class Dataset(object): | ||||
|         - Add the local file "/folder/local_file.jpg" to the dataset. | ||||
|         add_external_files(source_url="file:///folder/local_file.jpg", dataset_path="/my_dataset/new_folder/") | ||||
| 
 | ||||
|         :param source_url: Source url link to add to the dataset, | ||||
|             e.g. s3://bucket/folder/path, s3://bucket/folder/file.csv | ||||
|         :param source_url: Source url link (e.g. s3://bucket/folder/path) or list/tuple of links to add to | ||||
|             the dataset (e.g. [s3://bucket/folder/file.csv, http://web.com/file.txt]) | ||||
|         :param wildcard: Add only a specific set of files. | ||||
|             Wildcard matching, can be a single string or a list of wildcards. | ||||
|         :param dataset_path: The location in the dataset where the file will be downloaded into. | ||||
| @ -407,7 +407,18 @@ class Dataset(object): | ||||
|         :param verbose: If True print to console files added/modified | ||||
|         :return: number of file links added | ||||
|         """ | ||||
|         num_added = 0 | ||||
|         self._dirty = True | ||||
|         if not isinstance(source_url, str): | ||||
|             for source_url_ in source_url: | ||||
|                 num_added += self.add_external_files( | ||||
|                         source_url_, | ||||
|                         wildcard=wildcard, | ||||
|                         dataset_path=dataset_path, | ||||
|                         recursive=recursive, | ||||
|                         verbose=verbose | ||||
|                 ) | ||||
|             return num_added | ||||
|         if dataset_path: | ||||
|             dataset_path = dataset_path.lstrip("/") | ||||
|         # noinspection PyBroadException | ||||
| @ -419,11 +430,10 @@ class Dataset(object): | ||||
|                     source_url = source_url + "/" | ||||
|                 links = StorageManager.list(source_url, return_full_path=True) | ||||
|         except Exception: | ||||
|             self._task.get_logger().warning( | ||||
|                 "Could not list remote file(s) when adding {}".format(source_url) | ||||
|             self._task.get_logger().report_text( | ||||
|                 "Could not list/find remote file(s) when adding {}".format(source_url) | ||||
|             ) | ||||
|             return 0 | ||||
|         num_added = 0 | ||||
|         num_modified = 0 | ||||
|         for link in links: | ||||
|             relative_path = link[len(source_url):] | ||||
|  | ||||
| @ -518,7 +518,7 @@ class StorageHelper(object): | ||||
|         try: | ||||
|             if isinstance(self._driver, _HttpDriver) and obj: | ||||
|                 obj = self._driver._get_download_object(obj)  # noqa | ||||
|                 size = obj.headers.get("Content-Length", 0) | ||||
|                 size = int(obj.headers.get("Content-Length", 0)) | ||||
|             elif hasattr(obj, "size"): | ||||
|                 size = obj.size | ||||
|                 # Google storage has the option to reload the object to get the size | ||||
|  | ||||
| @ -9,6 +9,7 @@ from typing import List, Optional | ||||
| from zipfile import ZipFile | ||||
| from six.moves.urllib.parse import urlparse | ||||
| 
 | ||||
| import requests | ||||
| from pathlib2 import Path | ||||
| 
 | ||||
| from .cache import CacheManager | ||||
| @ -313,8 +314,10 @@ class StorageManager(object): | ||||
| 
 | ||||
|         :return: True if the remote_url stores a file and False otherwise | ||||
|         """ | ||||
|         if remote_url.startswith('file://'): | ||||
|             return os.path.isfile(remote_url[len('file://'):]) | ||||
|         if remote_url.startswith("file://"): | ||||
|             return os.path.isfile(remote_url[len("file://"):]) | ||||
|         if remote_url.startswith("http://") or remote_url.startswith("https://"): | ||||
|             return requests.head(remote_url).status_code == requests.codes.ok | ||||
|         helper = StorageHelper.get(remote_url) | ||||
|         obj = helper.get_object(remote_url) | ||||
|         if not obj: | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 allegroai
						allegroai