mirror of
				https://github.com/clearml/clearml
				synced 2025-06-26 18:16:07 +00:00 
			
		
		
		
	Adding an appropriate error when inputting URLs in the add_files function (instead of using add_external_files)
This commit is contained in:
		
							parent
							
								
									eaeadb18e3
								
							
						
					
					
						commit
						50ad7b09a1
					
				| @ -380,58 +380,75 @@ class Dataset(object): | |||||||
|         :param tags: A list of tags which describe the Task to add. |         :param tags: A list of tags which describe the Task to add. | ||||||
|         """ |         """ | ||||||
|         self._task.add_tags(tags) |         self._task.add_tags(tags) | ||||||
|  | import re | ||||||
| 
 | 
 | ||||||
def is_url(path):
    """
    Check whether the provided path points to external (remote) storage.

    :param path: A string or path-like object to test. Path-like objects
        (e.g. ``pathlib.Path``) are converted with ``str()`` first, so they
        no longer raise ``TypeError`` inside the regex match.
    :return: True if the value starts with a remote scheme
        (http, https, ftp, s3, gs, azure), False otherwise.
    """
    # NOTE: 'https' must be covered explicitly ('https?'): the alternation is
    # anchored to '://', so a bare 'http' branch fails to match 'https://...'.
    url_regex = re.compile(
        r'^(?:https?|ftp|s3|gs|azure)://'  # schemes: http(s), ftp, s3, gs, azure
    )
    return url_regex.match(str(path)) is not None
|             max_workers=None,  # type: Optional[int] |  | ||||||
|     ): |  | ||||||
|         # type: (...) -> () |  | ||||||
|         """ |  | ||||||
|         Add a folder into the current dataset. calculate file hash, |  | ||||||
|         and compare against parent, mark files to be uploaded |  | ||||||
| 
 | 
 | ||||||
def add_files(
        self,
        path,  # type: Union[str, Path, _Path]
        wildcard=None,  # type: Optional[Union[str, Sequence[str]]]
        local_base_folder=None,  # type: Optional[str]
        dataset_path=None,  # type: Optional[str]
        recursive=True,  # type: bool
        verbose=False,  # type: bool
        max_workers=None,  # type: Optional[int]
):
    # type: (...) -> ()
    """
    Add a folder into the current dataset. calculate file hash,
    and compare against parent, mark files to be uploaded

    :param path: Add a folder/file to the dataset. Must be a local path;
        external URLs (e.g. s3://, http://) must go through `add_external_files()`
    :param wildcard: add only specific set of files.
        Wildcard matching, can be a single string or a list of wildcards.
    :param local_base_folder: files will be located based on their relative path from local_base_folder
    :param dataset_path: where in the dataset the folder/files should be located
    :param recursive: If True, match all wildcard files recursively
    :param verbose: If True, print to console files added/modified
    :param max_workers: The number of threads to add the files with. Defaults to the number of logical cores
    :raises ValueError: if `path` is an external URL rather than a local path
    :return: number of files added
    """
    # Check if the path provided is a URL, if so, raise an error and suggest
    # using add_external_files. str() keeps the check safe for path-like
    # objects (e.g. pathlib.Path), which a regex match would otherwise reject
    # with TypeError.
    if is_url(str(path)):
        raise ValueError(
            "The path provided seems to be an external URL (e.g., s3://, http://). "
            "Please use `add_external_files()` to add external files to the dataset."
        )

    # default worker count: one per logical CPU core
    max_workers = max_workers or psutil.cpu_count()
    self._dirty = True
    self._task.get_logger().report_text(
        'Adding files to dataset: {}'.format(
            dict(path=path, wildcard=wildcard, local_base_folder=local_base_folder,
                 dataset_path=dataset_path, recursive=recursive, verbose=verbose)),
        print_console=False)

    # hash files and diff against the parent dataset version
    num_added, num_modified = self._add_files(
        path=path,
        wildcard=wildcard,
        local_base_folder=local_base_folder,
        dataset_path=dataset_path,
        recursive=recursive,
        verbose=verbose,
        max_workers=max_workers,
    )

    # update the task script
    self._add_script_call(
        'add_files', path=path, wildcard=wildcard, local_base_folder=local_base_folder,
        dataset_path=dataset_path, recursive=recursive)

    self._serialize()

    return num_added
| 
 | 
 | ||||||
|     def add_external_files( |     def add_external_files( | ||||||
|         self, |         self, | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user