mirror of
				https://github.com/clearml/clearml
				synced 2025-06-26 18:16:07 +00:00 
			
		
		
		
	Fix StorageManager.list() does not return size metadata (#865)
				
					
				
			This commit is contained in:
		
							parent
							
								
									2120fc85f5
								
							
						
					
					
						commit
						1a62d4b6e1
					
				| @ -2030,30 +2030,53 @@ class Dataset(object): | |||||||
|         modified_files_size = 0 |         modified_files_size = 0 | ||||||
|         removed_files_count = 0 |         removed_files_count = 0 | ||||||
|         removed_files_size = 0 |         removed_files_size = 0 | ||||||
|  | 
 | ||||||
|  |         def update_changes(entries, parent_entries): | ||||||
|  |             nonlocal total_size | ||||||
|  |             nonlocal modified_files_count | ||||||
|  |             nonlocal modified_files_size | ||||||
|  |             nonlocal added_files_count | ||||||
|  |             nonlocal added_files_size | ||||||
|  |             nonlocal removed_files_count | ||||||
|  |             nonlocal removed_files_size | ||||||
|  | 
 | ||||||
|  |             for file in entries.values(): | ||||||
|  |                 # noinspection PyBroadException | ||||||
|  |                 try: | ||||||
|  |                     total_size += file.size | ||||||
|  |                     if file.parent_dataset_id == self._id: | ||||||
|  |                         if file.relative_path in parent_file_entries: | ||||||
|  |                             modified_files_count += 1 | ||||||
|  |                             modified_files_size += file.size - parent_file_entries[file.relative_path].size | ||||||
|  |                         else: | ||||||
|  |                             added_files_count += 1 | ||||||
|  |                             added_files_size += file.size | ||||||
|  |                 except Exception: | ||||||
|  |                     pass | ||||||
|  |             for parent_entry_key, parent_entry_value in parent_entries.items(): | ||||||
|  |                 # noinspection PyBroadException | ||||||
|  |                 try: | ||||||
|  |                     if parent_entry_key not in entries: | ||||||
|  |                         removed_files_count += 1 | ||||||
|  |                         removed_files_size -= parent_entry_value.size | ||||||
|  |                 except Exception: | ||||||
|  |                     pass | ||||||
|  | 
 | ||||||
|         parent_datasets_ids = self._dependency_graph[self._id] |         parent_datasets_ids = self._dependency_graph[self._id] | ||||||
|         parent_file_entries = dict()  # type: Dict[str, FileEntry] |         parent_file_entries = dict()  # type: Dict[str, FileEntry] | ||||||
|  |         parent_link_entries = dict()  # type: Dict[str, LinkEntry] | ||||||
|         for parent_dataset_id in parent_datasets_ids: |         for parent_dataset_id in parent_datasets_ids: | ||||||
|             if parent_dataset_id == self._id: |             if parent_dataset_id == self._id: | ||||||
|                 continue |                 continue | ||||||
|             parent_dataset = self.get(parent_dataset_id) |             parent_dataset = self.get(parent_dataset_id) | ||||||
|             parent_file_entries.update(parent_dataset._dataset_file_entries) |             parent_file_entries.update(parent_dataset._dataset_file_entries) | ||||||
|  |             parent_link_entries.update(parent_dataset._dataset_link_entries) | ||||||
|         # we have to do this after we update the parent_file_entries because we might |         # we have to do this after we update the parent_file_entries because we might | ||||||
|         # have duplicate file entries |         # have duplicate file entries | ||||||
|         for parent_file_entry_key, parent_file_entry_value in parent_file_entries.items(): |         update_changes(self._dataset_file_entries, parent_file_entries) | ||||||
|             if parent_file_entry_key not in self._dataset_file_entries: |         update_changes(self._dataset_link_entries, parent_link_entries) | ||||||
|                 removed_files_count += 1 |  | ||||||
|                 removed_files_size -= parent_file_entry_value.size |  | ||||||
|         for file in self._dataset_file_entries.values(): |  | ||||||
|             total_size += file.size |  | ||||||
|             if file.parent_dataset_id == self._id: |  | ||||||
|                 if file.relative_path in parent_file_entries: |  | ||||||
|                     modified_files_count += 1 |  | ||||||
|                     modified_files_size += file.size - parent_file_entries[file.relative_path].size |  | ||||||
|                 else: |  | ||||||
|                     added_files_count += 1 |  | ||||||
|                     added_files_size += file.size |  | ||||||
|         state = dict( |         state = dict( | ||||||
|             file_count=len(self._dataset_file_entries), |             file_count=len(self._dataset_file_entries) + len(self._dataset_link_entries), | ||||||
|             total_size=total_size, |             total_size=total_size, | ||||||
|             dataset_file_entries=[f.as_dict() for f in self._dataset_file_entries.values()], |             dataset_file_entries=[f.as_dict() for f in self._dataset_file_entries.values()], | ||||||
|             dataset_link_entries=[link.as_dict() for link in self._dataset_link_entries.values()], |             dataset_link_entries=[link.as_dict() for link in self._dataset_link_entries.values()], | ||||||
| @ -2743,7 +2766,7 @@ class Dataset(object): | |||||||
|                 file_name = file.link |                 file_name = file.link | ||||||
|             dataset_details += "{}, {}, {}".format( |             dataset_details += "{}, {}, {}".format( | ||||||
|                 file_name, |                 file_name, | ||||||
|                 file.size if file.size is not None and not hasattr(file, "link") else "", |                 file.size if file.size is not None else "", | ||||||
|                 file.hash if file.hash else "", |                 file.hash if file.hash else "", | ||||||
|             ) |             ) | ||||||
|             preview_index += 1 |             preview_index += 1 | ||||||
|  | |||||||
| @ -619,13 +619,6 @@ class StorageHelper(object): | |||||||
|             if isinstance(self._driver, _HttpDriver) and obj: |             if isinstance(self._driver, _HttpDriver) and obj: | ||||||
|                 obj = self._driver._get_download_object(obj)  # noqa |                 obj = self._driver._get_download_object(obj)  # noqa | ||||||
|                 size = int(obj.headers.get("Content-Length", 0)) |                 size = int(obj.headers.get("Content-Length", 0)) | ||||||
|             elif isinstance(self._driver, _Boto3Driver) and obj: |  | ||||||
|                 # noinspection PyBroadException |  | ||||||
|                 try: |  | ||||||
|                     # To catch botocore exceptions |  | ||||||
|                     size = obj.content_length  # noqa |  | ||||||
|                 except Exception: |  | ||||||
|                     pass |  | ||||||
|             elif hasattr(obj, "size"): |             elif hasattr(obj, "size"): | ||||||
|                 size = obj.size |                 size = obj.size | ||||||
|                 # Google storage has the option to reload the object to get the size |                 # Google storage has the option to reload the object to get the size | ||||||
| @ -633,7 +626,12 @@ class StorageHelper(object): | |||||||
|                     obj.reload() |                     obj.reload() | ||||||
|                     size = obj.size |                     size = obj.size | ||||||
|             elif hasattr(obj, "content_length"): |             elif hasattr(obj, "content_length"): | ||||||
|                 size = obj.content_length |                 # noinspection PyBroadException | ||||||
|  |                 try: | ||||||
|  |                     # To catch botocore exceptions | ||||||
|  |                     size = obj.content_length  # noqa | ||||||
|  |                 except Exception: | ||||||
|  |                     pass | ||||||
|         except (ValueError, AttributeError, KeyError): |         except (ValueError, AttributeError, KeyError): | ||||||
|             pass |             pass | ||||||
|         return size |         return size | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 allegroai
						allegroai