mirror of
https://github.com/clearml/clearml
synced 2025-03-03 10:42:00 +00:00
Fix StorageManager.list()
does not return size metadata (#865)
This commit is contained in:
parent
2120fc85f5
commit
1a62d4b6e1
@ -2030,30 +2030,53 @@ class Dataset(object):
|
||||
modified_files_size = 0
|
||||
removed_files_count = 0
|
||||
removed_files_size = 0
|
||||
|
||||
def update_changes(entries, parent_entries):
|
||||
nonlocal total_size
|
||||
nonlocal modified_files_count
|
||||
nonlocal modified_files_size
|
||||
nonlocal added_files_count
|
||||
nonlocal added_files_size
|
||||
nonlocal removed_files_count
|
||||
nonlocal removed_files_size
|
||||
|
||||
for file in entries.values():
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
total_size += file.size
|
||||
if file.parent_dataset_id == self._id:
|
||||
if file.relative_path in parent_file_entries:
|
||||
modified_files_count += 1
|
||||
modified_files_size += file.size - parent_file_entries[file.relative_path].size
|
||||
else:
|
||||
added_files_count += 1
|
||||
added_files_size += file.size
|
||||
except Exception:
|
||||
pass
|
||||
for parent_entry_key, parent_entry_value in parent_entries.items():
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
if parent_entry_key not in entries:
|
||||
removed_files_count += 1
|
||||
removed_files_size -= parent_entry_value.size
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
parent_datasets_ids = self._dependency_graph[self._id]
|
||||
parent_file_entries = dict() # type: Dict[str, FileEntry]
|
||||
parent_link_entries = dict() # type: Dict[str, LinkEntry]
|
||||
for parent_dataset_id in parent_datasets_ids:
|
||||
if parent_dataset_id == self._id:
|
||||
continue
|
||||
parent_dataset = self.get(parent_dataset_id)
|
||||
parent_file_entries.update(parent_dataset._dataset_file_entries)
|
||||
parent_link_entries.update(parent_dataset._dataset_link_entries)
|
||||
# we have to do this after we update the parent_file_entries because we might
|
||||
# have duplicate file entries
|
||||
for parent_file_entry_key, parent_file_entry_value in parent_file_entries.items():
|
||||
if parent_file_entry_key not in self._dataset_file_entries:
|
||||
removed_files_count += 1
|
||||
removed_files_size -= parent_file_entry_value.size
|
||||
for file in self._dataset_file_entries.values():
|
||||
total_size += file.size
|
||||
if file.parent_dataset_id == self._id:
|
||||
if file.relative_path in parent_file_entries:
|
||||
modified_files_count += 1
|
||||
modified_files_size += file.size - parent_file_entries[file.relative_path].size
|
||||
else:
|
||||
added_files_count += 1
|
||||
added_files_size += file.size
|
||||
update_changes(self._dataset_file_entries, parent_file_entries)
|
||||
update_changes(self._dataset_link_entries, parent_link_entries)
|
||||
state = dict(
|
||||
file_count=len(self._dataset_file_entries),
|
||||
file_count=len(self._dataset_file_entries) + len(self._dataset_link_entries),
|
||||
total_size=total_size,
|
||||
dataset_file_entries=[f.as_dict() for f in self._dataset_file_entries.values()],
|
||||
dataset_link_entries=[link.as_dict() for link in self._dataset_link_entries.values()],
|
||||
@ -2743,7 +2766,7 @@ class Dataset(object):
|
||||
file_name = file.link
|
||||
dataset_details += "{}, {}, {}".format(
|
||||
file_name,
|
||||
file.size if file.size is not None and not hasattr(file, "link") else "",
|
||||
file.size if file.size is not None else "",
|
||||
file.hash if file.hash else "",
|
||||
)
|
||||
preview_index += 1
|
||||
|
@ -619,13 +619,6 @@ class StorageHelper(object):
|
||||
if isinstance(self._driver, _HttpDriver) and obj:
|
||||
obj = self._driver._get_download_object(obj) # noqa
|
||||
size = int(obj.headers.get("Content-Length", 0))
|
||||
elif isinstance(self._driver, _Boto3Driver) and obj:
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
# To catch botocore exceptions
|
||||
size = obj.content_length # noqa
|
||||
except Exception:
|
||||
pass
|
||||
elif hasattr(obj, "size"):
|
||||
size = obj.size
|
||||
# Google storage has the option to reload the object to get the size
|
||||
@ -633,7 +626,12 @@ class StorageHelper(object):
|
||||
obj.reload()
|
||||
size = obj.size
|
||||
elif hasattr(obj, "content_length"):
|
||||
size = obj.content_length
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
# To catch botocore exceptions
|
||||
size = obj.content_length # noqa
|
||||
except Exception:
|
||||
pass
|
||||
except (ValueError, AttributeError, KeyError):
|
||||
pass
|
||||
return size
|
||||
|
Loading…
Reference in New Issue
Block a user