mirror of
https://github.com/clearml/clearml
synced 2025-06-26 18:16:07 +00:00
Use sha256sum from storage.util
This commit is contained in:
parent
0be016d3a5
commit
32c134eaa7
@ -1,4 +1,3 @@
|
|||||||
import hashlib
|
|
||||||
import json
|
import json
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import os
|
import os
|
||||||
@ -25,6 +24,7 @@ from ..backend_api.services import tasks
|
|||||||
from ..backend_interface.metrics.events import UploadEvent
|
from ..backend_interface.metrics.events import UploadEvent
|
||||||
from ..debugging.log import LoggerRoot
|
from ..debugging.log import LoggerRoot
|
||||||
from ..storage.helper import remote_driver_schemes
|
from ..storage.helper import remote_driver_schemes
|
||||||
|
from ..storage.util import sha256sum
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@ -536,7 +536,7 @@ class Artifacts(object):
|
|||||||
local_filename.as_posix()))
|
local_filename.as_posix()))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
file_hash, _ = self.sha256sum(local_filename.as_posix())
|
file_hash, _ = sha256sum(local_filename.as_posix(), block_size=Artifacts._hash_block_size)
|
||||||
file_size = local_filename.stat().st_size
|
file_size = local_filename.stat().st_size
|
||||||
|
|
||||||
uri = self._upload_local_file(local_filename, name,
|
uri = self._upload_local_file(local_filename, name,
|
||||||
@ -634,7 +634,8 @@ class Artifacts(object):
|
|||||||
os.close(fd)
|
os.close(fd)
|
||||||
local_csv = Path(local_csv)
|
local_csv = Path(local_csv)
|
||||||
pd_artifact.to_csv(local_csv.as_posix(), index=False, compression=self._compression)
|
pd_artifact.to_csv(local_csv.as_posix(), index=False, compression=self._compression)
|
||||||
current_sha2, file_sha2 = self.sha256sum(local_csv.as_posix(), skip_header=32)
|
current_sha2, file_sha2 = sha256sum(
|
||||||
|
local_csv.as_posix(), skip_header=32, block_size=Artifacts._hash_block_size)
|
||||||
if name in self._last_artifacts_upload:
|
if name in self._last_artifacts_upload:
|
||||||
previous_sha2 = self._last_artifacts_upload[name]
|
previous_sha2 = self._last_artifacts_upload[name]
|
||||||
if previous_sha2 == current_sha2:
|
if previous_sha2 == current_sha2:
|
||||||
@ -806,28 +807,3 @@ class Artifacts(object):
|
|||||||
# noinspection PyProtectedMember
|
# noinspection PyProtectedMember
|
||||||
self._storage_prefix = self._task._get_output_destination_suffix()
|
self._storage_prefix = self._task._get_output_destination_suffix()
|
||||||
return self._storage_prefix
|
return self._storage_prefix
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def sha256sum(filename, skip_header=0):
|
|
||||||
# type: (str, int) -> (Optional[str], Optional[str])
|
|
||||||
# create sha2 of the file, notice we skip the header of the file (32 bytes)
|
|
||||||
# because sometimes that is the only change
|
|
||||||
h = hashlib.sha256()
|
|
||||||
file_hash = hashlib.sha256()
|
|
||||||
b = bytearray(Artifacts._hash_block_size)
|
|
||||||
mv = memoryview(b)
|
|
||||||
try:
|
|
||||||
with open(filename, 'rb', buffering=0) as f:
|
|
||||||
# skip header
|
|
||||||
if skip_header:
|
|
||||||
file_hash.update(f.read(skip_header))
|
|
||||||
# noinspection PyUnresolvedReferences
|
|
||||||
for n in iter(lambda: f.readinto(mv), 0):
|
|
||||||
h.update(mv[:n])
|
|
||||||
if skip_header:
|
|
||||||
file_hash.update(mv[:n])
|
|
||||||
except Exception as e:
|
|
||||||
LoggerRoot.get_base_logger().warning(str(e))
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
return h.hexdigest(), file_hash.hexdigest() if skip_header else None
|
|
||||||
|
Loading…
Reference in New Issue
Block a user