mirror of https://github.com/clearml/clearml (synced 2025-02-26 05:58:41 +00:00)
Replace humanfriendly with utility functions
This commit is contained in:
parent 1c84b31056
commit 0f401545b8
@@ -3,10 +3,8 @@ import os
 from functools import partial
 from logging import warning
 from multiprocessing.pool import ThreadPool
-from multiprocessing import Lock
 from time import time
 
-from humanfriendly import format_timespan
 from pathlib2 import Path
 
 from ...backend_api.services import events as api_events
@@ -198,8 +196,8 @@ class Metrics(InterfaceBase):
         t_f, t_u, t_ref = \
             (self._file_related_event_time, self._file_upload_time, self._file_upload_starvation_warning_sec)
         if t_f and t_u and t_ref and (t_f - t_u) > t_ref:
-            log.warning('Possible metrics file upload starvation: files were not uploaded for %s' %
-                        format_timespan(t_ref))
+            log.warning('Possible metrics file upload starvation: '
+                        'files were not uploaded for {} seconds'.format(t_ref))
 
         # send the events in a batched request
         good_events = [ev for ev in events if ev.upload_exception is None]
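For readers skimming the hunk above: the warning fires when file-related events are still being produced (t_f) but the last completed upload (t_u) lags behind by more than the configured threshold (t_ref). A minimal standalone sketch of the same check, with illustrative names rather than the actual Metrics attributes:

    import logging

    log = logging.getLogger(__name__)

    def warn_on_upload_starvation(last_event_sec, last_upload_sec, threshold_sec):
        # Mirrors the condition above: all three values must be set, and the
        # gap between the newest file event and the last completed upload
        # must exceed the threshold (all values in seconds).
        if last_event_sec and last_upload_sec and threshold_sec and \
                (last_event_sec - last_upload_sec) > threshold_sec:
            log.warning('Possible metrics file upload starvation: '
                        'files were not uploaded for {} seconds'.format(threshold_sec))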
@@ -12,7 +12,6 @@ from threading import Thread
 from time import time
 from zipfile import ZipFile, ZIP_DEFLATED
 
-import humanfriendly
 import six
 from PIL import Image
 from pathlib2 import Path
@@ -24,7 +23,7 @@ from ..backend_api.services import tasks
 from ..backend_interface.metrics.events import UploadEvent
 from ..debugging.log import LoggerRoot
 from ..storage.helper import remote_driver_schemes
-from ..storage.util import sha256sum
+from ..storage.util import sha256sum, format_size
 
 try:
     import pandas as pd
@@ -428,7 +427,7 @@ class Artifacts(object):
                 if filename.is_file():
                     relative_file_name = filename.relative_to(folder).as_posix()
                     archive_preview += '{} - {}\n'.format(
-                        relative_file_name, humanfriendly.format_size(filename.stat().st_size))
+                        relative_file_name, format_size(filename.stat().st_size))
                     zf.write(filename.as_posix(), arcname=relative_file_name)
             except Exception as e:
                 # failed uploading folder:
@@ -449,7 +448,7 @@ class Artifacts(object):
 
             override_filename_in_uri = artifact_object.parts[-1]
             artifact_type_data.preview = preview or '{} - {}\n'.format(
-                artifact_object, humanfriendly.format_size(artifact_object.stat().st_size))
+                artifact_object, format_size(artifact_object.stat().st_size))
             artifact_object = artifact_object.as_posix()
            artifact_type = 'custom'
             artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
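Both Artifacts hunks only swap the formatter used to build the plain-text archive preview. A hedged sketch of the resulting preview line; the import path is inferred from the relative imports in the diff, and the file name and size are hypothetical:

    from clearml.storage.util import format_size  # module path assumed

    relative_file_name = 'train/labels.csv'  # hypothetical entry
    preview_line = '{} - {}\n'.format(relative_file_name, format_size(48120))
    assert preview_line == 'train/labels.csv - 48.12 KB\n'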
@@ -9,7 +9,6 @@ from tempfile import mkstemp, mkdtemp
 from typing import Union, Optional, Sequence, List, Dict, Any, Mapping
 from zipfile import ZipFile, ZIP_DEFLATED
 
-import humanfriendly
 from attr import attrs, attrib
 from pathlib2 import Path
 
@@ -20,7 +19,7 @@ from ..backend_interface.util import mutually_exclusive, exact_match_regex
 from ..debugging.log import LoggerRoot
 from ..storage.helper import StorageHelper
 from ..storage.cache import CacheManager
-from ..storage.util import sha256sum, is_windows, md5text
+from ..storage.util import sha256sum, is_windows, md5text, format_size
 
 try:
     from pathlib import Path as _Path  # noqa
@@ -324,7 +323,7 @@ class Dataset(object):
                     relative_file_name = file_entry.relative_path
                     zf.write(filename.as_posix(), arcname=relative_file_name)
                     archive_preview += '{} - {}\n'.format(
-                        relative_file_name, humanfriendly.format_size(filename.stat().st_size))
+                        relative_file_name, format_size(filename.stat().st_size))
                     file_entry.local_path = None
                     count += 1
             except Exception as e:
@@ -358,7 +357,7 @@ class Dataset(object):
         self._dataset_file_entries = {k: v for k, v in self._dataset_file_entries.items()
                                       if v.relative_path is not None}
         # start upload
-        zip_file_size = humanfriendly.format_size(Path(zip_file).stat().st_size)
+        zip_file_size = format_size(Path(zip_file).stat().st_size)
         self._task.get_logger().report_text(
             'Uploading compressed dataset changes ({} files, total {}) to {}'.format(
                 count, zip_file_size, self.get_default_storage()))
@@ -966,7 +965,7 @@ class Dataset(object):
             'Dataset state\n' \
             'Files added/modified: {0} - total size {1}\n' \
             'Current dependency graph: {2}\n'.format(
-                len(modified_files), humanfriendly.format_size(sum(modified_files)),
+                len(modified_files), format_size(sum(modified_files)),
                 json.dumps(self._dependency_graph, indent=2, sort_keys=True))
         # store as artifact of the Task.
         self._task.upload_artifact(
@@ -1230,8 +1229,8 @@ class Dataset(object):
             removed = len(self.list_removed_files(node))
             modified = len(self.list_modified_files(node))
             table_values += [[node, node_names.get(node, ''),
-                              removed, modified, count-modified, humanfriendly.format_size(size)]]
-            node_details[node] = [removed, modified, count-modified, humanfriendly.format_size(size)]
+                              removed, modified, count-modified, format_size(size)]]
+            node_details[node] = [removed, modified, count-modified, format_size(size)]
 
         # create DAG
         visited = []
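The Dataset hunks are the same mechanical swap. Assuming format_size behaves as its doctests promise, the progress line assembled in the @@ -358,7 hunk would read, with illustrative file count, size, and storage target:

    Uploading compressed dataset changes (12 files, total 6.01 MB) to s3://my-bucket/datasets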
@@ -1,4 +1,5 @@
 import hashlib
+import re
 import sys
 from typing import Optional, Union
 
@@ -92,3 +93,124 @@ def is_windows():
     :return: True if currently running on windows OS
     """
     return sys.platform == 'win32'
+
+
+def format_size(size_in_bytes, binary=False):
+    # type: (Union[int, float], bool) -> str
+    """
+    Return the size in human readable format (string),
+    matching humanfriendly.format_size outputs
+
+    :param size_in_bytes: number of bytes
+    :param binary: If `True` use binary units (1 KiB = 1024 bytes), if `False` (default)
+        use decimal units (1 KB = 1000 bytes)
+    :return: string representation of the number of bytes (bytes, KB, MB, GB, TB, PB)
+
+    >>> format_size(0)
+    '0 bytes'
+    >>> format_size(1)
+    '1 byte'
+    >>> format_size(5)
+    '5 bytes'
+    >>> format_size(1000)
+    '1 KB'
+    >>> format_size(1024, binary=True)
+    '1 KiB'
+    >>> format_size(1000 ** 3 * 4)
+    '4 GB'
+    """
+    size = float(size_in_bytes)
+    # single byte is the exception here
+    if size == 1:
+        return '{} byte'.format(int(size))
+    k = 1024 if binary else 1000
+    scale = ('bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB') if binary else ('bytes', 'KB', 'MB', 'GB', 'TB', 'PB')
+    for i, m in enumerate(scale):
+        if size < k**(i+1) or i == len(scale)-1:
+            return ('{:.2f}'.format(size/(k**i)).rstrip('0').rstrip('.')
+                    if i > 0 else '{}'.format(int(size))) + ' ' + m
+    # we should never get here
+    return '{} {}'.format(int(size), scale[0])
+
+
+def parse_size(size, binary=False):
+    """
+    Parse a human readable data size and return the number of bytes,
+    matching humanfriendly.parse_size
+
+    :param size: The human readable file size to parse (a string).
+    :param binary: `True` to use binary multiples of bytes (base-2) for
+        ambiguous unit symbols and names, `False` to use
+        decimal multiples of bytes (base-10).
+    :returns: The corresponding size in bytes (an integer).
+    :raises: ValueError when the input can't be parsed.
+
+    This function knows how to parse sizes in bytes, kilobytes, megabytes,
+    gigabytes, terabytes and petabytes. Some examples:
+
+    >>> parse_size('42')
+    42
+    >>> parse_size('13b')
+    13
+    >>> parse_size('5 bytes')
+    5
+    >>> parse_size('1 KB')
+    1000
+    >>> parse_size('1 kilobyte')
+    1000
+    >>> parse_size('1 KiB')
+    1024
+    >>> parse_size('1 KB', binary=True)
+    1024
+    >>> parse_size('1.5 GB')
+    1500000000
+    >>> parse_size('1.5 GB', binary=True)
+    1610612736
+    """
+    def tokenize(text):
+        tokenized_input = []
+        for token in re.split(r'(\d+(?:\.\d+)?)', text):
+            token = token.strip()
+            if re.match(r'\d+\.\d+', token):
+                tokenized_input.append(float(token))
+            elif token.isdigit():
+                tokenized_input.append(int(token))
+            elif token:
+                tokenized_input.append(token)
+        return tokenized_input
+
+    tokens = tokenize(str(size))
+    if tokens and isinstance(tokens[0], (int, float)):
+        disk_size_units_b = \
+            (('B', 'bytes'), ('KiB', 'kibibyte'), ('MiB', 'mebibyte'), ('GiB', 'gibibyte'),
+             ('TiB', 'tebibyte'), ('PiB', 'pebibyte'))
+        disk_size_units_d = \
+            (('B', 'bytes'), ('KB', 'kilobyte'), ('MB', 'megabyte'), ('GB', 'gigabyte'),
+             ('TB', 'terabyte'), ('PB', 'petabyte'))
+        disk_size_units_b = [(1024 ** i, s[0], s[1]) for i, s in enumerate(disk_size_units_b)]
+        k = 1024 if binary else 1000
+        disk_size_units_d = [(k ** i, s[0], s[1]) for i, s in enumerate(disk_size_units_d)]
+        disk_size_units = (disk_size_units_b + disk_size_units_d) \
+            if binary else (disk_size_units_d + disk_size_units_b)
+
+        # Get the normalized unit (if any) from the tokenized input.
+        normalized_unit = tokens[1].lower() if len(tokens) == 2 and isinstance(tokens[1], str) else ''
+        # If the input contains only a number, it's assumed to be the number of
+        # bytes. The second token can also explicitly reference the unit bytes.
+        if len(tokens) == 1 or normalized_unit.startswith('b'):
+            return int(tokens[0])
+        # Otherwise we expect two tokens: a number and a unit.
+        if normalized_unit:
+            # Convert plural units to singular units, for details see:
+            # https://github.com/xolox/python-humanfriendly/issues/26
+            normalized_unit = normalized_unit.rstrip('s')
+            for k, low, high in disk_size_units:
+                # First we check for unambiguous symbols (KiB, MiB, GiB, etc)
+                # and names (kibibyte, mebibyte, gibibyte, etc) because their
+                # handling is always the same.
+                if normalized_unit in (low.lower(), high.lower()):
+                    return int(tokens[0] * k)
+                # Now we will deal with ambiguous prefixes (K, M, G, etc),
+                # symbols (KB, MB, GB, etc) and names (kilobyte, megabyte,
+                # gigabyte, etc) according to the caller's preference.
+                if (normalized_unit in (low.lower(), high.lower()) or
+                        normalized_unit.startswith(low.lower())):
+                    return int(tokens[0] * k)
+
+    raise ValueError("Failed to parse size! (input {} was tokenized as {})".format(size, tokens))
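A quick trace of the scale loop in format_size, for anyone verifying the rounding behavior (a sketch; the clearml.storage.util module path is an assumption based on the relative imports in the other hunks):

    from clearml.storage.util import format_size  # module path assumed

    # 1536 bytes, decimal units: the loop stops at i=1 (1536 < 1000**2),
    # formats 1536/1000 with two decimals, then strips trailing zeros/dot.
    assert format_size(1536) == '1.54 KB'
    # an exact multiple loses the decimals entirely ('1.00' -> '1'):
    assert format_size(1000) == '1 KB'
    # binary=True switches both the divisor (1024) and the unit names:
    assert format_size(1536, binary=True) == '1.5 KiB'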
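And a sanity check that parse_size matches the humanfriendly behavior it replaces; this assumes humanfriendly is still installed in the test environment (after this commit it is no longer a runtime dependency):

    import humanfriendly  # dev/test only; no longer imported by the library
    from clearml.storage.util import parse_size  # module path assumed

    for text in ('42', '13b', '5 bytes', '1 KB', '1 KiB', '1.5 GB'):
        assert parse_size(text) == humanfriendly.parse_size(text)
    # the binary flag only reinterprets ambiguous decimal symbols:
    assert parse_size('1 KB', binary=True) == 1024
    assert parse_size('1 KiB', binary=True) == 1024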
@@ -3,14 +3,14 @@ from __future__ import division
 import json
 
 import six
-import humanfriendly
 import pyparsing
 from .pyhocon import ConfigFactory, HOCONConverter
+from ..storage.util import parse_size
 
 
 def parse_human_size(value):
     if isinstance(value, six.string_types):
-        return humanfriendly.parse_size(value)
+        return parse_size(value)
     return value
 
 
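Finally, a doctest-style sketch of the updated parse_human_size, which accepts either a human-readable string or an already-numeric value (values illustrative):

    >>> parse_human_size('128 MB')  # strings are delegated to parse_size
    128000000
    >>> parse_human_size(4096)      # non-strings pass through unchanged
    4096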