
Replace humanfriendly with utility functions

Author: allegroai (2021-01-24 09:22:56 +02:00)
Parent: 1c84b31056
Commit: 0f401545b8
5 changed files with 135 additions and 17 deletions
clearml/
    backend_interface/metrics
    binding
    datasets
    storage
    utilities
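At every call site the change is a drop-in swap: the third-party humanfriendly helpers are replaced by equivalents added to clearml/storage/util.py in this commit. A minimal sketch of the pattern (the file name data.bin is only for illustration):

    # before this commit
    # import humanfriendly
    # size_str = humanfriendly.format_size(os.path.getsize('data.bin'))

    # after this commit: in-repo helpers, no extra dependency
    import os
    from clearml.storage.util import format_size, parse_size

    size_str = format_size(os.path.getsize('data.bin'))   # e.g. '4.2 MB'
    num_bytes = parse_size('1.5 GB')                       # 1500000000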

File: clearml/backend_interface/metrics

@@ -3,10 +3,8 @@ import os
 from functools import partial
 from logging import warning
 from multiprocessing.pool import ThreadPool
 from multiprocessing import Lock
 from time import time
-from humanfriendly import format_timespan
 from pathlib2 import Path
 from ...backend_api.services import events as api_events
@@ -198,8 +196,8 @@ class Metrics(InterfaceBase):
         t_f, t_u, t_ref = \
             (self._file_related_event_time, self._file_upload_time, self._file_upload_starvation_warning_sec)
         if t_f and t_u and t_ref and (t_f - t_u) > t_ref:
-            log.warning('Possible metrics file upload starvation: files were not uploaded for %s' %
-                        format_timespan(t_ref))
+            log.warning('Possible metrics file upload starvation: '
+                        'files were not uploaded for {} seconds'.format(t_ref))
         # send the events in a batched request
         good_events = [ev for ev in events if ev.upload_exception is None]
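The only behavioural change in this hunk is the wording of the warning: the threshold is now reported as a raw number of seconds instead of humanfriendly's formatted timespan. Roughly, with a hypothetical threshold of 120 seconds:

    t_ref = 120  # hypothetical starvation-warning threshold, in seconds
    # before: humanfriendly.format_timespan(t_ref) -> '2 minutes'
    # after:  '{} seconds'.format(t_ref)           -> '120 seconds'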

File: clearml/binding

@@ -12,7 +12,6 @@ from threading import Thread
 from time import time
 from zipfile import ZipFile, ZIP_DEFLATED
-import humanfriendly
 import six
 from PIL import Image
 from pathlib2 import Path
@@ -24,7 +23,7 @@ from ..backend_api.services import tasks
 from ..backend_interface.metrics.events import UploadEvent
 from ..debugging.log import LoggerRoot
 from ..storage.helper import remote_driver_schemes
-from ..storage.util import sha256sum
+from ..storage.util import sha256sum, format_size
 try:
     import pandas as pd
@@ -428,7 +427,7 @@ class Artifacts(object):
                     if filename.is_file():
                         relative_file_name = filename.relative_to(folder).as_posix()
                         archive_preview += '{} - {}\n'.format(
-                            relative_file_name, humanfriendly.format_size(filename.stat().st_size))
+                            relative_file_name, format_size(filename.stat().st_size))
                         zf.write(filename.as_posix(), arcname=relative_file_name)
             except Exception as e:
                 # failed uploading folder:
@@ -449,7 +448,7 @@ class Artifacts(object):
             override_filename_in_uri = artifact_object.parts[-1]
             artifact_type_data.preview = preview or '{} - {}\n'.format(
-                artifact_object, humanfriendly.format_size(artifact_object.stat().st_size))
+                artifact_object, format_size(artifact_object.stat().st_size))
             artifact_object = artifact_object.as_posix()
             artifact_type = 'custom'
             artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
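Both Artifacts call sites build the same "name - size" preview string; only the size formatter changes. A small sketch of the resulting preview line with the new helper (the file name and size are made up):

    from clearml.storage.util import format_size

    # a hypothetical archive entry of 1,234,567 bytes
    line = '{} - {}\n'.format('images/train_001.jpg', format_size(1234567))
    # -> 'images/train_001.jpg - 1.23 MB\n'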

File: clearml/datasets

@@ -9,7 +9,6 @@ from tempfile import mkstemp, mkdtemp
 from typing import Union, Optional, Sequence, List, Dict, Any, Mapping
 from zipfile import ZipFile, ZIP_DEFLATED
-import humanfriendly
 from attr import attrs, attrib
 from pathlib2 import Path
@@ -20,7 +19,7 @@ from ..backend_interface.util import mutually_exclusive, exact_match_regex
 from ..debugging.log import LoggerRoot
 from ..storage.helper import StorageHelper
 from ..storage.cache import CacheManager
-from ..storage.util import sha256sum, is_windows, md5text
+from ..storage.util import sha256sum, is_windows, md5text, format_size
 try:
     from pathlib import Path as _Path  # noqa
@@ -324,7 +323,7 @@ class Dataset(object):
                     relative_file_name = file_entry.relative_path
                     zf.write(filename.as_posix(), arcname=relative_file_name)
                     archive_preview += '{} - {}\n'.format(
-                        relative_file_name, humanfriendly.format_size(filename.stat().st_size))
+                        relative_file_name, format_size(filename.stat().st_size))
                     file_entry.local_path = None
                     count += 1
             except Exception as e:
@@ -358,7 +357,7 @@ class Dataset(object):
         self._dataset_file_entries = {k: v for k, v in self._dataset_file_entries.items()
                                       if v.relative_path is not None}
         # start upload
-        zip_file_size = humanfriendly.format_size(Path(zip_file).stat().st_size)
+        zip_file_size = format_size(Path(zip_file).stat().st_size)
         self._task.get_logger().report_text(
             'Uploading compressed dataset changes ({} files, total {}) to {}'.format(
                 count, zip_file_size, self.get_default_storage()))
@@ -966,7 +965,7 @@ class Dataset(object):
             'Dataset state\n' \
             'Files added/modified: {0} - total size {1}\n' \
             'Current dependency graph: {2}\n'.format(
-                len(modified_files), humanfriendly.format_size(sum(modified_files)),
+                len(modified_files), format_size(sum(modified_files)),
                 json.dumps(self._dependency_graph, indent=2, sort_keys=True))
         # store as artifact of the Task.
         self._task.upload_artifact(
@@ -1230,8 +1229,8 @@ class Dataset(object):
             removed = len(self.list_removed_files(node))
             modified = len(self.list_modified_files(node))
             table_values += [[node, node_names.get(node, ''),
-                              removed, modified, count-modified, humanfriendly.format_size(size)]]
-            node_details[node] = [removed, modified, count-modified, humanfriendly.format_size(size)]
+                              removed, modified, count-modified, format_size(size)]]
+            node_details[node] = [removed, modified, count-modified, format_size(size)]
         # create DAG
         visited = []

File: clearml/storage

@@ -1,4 +1,5 @@
 import hashlib
+import re
 import sys
 from typing import Optional, Union
@@ -92,3 +93,124 @@ def is_windows():
     :return: True if currently running on windows OS
     """
     return sys.platform == 'win32'
+
+
+def format_size(size_in_bytes, binary=False):
+    # type: (Union[int, float], bool) -> str
+    """
+    Return the size in human readable format (string)
+    Matching humanfriendly.format_size outputs
+
+    :param size_in_bytes: number of bytes
+    :param binary: If `True`, 1 KiB equals 1024 bytes; if `False` (default), 1 KB equals 1000 bytes
+    :return: string representation of the number of bytes (bytes, KB, MB, GB, TB, PB)
+
+    >>> format_size(0)
+    '0 bytes'
+    >>> format_size(1)
+    '1 byte'
+    >>> format_size(5)
+    '5 bytes'
+    >>> format_size(1000)
+    '1 KB'
+    >>> format_size(1024, binary=True)
+    '1 KiB'
+    >>> format_size(1000 ** 3 * 4)
+    '4 GB'
+    """
+    size = float(size_in_bytes)
+    # a single byte is the exception here
+    if size == 1:
+        return '{} byte'.format(int(size))
+    k = 1024 if binary else 1000
+    scale = ('bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB') if binary else ('bytes', 'KB', 'MB', 'GB', 'TB', 'PB')
+    for i, m in enumerate(scale):
+        if size < k**(i+1) or i == len(scale)-1:
+            return ('{:.2f}'.format(size/(k**i)).rstrip('0').rstrip('.')
+                    if i > 0 else '{}'.format(int(size))) + ' ' + m
+    # we should never get here
+    return '{} {}'.format(int(size), scale[0])
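A short sanity check, restating the doctest examples above as executable asserts; decimal multiples are the default and base-2 multiples are opt-in via binary=True:

    from clearml.storage.util import format_size

    assert format_size(0) == '0 bytes'
    assert format_size(1) == '1 byte'
    assert format_size(1000) == '1 KB'                 # decimal (base-10) by default
    assert format_size(1024, binary=True) == '1 KiB'   # base-2 when requested
    assert format_size(1000 ** 3 * 4) == '4 GB'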
+
+
+def parse_size(size, binary=False):
+    """
+    Parse a human readable data size and return the number of bytes.
+    Matches humanfriendly.parse_size
+
+    :param size: The human readable file size to parse (a string).
+    :param binary: :data:`True` to use binary multiples of bytes (base-2) for
+                   ambiguous unit symbols and names, :data:`False` to use
+                   decimal multiples of bytes (base-10).
+    :returns: The corresponding size in bytes (an integer).
+    :raises: :exc:`ValueError` when the input can't be parsed.
+
+    This function knows how to parse sizes in bytes, kilobytes, megabytes,
+    gigabytes, terabytes and petabytes. Some examples:
+
+    >>> parse_size('42')
+    42
+    >>> parse_size('13b')
+    13
+    >>> parse_size('5 bytes')
+    5
+    >>> parse_size('1 KB')
+    1000
+    >>> parse_size('1 kilobyte')
+    1000
+    >>> parse_size('1 KiB')
+    1024
+    >>> parse_size('1 KB', binary=True)
+    1024
+    >>> parse_size('1.5 GB')
+    1500000000
+    >>> parse_size('1.5 GB', binary=True)
+    1610612736
+    """
+    def tokenize(text):
+        tokenized_input = []
+        for token in re.split(r'(\d+(?:\.\d+)?)', text):
+            token = token.strip()
+            if re.match(r'\d+\.\d+', token):
+                tokenized_input.append(float(token))
+            elif token.isdigit():
+                tokenized_input.append(int(token))
+            elif token:
+                tokenized_input.append(token)
+        return tokenized_input
+
+    tokens = tokenize(str(size))
+    if tokens and isinstance(tokens[0], (int, float)):
+        disk_size_units_b = \
+            (('B', 'bytes'), ('KiB', 'kibibyte'), ('MiB', 'mebibyte'), ('GiB', 'gibibyte'),
+             ('TiB', 'tebibyte'), ('PiB', 'pebibyte'))
+        disk_size_units_d = \
+            (('B', 'bytes'), ('KB', 'kilobyte'), ('MB', 'megabyte'), ('GB', 'gigabyte'),
+             ('TB', 'terabyte'), ('PB', 'petabyte'))
+        disk_size_units_b = [(1024 ** i, s[0], s[1]) for i, s in enumerate(disk_size_units_b)]
+        k = 1024 if binary else 1000
+        disk_size_units_d = [(k ** i, s[0], s[1]) for i, s in enumerate(disk_size_units_d)]
+        disk_size_units = (disk_size_units_b + disk_size_units_d) \
+            if binary else (disk_size_units_d + disk_size_units_b)
+        # Get the normalized unit (if any) from the tokenized input.
+        normalized_unit = tokens[1].lower() if len(tokens) == 2 and isinstance(tokens[1], str) else ''
+        # If the input contains only a number, it's assumed to be the number of
+        # bytes. The second token can also explicitly reference the unit bytes.
+        if len(tokens) == 1 or normalized_unit.startswith('b'):
+            return int(tokens[0])
+        # Otherwise we expect two tokens: a number and a unit.
+        if normalized_unit:
+            # Convert plural units to singular units, for details:
+            # https://github.com/xolox/python-humanfriendly/issues/26
+            normalized_unit = normalized_unit.rstrip('s')
+        for k, low, high in disk_size_units:
+            # First we check for unambiguous symbols (KiB, MiB, GiB, etc)
+            # and names (kibibyte, mebibyte, gibibyte, etc) because their
+            # handling is always the same.
+            if normalized_unit in (low.lower(), high.lower()):
+                return int(tokens[0] * k)
+            # Now we will deal with ambiguous prefixes (K, M, G, etc),
+            # symbols (KB, MB, GB, etc) and names (kilobyte, megabyte,
+            # gigabyte, etc) according to the caller's preference.
+            if (normalized_unit in (low.lower(), high.lower()) or
+                    normalized_unit.startswith(low.lower())):
+                return int(tokens[0] * k)
+    raise ValueError("Failed to parse size! (input {} was tokenized as {})".format(size, tokens))
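The same kind of quick check for parse_size, again taken from the doctest examples; one difference worth noting is that unparsable input raises a plain ValueError here, where humanfriendly raised its own InvalidSize:

    from clearml.storage.util import parse_size

    assert parse_size('42') == 42
    assert parse_size('1 KB') == 1000
    assert parse_size('1 KiB') == 1024               # unambiguous binary symbol
    assert parse_size('1 KB', binary=True) == 1024   # ambiguous symbol, caller prefers base-2
    assert parse_size('1.5 GB') == 1500000000

    try:
        parse_size('not a size')
    except ValueError:
        pass  # replaces humanfriendly's InvalidSize for bad input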

File: clearml/utilities

@@ -3,14 +3,14 @@ from __future__ import division
 import json
 import six
-import humanfriendly
 import pyparsing
 from .pyhocon import ConfigFactory, HOCONConverter
+from ..storage.util import parse_size


 def parse_human_size(value):
     if isinstance(value, six.string_types):
-        return humanfriendly.parse_size(value)
+        return parse_size(value)
     return value
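With that, parse_human_size keeps its old contract: strings are parsed to a byte count, anything else is passed through untouched. An illustrative call, assuming the function as defined above:

    print(parse_human_size('512 MB'))   # -> 512000000, via the new parse_size
    print(parse_human_size(1048576))    # -> 1048576, non-strings are returned unchanged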