Mirror of https://github.com/clearml/clearml (synced 2025-02-26 05:58:41 +00:00)
Replace humanfriendly with utility functions
This commit is contained in:
parent
1c84b31056
commit
0f401545b8
@@ -3,10 +3,8 @@ import os
 from functools import partial
 from logging import warning
 from multiprocessing.pool import ThreadPool
 from multiprocessing import Lock
 from time import time
-
-from humanfriendly import format_timespan
 from pathlib2 import Path
 
 from ...backend_api.services import events as api_events
@@ -198,8 +196,8 @@ class Metrics(InterfaceBase):
             t_f, t_u, t_ref = \
                 (self._file_related_event_time, self._file_upload_time, self._file_upload_starvation_warning_sec)
             if t_f and t_u and t_ref and (t_f - t_u) > t_ref:
-                log.warning('Possible metrics file upload starvation: files were not uploaded for %s' %
-                            format_timespan(t_ref))
+                log.warning('Possible metrics file upload starvation: '
+                            'files were not uploaded for {} seconds'.format(t_ref))
 
         # send the events in a batched request
         good_events = [ev for ev in events if ev.upload_exception is None]
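Note that the reworded warning now reports the raw threshold in seconds (e.g. 'files were not uploaded for 600 seconds') rather than humanfriendly's formatted timespan (e.g. '10 minutes'); this removes the last format_timespan call site in the module.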
@@ -12,7 +12,6 @@ from threading import Thread
 from time import time
 from zipfile import ZipFile, ZIP_DEFLATED
 
-import humanfriendly
 import six
 from PIL import Image
 from pathlib2 import Path
@@ -24,7 +23,7 @@ from ..backend_api.services import tasks
 from ..backend_interface.metrics.events import UploadEvent
 from ..debugging.log import LoggerRoot
 from ..storage.helper import remote_driver_schemes
-from ..storage.util import sha256sum
+from ..storage.util import sha256sum, format_size
 
 try:
     import pandas as pd
@@ -428,7 +427,7 @@ class Artifacts(object):
                     if filename.is_file():
                         relative_file_name = filename.relative_to(folder).as_posix()
                         archive_preview += '{} - {}\n'.format(
-                            relative_file_name, humanfriendly.format_size(filename.stat().st_size))
+                            relative_file_name, format_size(filename.stat().st_size))
                         zf.write(filename.as_posix(), arcname=relative_file_name)
             except Exception as e:
                 # failed uploading folder:
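The replacement format_size (added in storage/util.py below) is written to match humanfriendly.format_size output, so call sites like this preview line only drop the module prefix; for instance, a hypothetical 2,048,000-byte file still renders as '2.05 MB'.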
@@ -449,7 +448,7 @@ class Artifacts(object):
 
             override_filename_in_uri = artifact_object.parts[-1]
             artifact_type_data.preview = preview or '{} - {}\n'.format(
-                artifact_object, humanfriendly.format_size(artifact_object.stat().st_size))
+                artifact_object, format_size(artifact_object.stat().st_size))
             artifact_object = artifact_object.as_posix()
             artifact_type = 'custom'
             artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
@@ -9,7 +9,6 @@ from tempfile import mkstemp, mkdtemp
 from typing import Union, Optional, Sequence, List, Dict, Any, Mapping
 from zipfile import ZipFile, ZIP_DEFLATED
 
-import humanfriendly
 from attr import attrs, attrib
 from pathlib2 import Path
 
@@ -20,7 +19,7 @@ from ..backend_interface.util import mutually_exclusive, exact_match_regex
 from ..debugging.log import LoggerRoot
 from ..storage.helper import StorageHelper
 from ..storage.cache import CacheManager
-from ..storage.util import sha256sum, is_windows, md5text
+from ..storage.util import sha256sum, is_windows, md5text, format_size
 
 try:
     from pathlib import Path as _Path  # noqa
@@ -324,7 +323,7 @@ class Dataset(object):
                     relative_file_name = file_entry.relative_path
                     zf.write(filename.as_posix(), arcname=relative_file_name)
                     archive_preview += '{} - {}\n'.format(
-                        relative_file_name, humanfriendly.format_size(filename.stat().st_size))
+                        relative_file_name, format_size(filename.stat().st_size))
                     file_entry.local_path = None
                     count += 1
             except Exception as e:
@@ -358,7 +357,7 @@ class Dataset(object):
         self._dataset_file_entries = {k: v for k, v in self._dataset_file_entries.items()
                                       if v.relative_path is not None}
         # start upload
-        zip_file_size = humanfriendly.format_size(Path(zip_file).stat().st_size)
+        zip_file_size = format_size(Path(zip_file).stat().st_size)
         self._task.get_logger().report_text(
             'Uploading compressed dataset changes ({} files, total {}) to {}'.format(
                 count, zip_file_size, self.get_default_storage()))
@@ -966,7 +965,7 @@ class Dataset(object):
             'Dataset state\n' \
             'Files added/modified: {0} - total size {1}\n' \
             'Current dependency graph: {2}\n'.format(
-                len(modified_files), humanfriendly.format_size(sum(modified_files)),
+                len(modified_files), format_size(sum(modified_files)),
                 json.dumps(self._dependency_graph, indent=2, sort_keys=True))
         # store as artifact of the Task.
         self._task.upload_artifact(
@@ -1230,8 +1229,8 @@ class Dataset(object):
             removed = len(self.list_removed_files(node))
             modified = len(self.list_modified_files(node))
             table_values += [[node, node_names.get(node, ''),
-                              removed, modified, count-modified, humanfriendly.format_size(size)]]
-            node_details[node] = [removed, modified, count-modified, humanfriendly.format_size(size)]
+                              removed, modified, count-modified, format_size(size)]]
+            node_details[node] = [removed, modified, count-modified, format_size(size)]
 
         # create DAG
         visited = []
@@ -1,4 +1,5 @@
 import hashlib
+import re
 import sys
 from typing import Optional, Union
 
@@ -92,3 +93,124 @@ def is_windows():
     :return: True if currently running on windows OS
     """
     return sys.platform == 'win32'
+
+
+def format_size(size_in_bytes, binary=False):
+    # type: (Union[int, float], bool) -> str
+    """
+    Return the size in human readable format (string)
+    Matching humanfriendly.format_size outputs
+
+    :param size_in_bytes: number of bytes
+    :param binary: If True, 1 KiB equals 1024 bytes; if False (default), 1 KB equals 1000 bytes
+    :return: string representation of the number of bytes (bytes, KB, MB, GB, TB, PB)
+    >>> format_size(0)
+    '0 bytes'
+    >>> format_size(1)
+    '1 byte'
+    >>> format_size(5)
+    '5 bytes'
+    >>> format_size(1000)
+    '1 KB'
+    >>> format_size(1024, binary=True)
+    '1 KiB'
+    >>> format_size(1000 ** 3 * 4)
+    '4 GB'
+    """
+    size = float(size_in_bytes)
+    # a single byte is the exception here
+    if size == 1:
+        return '{} byte'.format(int(size))
+    k = 1024 if binary else 1000
+    scale = ('bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB') if binary else ('bytes', 'KB', 'MB', 'GB', 'TB', 'PB')
+    for i, m in enumerate(scale):
+        if size < k ** (i + 1) or i == len(scale) - 1:
+            return ('{:.2f}'.format(size / (k ** i)).rstrip('0').rstrip('.')
+                    if i > 0 else '{}'.format(int(size))) + ' ' + m
+    # we should never get here
+    return '{} {}'.format(int(size), scale[0])
+
+
+def parse_size(size, binary=False):
+    """
+    Parse a human readable data size and return the number of bytes.
+    Matching humanfriendly.parse_size
+
+    :param size: The human readable file size to parse (a string).
+    :param binary: :data:`True` to use binary multiples of bytes (base-2) for
+                   ambiguous unit symbols and names, :data:`False` to use
+                   decimal multiples of bytes (base-10).
+    :returns: The corresponding size in bytes (an integer).
+    :raises: :exc:`ValueError` when the input can't be parsed.
+
+    This function knows how to parse sizes in bytes, kilobytes, megabytes,
+    gigabytes, terabytes and petabytes. Some examples:
+    >>> parse_size('42')
+    42
+    >>> parse_size('13b')
+    13
+    >>> parse_size('5 bytes')
+    5
+    >>> parse_size('1 KB')
+    1000
+    >>> parse_size('1 kilobyte')
+    1000
+    >>> parse_size('1 KiB')
+    1024
+    >>> parse_size('1 KB', binary=True)
+    1024
+    >>> parse_size('1.5 GB')
+    1500000000
+    >>> parse_size('1.5 GB', binary=True)
+    1610612736
+    """
+    def tokenize(text):
+        # split the input into alternating number / unit tokens
+        tokenized_input = []
+        for token in re.split(r'(\d+(?:\.\d+)?)', text):
+            token = token.strip()
+            if re.match(r'\d+\.\d+', token):
+                tokenized_input.append(float(token))
+            elif token.isdigit():
+                tokenized_input.append(int(token))
+            elif token:
+                tokenized_input.append(token)
+        return tokenized_input
+
+    tokens = tokenize(str(size))
+    if tokens and isinstance(tokens[0], (int, float)):
+        disk_size_units_b = \
+            (('B', 'bytes'), ('KiB', 'kibibyte'), ('MiB', 'mebibyte'), ('GiB', 'gibibyte'),
+             ('TiB', 'tebibyte'), ('PiB', 'pebibyte'))
+        disk_size_units_d = \
+            (('B', 'bytes'), ('KB', 'kilobyte'), ('MB', 'megabyte'), ('GB', 'gigabyte'),
+             ('TB', 'terabyte'), ('PB', 'petabyte'))
+        disk_size_units_b = [(1024 ** i, s[0], s[1]) for i, s in enumerate(disk_size_units_b)]
+        k = 1024 if binary else 1000
+        disk_size_units_d = [(k ** i, s[0], s[1]) for i, s in enumerate(disk_size_units_d)]
+        disk_size_units = (disk_size_units_b + disk_size_units_d) \
+            if binary else (disk_size_units_d + disk_size_units_b)
+
+        # Get the normalized unit (if any) from the tokenized input.
+        normalized_unit = tokens[1].lower() if len(tokens) == 2 and isinstance(tokens[1], str) else ''
+        # If the input contains only a number, it's assumed to be the number of
+        # bytes. The second token can also explicitly reference the unit bytes.
+        if len(tokens) == 1 or normalized_unit.startswith('b'):
+            return int(tokens[0])
+        # Otherwise we expect two tokens: a number and a unit.
+        if normalized_unit:
+            # Convert plural units to singular units, for details see:
+            # https://github.com/xolox/python-humanfriendly/issues/26
+            normalized_unit = normalized_unit.rstrip('s')
+            for k, low, high in disk_size_units:
+                # First check for unambiguous symbols (KiB, MiB, GiB, etc.)
+                # and names (kibibyte, mebibyte, gibibyte, etc.) because their
+                # handling is always the same.
+                if normalized_unit in (low.lower(), high.lower()):
+                    return int(tokens[0] * k)
+                # Then handle ambiguous prefixes (K, M, G, etc.) by matching the
+                # start of the symbol (KB, MB, GB, etc.) according to the
+                # caller's preference.
+                if normalized_unit.startswith(low.lower()):
+                    return int(tokens[0] * k)
+
+    raise ValueError("Failed to parse size! (input {} was tokenized as {})".format(size, tokens))
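A quick round-trip check of the two new helpers (a sketch for illustration, not part of the commit; it assumes the module is importable as clearml.storage.util and reuses the doctest values above):

    from clearml.storage.util import format_size, parse_size

    # format a byte count, then parse the human-readable string back
    assert format_size(4 * 1000 ** 3) == '4 GB'
    assert parse_size('4 GB') == 4 * 1000 ** 3

    # binary (base-2) units round-trip the same way
    assert format_size(1536, binary=True) == '1.5 KiB'
    assert parse_size('1.5 KiB') == 1536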
@@ -3,14 +3,14 @@ from __future__ import division
 import json
 
 import six
-import humanfriendly
 import pyparsing
 from .pyhocon import ConfigFactory, HOCONConverter
+from ..storage.util import parse_size
 
 
 def parse_human_size(value):
     if isinstance(value, six.string_types):
-        return humanfriendly.parse_size(value)
+        return parse_size(value)
     return value
 
 
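With this change parse_human_size funnels strings through the in-house parse_size and still passes numeric values through untouched, so both spellings of a config limit resolve to the same byte count (a sketch, assuming the function shown above):

    parse_human_size('128 MB')   # -> 128000000
    parse_human_size(128000000)  # -> 128000000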