Replace humanfriendly with utility functions

allegroai 2021-01-24 09:22:56 +02:00
parent 1c84b31056
commit 0f401545b8
5 changed files with 135 additions and 17 deletions
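This commit swaps the third-party humanfriendly dependency for two small in-tree helpers, format_size() and parse_size(), written to reproduce humanfriendly's outputs for the call sites below. A minimal equivalence sketch (the clearml.storage.util import path is an assumption based on the relative imports in the diff; humanfriendly is installed here only for the comparison):

    import humanfriendly
    from clearml.storage.util import format_size, parse_size  # path assumed, not shown in the diff

    # the reimplementations are meant to agree with humanfriendly on these inputs
    for n in (0, 1, 5, 1000, 1024, 4 * 1000 ** 3):
        assert format_size(n) == humanfriendly.format_size(n)
    for s in ('42', '5 bytes', '1 KB', '1 KiB', '1.5 GB'):
        assert parse_size(s) == humanfriendly.parse_size(s)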

View File

@@ -3,10 +3,8 @@ import os
 from functools import partial
 from logging import warning
 from multiprocessing.pool import ThreadPool
-from multiprocessing import Lock
 from time import time
-from humanfriendly import format_timespan
 from pathlib2 import Path
 from ...backend_api.services import events as api_events
@@ -198,8 +196,8 @@ class Metrics(InterfaceBase):
         t_f, t_u, t_ref = \
             (self._file_related_event_time, self._file_upload_time, self._file_upload_starvation_warning_sec)
         if t_f and t_u and t_ref and (t_f - t_u) > t_ref:
-            log.warning('Possible metrics file upload starvation: files were not uploaded for %s' %
-                        format_timespan(t_ref))
+            log.warning('Possible metrics file upload starvation: '
+                        'files were not uploaded for {} seconds'.format(t_ref))
         # send the events in a batched request
         good_events = [ev for ev in events if ev.upload_exception is None]
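Note the behavioral change in the warning: humanfriendly.format_timespan rendered the threshold as a friendly span, while the new message logs the raw value, which is already held in seconds. A hypothetical threshold of 1800 illustrates the difference:

    # before: 'Possible metrics file upload starvation: files were not uploaded for 30 minutes'
    # after:  'Possible metrics file upload starvation: files were not uploaded for 1800 seconds'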

View File

@@ -12,7 +12,6 @@ from threading import Thread
 from time import time
 from zipfile import ZipFile, ZIP_DEFLATED

-import humanfriendly
 import six
 from PIL import Image
 from pathlib2 import Path
@@ -24,7 +23,7 @@ from ..backend_api.services import tasks
 from ..backend_interface.metrics.events import UploadEvent
 from ..debugging.log import LoggerRoot
 from ..storage.helper import remote_driver_schemes
-from ..storage.util import sha256sum
+from ..storage.util import sha256sum, format_size

 try:
     import pandas as pd
@@ -428,7 +427,7 @@ class Artifacts(object):
                     if filename.is_file():
                         relative_file_name = filename.relative_to(folder).as_posix()
                         archive_preview += '{} - {}\n'.format(
-                            relative_file_name, humanfriendly.format_size(filename.stat().st_size))
+                            relative_file_name, format_size(filename.stat().st_size))
                         zf.write(filename.as_posix(), arcname=relative_file_name)
         except Exception as e:
             # failed uploading folder:
@@ -449,7 +448,7 @@ class Artifacts(object):
             override_filename_in_uri = artifact_object.parts[-1]
             artifact_type_data.preview = preview or '{} - {}\n'.format(
-                artifact_object, humanfriendly.format_size(artifact_object.stat().st_size))
+                artifact_object, format_size(artifact_object.stat().st_size))
             artifact_object = artifact_object.as_posix()
             artifact_type = 'custom'
             artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
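For a concrete picture of the previews built above, the new helper yields strings like these (file names and sizes here are illustrative only):

    format_size(12400)    # -> '12.4 KB'
    format_size(1536000)  # -> '1.54 MB'
    # archive_preview therefore accumulates lines such as:
    #   data/train.csv - 1.54 MB
    #   data/labels.json - 12.4 KB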

View File

@@ -9,7 +9,6 @@ from tempfile import mkstemp, mkdtemp
 from typing import Union, Optional, Sequence, List, Dict, Any, Mapping
 from zipfile import ZipFile, ZIP_DEFLATED

-import humanfriendly
 from attr import attrs, attrib
 from pathlib2 import Path
@@ -20,7 +19,7 @@ from ..backend_interface.util import mutually_exclusive, exact_match_regex
 from ..debugging.log import LoggerRoot
 from ..storage.helper import StorageHelper
 from ..storage.cache import CacheManager
-from ..storage.util import sha256sum, is_windows, md5text
+from ..storage.util import sha256sum, is_windows, md5text, format_size

 try:
     from pathlib import Path as _Path  # noqa
@@ -324,7 +323,7 @@ class Dataset(object):
                     relative_file_name = file_entry.relative_path
                     zf.write(filename.as_posix(), arcname=relative_file_name)
                     archive_preview += '{} - {}\n'.format(
-                        relative_file_name, humanfriendly.format_size(filename.stat().st_size))
+                        relative_file_name, format_size(filename.stat().st_size))
                     file_entry.local_path = None
                     count += 1
         except Exception as e:
@@ -358,7 +357,7 @@ class Dataset(object):
         self._dataset_file_entries = {k: v for k, v in self._dataset_file_entries.items()
                                       if v.relative_path is not None}
         # start upload
-        zip_file_size = humanfriendly.format_size(Path(zip_file).stat().st_size)
+        zip_file_size = format_size(Path(zip_file).stat().st_size)
         self._task.get_logger().report_text(
             'Uploading compressed dataset changes ({} files, total {}) to {}'.format(
                 count, zip_file_size, self.get_default_storage()))
@@ -966,7 +965,7 @@ class Dataset(object):
             'Dataset state\n' \
             'Files added/modified: {0} - total size {1}\n' \
             'Current dependency graph: {2}\n'.format(
-                len(modified_files), humanfriendly.format_size(sum(modified_files)),
+                len(modified_files), format_size(sum(modified_files)),
                 json.dumps(self._dependency_graph, indent=2, sort_keys=True))
         # store as artifact of the Task.
         self._task.upload_artifact(
@@ -1230,8 +1229,8 @@ class Dataset(object):
             removed = len(self.list_removed_files(node))
             modified = len(self.list_modified_files(node))
             table_values += [[node, node_names.get(node, ''),
-                              removed, modified, count-modified, humanfriendly.format_size(size)]]
-            node_details[node] = [removed, modified, count-modified, humanfriendly.format_size(size)]
+                              removed, modified, count-modified, format_size(size)]]
+            node_details[node] = [removed, modified, count-modified, format_size(size)]
         # create DAG
         visited = []

View File

@@ -1,4 +1,5 @@
 import hashlib
+import re
 import sys

 from typing import Optional, Union
@@ -92,3 +93,124 @@ def is_windows():
     :return: True if currently running on windows OS
     """
     return sys.platform == 'win32'
+
+
+def format_size(size_in_bytes, binary=False):
+    # type: (Union[int, float], bool) -> str
+    """
+    Return the size in human-readable format (string), matching
+    humanfriendly.format_size outputs.
+
+    :param size_in_bytes: number of bytes
+    :param binary: If True, 1 KiB = 1024 bytes; if False (default), 1 KB = 1000 bytes
+    :return: string representation of the number of bytes (bytes, KB, MB, GB, TB, PB)
+
+    >>> format_size(0)
+    '0 bytes'
+    >>> format_size(1)
+    '1 byte'
+    >>> format_size(5)
+    '5 bytes'
+    >>> format_size(1000)
+    '1 KB'
+    >>> format_size(1024, binary=True)
+    '1 KiB'
+    >>> format_size(1000 ** 3 * 4)
+    '4 GB'
+    """
+    size = float(size_in_bytes)
+    # a single byte is the exception here
+    if size == 1:
+        return '{} byte'.format(int(size))
+    k = 1024 if binary else 1000
+    scale = ('bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB') if binary else ('bytes', 'KB', 'MB', 'GB', 'TB', 'PB')
+    for i, m in enumerate(scale):
+        # use this unit if the size fits under the next one, or it is the largest unit we have
+        if size < k ** (i + 1) or i == len(scale) - 1:
+            # bytes are printed as integers, larger units with up to two decimals (trailing zeros trimmed)
+            return ('{:.2f}'.format(size / (k ** i)).rstrip('0').rstrip('.')
+                    if i > 0 else '{}'.format(int(size))) + ' ' + m
+    # we should never get here
+    return '{} {}'.format(int(size), scale[0])
+
+
+def parse_size(size, binary=False):
+    # type: (Union[str, int, float], bool) -> int
+    """
+    Parse a human-readable data size and return the number of bytes,
+    matching humanfriendly.parse_size.
+
+    :param size: The human-readable file size to parse (a string).
+    :param binary: True to use binary multiples of bytes (base-2) for
+        ambiguous unit symbols and names, False to use
+        decimal multiples of bytes (base-10).
+    :returns: The corresponding size in bytes (an integer).
+    :raises: ValueError when the input can't be parsed.
+
+    This function knows how to parse sizes in bytes, kilobytes, megabytes,
+    gigabytes, terabytes and petabytes. Some examples:
+
+    >>> parse_size('42')
+    42
+    >>> parse_size('13b')
+    13
+    >>> parse_size('5 bytes')
+    5
+    >>> parse_size('1 KB')
+    1000
+    >>> parse_size('1 kilobyte')
+    1000
+    >>> parse_size('1 KiB')
+    1024
+    >>> parse_size('1 KB', binary=True)
+    1024
+    >>> parse_size('1.5 GB')
+    1500000000
+    >>> parse_size('1.5 GB', binary=True)
+    1610612736
+    """
+    def tokenize(text):
+        # split into alternating number / unit tokens, e.g. '1.5 GB' -> [1.5, 'GB']
+        tokenized_input = []
+        for token in re.split(r'(\d+(?:\.\d+)?)', text):
+            token = token.strip()
+            if re.match(r'\d+\.\d+', token):
+                tokenized_input.append(float(token))
+            elif token.isdigit():
+                tokenized_input.append(int(token))
+            elif token:
+                tokenized_input.append(token)
+        return tokenized_input
+
+    tokens = tokenize(str(size))
+    if tokens and isinstance(tokens[0], (int, float)):
+        disk_size_units_b = \
+            (('B', 'bytes'), ('KiB', 'kibibyte'), ('MiB', 'mebibyte'), ('GiB', 'gibibyte'),
+             ('TiB', 'tebibyte'), ('PiB', 'pebibyte'))
+        disk_size_units_d = \
+            (('B', 'bytes'), ('KB', 'kilobyte'), ('MB', 'megabyte'), ('GB', 'gigabyte'),
+             ('TB', 'terabyte'), ('PB', 'petabyte'))
+        disk_size_units_b = [(1024 ** i, s[0], s[1]) for i, s in enumerate(disk_size_units_b)]
+        k = 1024 if binary else 1000
+        disk_size_units_d = [(k ** i, s[0], s[1]) for i, s in enumerate(disk_size_units_d)]
+        disk_size_units = (disk_size_units_b + disk_size_units_d) \
+            if binary else (disk_size_units_d + disk_size_units_b)
+        # Get the normalized unit (if any) from the tokenized input.
+        normalized_unit = tokens[1].lower() if len(tokens) == 2 and isinstance(tokens[1], str) else ''
+        # If the input contains only a number, it's assumed to be the number of
+        # bytes. The second token can also explicitly reference the unit bytes.
+        if len(tokens) == 1 or normalized_unit.startswith('b'):
+            return int(tokens[0])
+        # Otherwise we expect two tokens: a number and a unit.
+        if normalized_unit:
+            # Convert plural units to singular units, for details:
+            # https://github.com/xolox/python-humanfriendly/issues/26
+            normalized_unit = normalized_unit.rstrip('s')
+        for k, low, high in disk_size_units:
+            # First we check for unambiguous symbols (KiB, MiB, GiB, etc)
+            # and names (kibibyte, mebibyte, gibibyte, etc) because their
+            # handling is always the same.
+            if normalized_unit in (low.lower(), high.lower()):
+                return int(tokens[0] * k)
+            # Now we will deal with ambiguous prefixes (K, M, G, etc),
+            # symbols (KB, MB, GB, etc) and names (kilobyte, megabyte,
+            # gigabyte, etc) according to the caller's preference.
+            if (normalized_unit in (low.lower(), high.lower()) or
+                    normalized_unit.startswith(low.lower())):
+                return int(tokens[0] * k)
+    raise ValueError("Failed to parse size! (input {} was tokenized as {})".format(size, tokens))
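A few behaviors of the reimplementations are worth calling out; the expected values below follow directly from the code above:

    format_size(1536)                # -> '1.54 KB' (two decimals, trailing zeros trimmed)
    format_size(1024)                # -> '1.02 KB' (decimal scale unless binary=True)
    parse_size('1 KB')               # -> 1000 (ambiguous unit, decimal by default)
    parse_size('1 KB', binary=True)  # -> 1024 (ambiguous unit resolved as binary)
    parse_size('1 KiB')              # -> 1024 (unambiguous, regardless of the flag)
    parse_size('2 kilobytes')        # -> 2000 (plural unit names accepted)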

View File

@@ -3,14 +3,14 @@ from __future__ import division
 import json

 import six
-import humanfriendly
 import pyparsing

 from .pyhocon import ConfigFactory, HOCONConverter
+from ..storage.util import parse_size


 def parse_human_size(value):
     if isinstance(value, six.string_types):
-        return humanfriendly.parse_size(value)
+        return parse_size(value)
     return value
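The wrapper's behavior is unchanged for callers; only the backing implementation moves in-tree. A short sketch, following the code above:

    parse_human_size('10 MB')   # -> 10000000
    parse_human_size('128KiB')  # -> 131072
    parse_human_size(4096)      # -> 4096 (non-string values pass through unchanged)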