mirror of https://github.com/clearml/clearml
synced 2025-06-26 18:16:07 +00:00

Replace humanfriendly with utility functions

commit 0f401545b8
parent 1c84b31056
@@ -3,10 +3,8 @@ import os
 from functools import partial
 from logging import warning
 from multiprocessing.pool import ThreadPool
-from multiprocessing import Lock
 from time import time
 
-from humanfriendly import format_timespan
 from pathlib2 import Path
 
 from ...backend_api.services import events as api_events
@@ -198,8 +196,8 @@ class Metrics(InterfaceBase):
         t_f, t_u, t_ref = \
             (self._file_related_event_time, self._file_upload_time, self._file_upload_starvation_warning_sec)
         if t_f and t_u and t_ref and (t_f - t_u) > t_ref:
-            log.warning('Possible metrics file upload starvation: files were not uploaded for %s' %
-                        format_timespan(t_ref))
+            log.warning('Possible metrics file upload starvation: '
+                        'files were not uploaded for {} seconds'.format(t_ref))
 
         # send the events in a batched request
         good_events = [ev for ev in events if ev.upload_exception is None]
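Note the small behavior change in the hunk above: humanfriendly.format_timespan(t_ref) rendered the threshold as a phrase, while the replacement prints the raw number of seconds. A hypothetical illustration with t_ref = 120:

    # before: Possible metrics file upload starvation: files were not uploaded for 2 minutes
    # after:  Possible metrics file upload starvation: files were not uploaded for 120 seconds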
@@ -12,7 +12,6 @@ from threading import Thread
 from time import time
 from zipfile import ZipFile, ZIP_DEFLATED
 
-import humanfriendly
 import six
 from PIL import Image
 from pathlib2 import Path
@@ -24,7 +23,7 @@ from ..backend_api.services import tasks
 from ..backend_interface.metrics.events import UploadEvent
 from ..debugging.log import LoggerRoot
 from ..storage.helper import remote_driver_schemes
-from ..storage.util import sha256sum
+from ..storage.util import sha256sum, format_size
 
 try:
     import pandas as pd
@@ -428,7 +427,7 @@ class Artifacts(object):
                             if filename.is_file():
                                 relative_file_name = filename.relative_to(folder).as_posix()
                                 archive_preview += '{} - {}\n'.format(
-                                    relative_file_name, humanfriendly.format_size(filename.stat().st_size))
+                                    relative_file_name, format_size(filename.stat().st_size))
                                 zf.write(filename.as_posix(), arcname=relative_file_name)
                 except Exception as e:
                     # failed uploading folder:
@@ -449,7 +448,7 @@ class Artifacts(object):
 
                 override_filename_in_uri = artifact_object.parts[-1]
                 artifact_type_data.preview = preview or '{} - {}\n'.format(
-                    artifact_object, humanfriendly.format_size(artifact_object.stat().st_size))
+                    artifact_object, format_size(artifact_object.stat().st_size))
                 artifact_object = artifact_object.as_posix()
                 artifact_type = 'custom'
                 artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
@@ -9,7 +9,6 @@ from tempfile import mkstemp, mkdtemp
 from typing import Union, Optional, Sequence, List, Dict, Any, Mapping
 from zipfile import ZipFile, ZIP_DEFLATED
 
-import humanfriendly
 from attr import attrs, attrib
 from pathlib2 import Path
 
@@ -20,7 +19,7 @@ from ..backend_interface.util import mutually_exclusive, exact_match_regex
 from ..debugging.log import LoggerRoot
 from ..storage.helper import StorageHelper
 from ..storage.cache import CacheManager
-from ..storage.util import sha256sum, is_windows, md5text
+from ..storage.util import sha256sum, is_windows, md5text, format_size
 
 try:
     from pathlib import Path as _Path  # noqa
@@ -324,7 +323,7 @@ class Dataset(object):
                     relative_file_name = file_entry.relative_path
                     zf.write(filename.as_posix(), arcname=relative_file_name)
                     archive_preview += '{} - {}\n'.format(
-                        relative_file_name, humanfriendly.format_size(filename.stat().st_size))
+                        relative_file_name, format_size(filename.stat().st_size))
                     file_entry.local_path = None
                     count += 1
         except Exception as e:
@@ -358,7 +357,7 @@
         self._dataset_file_entries = {k: v for k, v in self._dataset_file_entries.items()
                                       if v.relative_path is not None}
         # start upload
-        zip_file_size = humanfriendly.format_size(Path(zip_file).stat().st_size)
+        zip_file_size = format_size(Path(zip_file).stat().st_size)
         self._task.get_logger().report_text(
             'Uploading compressed dataset changes ({} files, total {}) to {}'.format(
                 count, zip_file_size, self.get_default_storage()))
@@ -966,7 +965,7 @@
             'Dataset state\n' \
             'Files added/modified: {0} - total size {1}\n' \
             'Current dependency graph: {2}\n'.format(
-                len(modified_files), humanfriendly.format_size(sum(modified_files)),
+                len(modified_files), format_size(sum(modified_files)),
                 json.dumps(self._dependency_graph, indent=2, sort_keys=True))
         # store as artifact of the Task.
         self._task.upload_artifact(
@@ -1230,8 +1229,8 @@
             removed = len(self.list_removed_files(node))
             modified = len(self.list_modified_files(node))
             table_values += [[node, node_names.get(node, ''),
-                              removed, modified, count-modified, humanfriendly.format_size(size)]]
-            node_details[node] = [removed, modified, count-modified, humanfriendly.format_size(size)]
+                              removed, modified, count-modified, format_size(size)]]
+            node_details[node] = [removed, modified, count-modified, format_size(size)]
 
         # create DAG
         visited = []
@@ -1,4 +1,5 @@
 import hashlib
+import re
 import sys
 from typing import Optional, Union
 
@@ -92,3 +93,124 @@ def is_windows():
     :return: True if currently running on windows OS
     """
     return sys.platform == 'win32'
+
+
+def format_size(size_in_bytes, binary=False):
+    # type: (Union[int, float], bool) -> str
+    """
+    Return the size in human-readable format (string),
+    matching humanfriendly.format_size outputs
+
+    :param size_in_bytes: number of bytes
+    :param binary: If True, use binary multiples (1 KiB = 1024 bytes); if False (default), 1 KB = 1000 bytes
+    :return: string representation of the number of bytes (bytes, KB, MB, GB, TB, PB)
+        >>> format_size(0)
+        '0 bytes'
+        >>> format_size(1)
+        '1 byte'
+        >>> format_size(5)
+        '5 bytes'
+        >>> format_size(1000)
+        '1 KB'
+        >>> format_size(1024, binary=True)
+        '1 KiB'
+        >>> format_size(1000 ** 3 * 4)
+        '4 GB'
+    """
+    size = float(size_in_bytes)
+    # single byte is the exception here
+    if size == 1:
+        return '{} byte'.format(int(size))
+    k = 1024 if binary else 1000
+    scale = ('bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB') if binary else ('bytes', 'KB', 'MB', 'GB', 'TB', 'PB')
+    for i, m in enumerate(scale):
+        if size < k**(i+1) or i == len(scale)-1:
+            return ('{:.2f}'.format(size/(k**i)).rstrip('0').rstrip('.')
+                    if i > 0 else '{}'.format(int(size))) + ' ' + m
+    # we should never get here
+    return '{} {}'.format(int(size), scale[0])
+
+
+def parse_size(size, binary=False):
+    """
+    Parse a human-readable data size and return the number of bytes.
+    Matches humanfriendly.parse_size
+
+    :param size: The human readable file size to parse (a string).
+    :param binary: :data:`True` to use binary multiples of bytes (base-2) for
+                   ambiguous unit symbols and names, :data:`False` to use
+                   decimal multiples of bytes (base-10).
+    :returns: The corresponding size in bytes (an integer).
+    :raises: ValueError: when the input can't be parsed.
+
+    This function knows how to parse sizes in bytes, kilobytes, megabytes,
+    gigabytes, terabytes and petabytes. Some examples:
+        >>> parse_size('42')
+        42
+        >>> parse_size('13b')
+        13
+        >>> parse_size('5 bytes')
+        5
+        >>> parse_size('1 KB')
+        1000
+        >>> parse_size('1 kilobyte')
+        1000
+        >>> parse_size('1 KiB')
+        1024
+        >>> parse_size('1 KB', binary=True)
+        1024
+        >>> parse_size('1.5 GB')
+        1500000000
+        >>> parse_size('1.5 GB', binary=True)
+        1610612736
+    """
+    def tokenize(text):
+        tokenized_input = []
+        for token in re.split(r'(\d+(?:\.\d+)?)', text):
+            token = token.strip()
+            if re.match(r'\d+\.\d+', token):
+                tokenized_input.append(float(token))
+            elif token.isdigit():
+                tokenized_input.append(int(token))
+            elif token:
+                tokenized_input.append(token)
+        return tokenized_input
+    tokens = tokenize(str(size))
+    if tokens and isinstance(tokens[0], (int, float)):
+        disk_size_units_b = \
+            (('B', 'bytes'), ('KiB', 'kibibyte'), ('MiB', 'mebibyte'), ('GiB', 'gibibyte'),
+             ('TiB', 'tebibyte'), ('PiB', 'pebibyte'))
+        disk_size_units_d = \
+            (('B', 'bytes'), ('KB', 'kilobyte'), ('MB', 'megabyte'), ('GB', 'gigabyte'),
+             ('TB', 'terabyte'), ('PB', 'petabyte'))
+        disk_size_units_b = [(1024 ** i, s[0], s[1]) for i, s in enumerate(disk_size_units_b)]
+        k = 1024 if binary else 1000
+        disk_size_units_d = [(k ** i, s[0], s[1]) for i, s in enumerate(disk_size_units_d)]
+        disk_size_units = (disk_size_units_b + disk_size_units_d) \
+            if binary else (disk_size_units_d + disk_size_units_b)
+
+        # Get the normalized unit (if any) from the tokenized input.
+        normalized_unit = tokens[1].lower() if len(tokens) == 2 and isinstance(tokens[1], str) else ''
+        # If the input contains only a number, it's assumed to be the number of
+        # bytes. The second token can also explicitly reference the unit bytes.
+        if len(tokens) == 1 or normalized_unit.startswith('b'):
+            return int(tokens[0])
+        # Otherwise we expect two tokens: a number and a unit.
+        if normalized_unit:
+            # Convert plural units to singular units, for details:
+            # https://github.com/xolox/python-humanfriendly/issues/26
+            normalized_unit = normalized_unit.rstrip('s')
+            for k, low, high in disk_size_units:
+                # First we check for unambiguous symbols (KiB, MiB, GiB, etc)
+                # and names (kibibyte, mebibyte, gibibyte, etc) because their
+                # handling is always the same.
+                if normalized_unit in (low.lower(), high.lower()):
+                    return int(tokens[0] * k)
+                # Now we deal with ambiguous prefixes (K, M, G, etc) and
+                # truncated forms (kbyte, kilo, etc) according to the caller's
+                # preference, matching the unit as a prefix of the symbol/name.
+                if (low.lower().startswith(normalized_unit) or
+                        high.lower().startswith(normalized_unit)):
+                    return int(tokens[0] * k)
+
+    raise ValueError("Failed to parse size! (input {} was tokenized as {})".format(size, tokens))
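As a quick sanity check that the new helpers reproduce humanfriendly's behavior, the doctest examples above can be exercised directly. A minimal sketch, assuming the helpers are importable as clearml.storage.util (the top-level package name is an assumption based on this mirror; the diff itself only shows relative imports):

    # Sanity-check sketch for the replacement helpers (import path assumed).
    from clearml.storage.util import format_size, parse_size

    # format_size: decimal units by default, binary (IEC) units on request.
    assert format_size(1) == '1 byte'
    assert format_size(1000) == '1 KB'
    assert format_size(1024, binary=True) == '1 KiB'
    assert format_size(4 * 1000 ** 3) == '4 GB'

    # parse_size: the inverse direction; binary=True makes ambiguous
    # decimal symbols (KB, MB, ...) resolve to base-2 multiples.
    assert parse_size('42') == 42
    assert parse_size('1 KB') == 1000
    assert parse_size('1 KiB') == 1024
    assert parse_size('1 KB', binary=True) == 1024
    assert parse_size('1.5 GB') == 1500000000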
@@ -3,14 +3,14 @@ from __future__ import division
 import json
 
 import six
-import humanfriendly
 import pyparsing
 from .pyhocon import ConfigFactory, HOCONConverter
+from ..storage.util import parse_size
 
 
 def parse_human_size(value):
     if isinstance(value, six.string_types):
-        return humanfriendly.parse_size(value)
+        return parse_size(value)
     return value
 
 
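The config shim keeps its permissive contract: strings are parsed down to a byte count, anything else passes through untouched. A standalone restatement for illustration (six and the parse_size added above are the only dependencies; the import path is assumed, as before):

    import six
    from clearml.storage.util import parse_size  # import path assumed

    def parse_human_size(value):
        # Config values may arrive as '10 MB' strings or as raw numbers.
        if isinstance(value, six.string_types):
            return parse_size(value)
        return value

    print(parse_human_size('10 MB'))   # -> 10000000
    print(parse_human_size(1048576))   # -> 1048576 (numbers pass through)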