From 25d672564ccd0128f0a5d08adbd3c64a7ea3c2ad Mon Sep 17 00:00:00 2001
From: Jake Henning <59198928+jkhenning@users.noreply.github.com>
Date: Wed, 17 May 2023 18:58:20 +0300
Subject: [PATCH 01/15] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 00b754a0..902b4ed9 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
Experiment Manager, MLOps and Data-Management**
[](https://img.shields.io/github/license/allegroai/clearml.svg) [](https://img.shields.io/pypi/pyversions/clearml.svg) [](https://pypi.org/project/clearml/) [](https://anaconda.org/clearml/clearml) [](https://optuna.org)
-[](https://pypi.org/project/clearml/) [](https://artifacthub.io/packages/search?repo=allegroai) [](https://www.youtube.com/c/clearml) [](https://join.slack.com/t/clearml/shared_invite/zt-1rp61f0cg-Bu_7UlETQrvHHjw~hEBh5A) [](https://app.clear.ml)
+[](https://pypi.org/project/clearml/) [](https://artifacthub.io/packages/search?repo=allegroai) [](https://www.youtube.com/c/clearml) [](https://join.slack.com/t/clearml/shared_invite/zt-1v74jzwkn-~XsuWB0btXOlfFQCh8DJQw) [](https://app.clear.ml)
From fa58c69fc3324387612bf0e8e5f3b9883986de48 Mon Sep 17 00:00:00 2001
From: Jake Henning <59198928+jkhenning@users.noreply.github.com>
Date: Wed, 17 May 2023 18:59:44 +0300
Subject: [PATCH 02/15] Update README.md
---
README.md | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 902b4ed9..cabe681c 100644
--- a/README.md
+++ b/README.md
@@ -197,8 +197,7 @@ More information in the [official documentation](https://clear.ml/docs) and [on
For examples and use cases, check the [examples folder](https://github.com/allegroai/clearml/tree/master/examples) and [corresponding documentation](https://clear.ml/docs/latest/docs/guides).
-If you have any questions: post on our [Slack Channel](https://join.slack.com/t/clearml/shared_invite/zt-1rp61f0cg-Bu_7UlETQrvHHjw~hEBh5A
-), or tag your questions on [stackoverflow](https://stackoverflow.com/questions/tagged/clearml) with '**[clearml](https://stackoverflow.com/questions/tagged/clearml)**' tag (*previously [trains](https://stackoverflow.com/questions/tagged/trains) tag*).
+If you have any questions: post on our [Slack Channel](https://join.slack.com/t/clearml/shared_invite/zt-1v74jzwkn-~XsuWB0btXOlfFQCh8DJQw), or tag your questions on [stackoverflow](https://stackoverflow.com/questions/tagged/clearml) with '**[clearml](https://stackoverflow.com/questions/tagged/clearml)**' tag (*previously [trains](https://stackoverflow.com/questions/tagged/trains) tag*).
For feature requests or bug reports, please use [GitHub issues](https://github.com/allegroai/clearml/issues).
From e80d1f1ff4ebf83fc4d83065f05215131541da15 Mon Sep 17 00:00:00 2001
From: allegroai <>
Date: Sun, 21 May 2023 09:41:00 +0300
Subject: [PATCH 03/15] Support removing task input models using
`Task.remove_input_models()`
---
clearml/backend_interface/task/task.py | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/clearml/backend_interface/task/task.py b/clearml/backend_interface/task/task.py
index 871d86c9..942bea9c 100644
--- a/clearml/backend_interface/task/task.py
+++ b/clearml/backend_interface/task/task.py
@@ -56,6 +56,9 @@ from .repo import ScriptInfo, pip_freeze
from .hyperparams import HyperParams
from ...config import config, PROC_MASTER_ID_ENV_VAR, SUPPRESS_UPDATE_MESSAGE_ENV_VAR, DOCKER_BASH_SETUP_ENV_VAR
from ...utilities.process.mp import SingletonLock
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+ from ...model import BaseModel
class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
@@ -1374,6 +1377,22 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
execution.model_labels = enumeration
self._edit(execution=execution)
+ def remove_input_models(self, models_to_remove):
+ # type: (Sequence[Union[str, BaseModel]]) -> ()
+ """
+ Remove input models from the current task. Note that the models themselves are not deleted,
+ but the task's references to them are removed.
+ To delete the models themselves, see `Models.remove`
+
+ :param models_to_remove: The models to remove from the task. Can be a list of model IDs,
+ or of `BaseModel` objects (including its subclasses: `Model` and `InputModel`)
+ """
+ ids_to_remove = [model if isinstance(model, str) else model.id for model in models_to_remove]
+ with self._edit_lock:
+ self.reload()
+ self.data.models.input = [model for model in self.data.models.input if model.model not in ids_to_remove]
+ self._edit(models=self.data.models)
+
def _set_default_docker_image(self):
# type: () -> ()
if not DOCKER_IMAGE_ENV_VAR.exists() and not DOCKER_BASH_SETUP_ENV_VAR.exists():
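A minimal usage sketch for the new `Task.remove_input_models()` API (the project name, task name, and model id below are illustrative placeholders, not part of the patch):

```python
from clearml import Task, InputModel

# Illustrative placeholders - any existing project/task and registered model id will do
task = Task.init(project_name="examples", task_name="remove input model demo")

# Register an existing model as an input model of this task
input_model = InputModel(model_id="<registered-model-id>")
task.connect(input_model)

# Drop the task's reference to the model (the model itself is NOT deleted).
# Both BaseModel instances and plain id strings are accepted.
task.remove_input_models(models_to_remove=[input_model])
# equivalently: task.remove_input_models(models_to_remove=["<registered-model-id>"])
```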
From 7c09251686b14cdbf5a9a64f89550034d04d9c1d Mon Sep 17 00:00:00 2001
From: allegroai <>
Date: Sun, 21 May 2023 09:42:32 +0300
Subject: [PATCH 04/15] Fix code hangs when running with joblib (#1009)
---
clearml/binding/frameworks/pytorch_bind.py | 14 ++++----------
clearml/binding/joblib_bind.py | 9 +++++++++
2 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/clearml/binding/frameworks/pytorch_bind.py b/clearml/binding/frameworks/pytorch_bind.py
index 9b8f8255..3cacb47a 100644
--- a/clearml/binding/frameworks/pytorch_bind.py
+++ b/clearml/binding/frameworks/pytorch_bind.py
@@ -18,7 +18,6 @@ class PatchPyTorchModelIO(PatchBaseModelIO):
__patched = None
__patched_lightning = None
__patched_mmcv = None
- __default_checkpoint_filename_counter = {}
@staticmethod
def update_current_task(task, **_):
@@ -185,9 +184,9 @@ class PatchPyTorchModelIO(PatchBaseModelIO):
filename = f.name
else:
- filename = PatchPyTorchModelIO.__create_default_filename()
+ filename = PatchPyTorchModelIO.__get_cached_checkpoint_filename()
except Exception:
- filename = PatchPyTorchModelIO.__create_default_filename()
+ filename = PatchPyTorchModelIO.__get_cached_checkpoint_filename()
# give the model a descriptive name based on the file name
# noinspection PyBroadException
@@ -195,7 +194,6 @@ class PatchPyTorchModelIO(PatchBaseModelIO):
model_name = Path(filename).stem if filename is not None else None
except Exception:
model_name = None
-
WeightsFileHandler.create_output_model(
obj, filename, Framework.pytorch, PatchPyTorchModelIO._current_task, singlefile=True, model_name=model_name)
@@ -284,11 +282,7 @@ class PatchPyTorchModelIO(PatchBaseModelIO):
return model
@staticmethod
- def __create_default_filename():
+ def __get_cached_checkpoint_filename():
tid = threading.current_thread().ident
checkpoint_filename = PatchPyTorchModelIO._checkpoint_filename.get(tid)
- if checkpoint_filename:
- return checkpoint_filename
- counter = PatchPyTorchModelIO.__default_checkpoint_filename_counter.setdefault(tid, 0)
- PatchPyTorchModelIO.__default_checkpoint_filename_counter[tid] += 1
- return "default_{}_{}".format(tid, counter)
+ return checkpoint_filename or None
\ No newline at end of file
diff --git a/clearml/binding/joblib_bind.py b/clearml/binding/joblib_bind.py
index 547da002..12cde17c 100644
--- a/clearml/binding/joblib_bind.py
+++ b/clearml/binding/joblib_bind.py
@@ -48,6 +48,10 @@ class PatchedJoblib(object):
joblib.numpy_pickle.NumpyPickler.__init__ = _patched_call(
joblib.numpy_pickle.NumpyPickler.__init__,
PatchedJoblib._numpypickler)
+ joblib.memory.MemorizedFunc._cached_call = _patched_call(
+ joblib.memory.MemorizedFunc._cached_call,
+ PatchedJoblib._cached_call_recursion_guard
+ )
if not PatchedJoblib._patched_sk_joblib and 'sklearn' in sys.modules:
PatchedJoblib._patched_sk_joblib = True
@@ -194,3 +198,8 @@ class PatchedJoblib(object):
"Can't get model framework {}, model framework will be: {} ".format(object_orig_module, framework))
finally:
return framework
+
+ @staticmethod
+ def _cached_call_recursion_guard(original_fn, *args, **kwargs):
+ # used just to avoid getting into the `_load` binding in the context of memory caching
+ return original_fn(*args, **kwargs)
\ No newline at end of file
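For context, a simplified sketch of the pass-through patching pattern this fix relies on; the `patched_call` helper below is an assumption modeled on clearml's internal `_patched_call`, not its actual implementation:

```python
import functools

def patched_call(original_fn, patched_fn):
    # Route every call through patched_fn, handing it the original callable first
    @functools.wraps(original_fn)
    def _inner(*args, **kwargs):
        return patched_fn(original_fn, *args, **kwargs)
    return _inner

def cached_call_recursion_guard(original_fn, *args, **kwargs):
    # Pure pass-through: behaviour is unchanged, it only keeps the patched wrapper
    # on the call stack so nested load bindings are not re-entered during memory caching
    return original_fn(*args, **kwargs)

class MemorizedFuncStub:
    # stand-in for joblib.memory.MemorizedFunc
    def _cached_call(self, x):
        return x * 2

# Apply the guard the same way the patch does for joblib.memory.MemorizedFunc._cached_call
MemorizedFuncStub._cached_call = patched_call(
    MemorizedFuncStub._cached_call, cached_call_recursion_guard
)
print(MemorizedFuncStub()._cached_call(21))  # -> 42
```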
From f99a5d03f6c037cc31810b85cb9f5f01c07c06c9 Mon Sep 17 00:00:00 2001
From: Phill Zarfos
Date: Mon, 22 May 2023 04:14:57 -0400
Subject: [PATCH 05/15] Fix comment for storage scheme gs:// (#1018)
Co-authored-by: Phill Zarfos
---
clearml/logger.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clearml/logger.py b/clearml/logger.py
index 2d6a2d6a..884136a3 100644
--- a/clearml/logger.py
+++ b/clearml/logger.py
@@ -1067,7 +1067,7 @@ class Logger(object):
:param str uri: example: 's3://bucket/directory/' or 'file:///tmp/debug/'
- :return: True, if the destination scheme is supported (for example, ``s3://``, ``file://``, or ``gc://``).
+ :return: True, if the destination scheme is supported (for example, ``s3://``, ``file://``, or ``gs://``).
False, if not supported.
"""
From 0fba023fc431d0f872ab6234b0938883490a2b4b Mon Sep 17 00:00:00 2001
From: Alex Burlacu
Date: Tue, 23 May 2023 14:57:07 +0300
Subject: [PATCH 06/15] Add version to clearml-data list command
---
clearml/cli/data/__main__.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clearml/cli/data/__main__.py b/clearml/cli/data/__main__.py
index 79103fa3..202b98d5 100644
--- a/clearml/cli/data/__main__.py
+++ b/clearml/cli/data/__main__.py
@@ -553,12 +553,12 @@ def ds_search(args):
+ str(id_col_len)
+ "}"
)
- print(formatting.format("project", "name", "tags", "created", "id"))
+ print(formatting.format("project", "name", "version", "tags", "created", "id"))
print("-" * len(formatting.format("-", "-", "-", "-", "-")))
for d in datasets:
print(
formatting.format(
- d["project"], d["name"], str(d["tags"] or [])[1:-1], str(d["created"]).split(".")[0], d["id"]
+ d["project"], d["name"], d["version"], str(d["tags"] or [])[1:-1], str(d["created"]).split(".")[0], d["id"]
)
)
return 0
From d828595e83cf834288c9c7dfba0bc5b899a53c7d Mon Sep 17 00:00:00 2001
From: Anton
Date: Thu, 25 May 2023 14:33:25 +0300
Subject: [PATCH 07/15] Fix typo "Categories per column" (#1021)
---
.../pytorch/notebooks/table/train_tabular_predictor.ipynb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/frameworks/pytorch/notebooks/table/train_tabular_predictor.ipynb b/examples/frameworks/pytorch/notebooks/table/train_tabular_predictor.ipynb
index 747250e5..f20cfea0 100644
--- a/examples/frameworks/pytorch/notebooks/table/train_tabular_predictor.ipynb
+++ b/examples/frameworks/pytorch/notebooks/table/train_tabular_predictor.ipynb
@@ -109,7 +109,7 @@
"metadata": {},
"outputs": [],
"source": [
- "columns_categories = data_task.artifacts[\"Categries per column\"].get()\n",
+ "columns_categories = data_task.artifacts[\"Categories per column\"].get()\n",
"columns_categories_ordered = {\n",
" key: columns_categories[key]\n",
" for key in train_set.columns\n",
From 14351e9f932358c2403fcfa139f71c742f6dd1e8 Mon Sep 17 00:00:00 2001
From: Alex Burlacu
Date: Thu, 25 May 2023 17:34:55 +0300
Subject: [PATCH 08/15] Add support for multi-node experiments
---
clearml/task.py | 39 +++++++++++++++++++++++----------------
1 file changed, 23 insertions(+), 16 deletions(-)
diff --git a/clearml/task.py b/clearml/task.py
index 8a356136..5e72e3e5 100644
--- a/clearml/task.py
+++ b/clearml/task.py
@@ -1692,6 +1692,15 @@ class Task(_Task):
dist.init_process_group('gloo')
run(config.get('node_rank'), config.get('total_num_nodes'))
+ When using the ClearML cloud autoscaler apps, make sure the nodes can reach each other:
+ the machines need to be in the same security group, the `MASTER_PORT` needs to be exposed, and
+ `MASTER_ADDR` needs to be the private IP of the instance the master node is running on.
+ For example, this can be achieved by setting the following Docker arguments in the `Additional ClearML Configuration` section:
+
+ .. code-block:: py
+
+ agent.extra_docker_arguments=["--ipc=host", "--network=host", "-p", "29500:29500", "--env", "CLEARML_MULTI_NODE_MASTER_DEF_ADDR=`hostname -I | awk '{print $1}'`"]
+
:param total_num_nodes: The total number of nodes to be enqueued, including the master node,
which should already be enqueued when running remotely
:param port: Port opened by the master node. If the environment variable `CLEARML_MULTI_NODE_MASTER_DEF_PORT`
@@ -1702,8 +1711,13 @@ class Task(_Task):
:param queue: The queue to enqueue the nodes to. Can be different than the queue the master
node is enqueued to. If None, the nodes will be enqueued to the same queue as the master node
:param wait: If True, the master node will wait for the other nodes to start
- :param addr: The address of the master node's worker. If not set, it defaults to the private IP
- of the machine the master is running on
+ :param addr: The address of the master node's worker. If the environment variable
+ `CLEARML_MULTI_NODE_MASTER_DEF_ADDR` is set, its value overrides this parameter.
+ Otherwise, if `MASTER_ADDR` is set, its value is used instead.
+ If neither environment variable exists, the value passed to this parameter is used.
+ If that value is None (the default), the private IP of the machine the master node
+ is running on is used.
:return: A dictionary containing relevant information regarding the multi node run. This dictionary
has the following entries:
@@ -1724,10 +1738,14 @@ class Task(_Task):
raise UsageError("Master task is not enqueued to any queue and the queue parameter is None")
master_conf = {
- "master_addr": get_private_ip(),
- "master_port": int(os.environ.get("CLEARML_MULTI_NODE_MASTER_DEF_PORT", os.environ.get("MASTER_PORT", port))),
+ "master_addr": os.environ.get(
+ "CLEARML_MULTI_NODE_MASTER_DEF_ADDR", os.environ.get("MASTER_ADDR", addr or get_private_ip())
+ ),
+ "master_port": int(
+ os.environ.get("CLEARML_MULTI_NODE_MASTER_DEF_PORT", os.environ.get("MASTER_PORT", port))
+ ),
"node_rank": 0,
- "wait": wait
+ "wait": wait,
}
editable_conf = {"total_num_nodes": total_num_nodes, "queue": queue}
editable_conf = self.connect(editable_conf, name=self._launch_multi_node_section)
@@ -4650,14 +4668,3 @@ class Task(_Task):
auto_connect_frameworks={'detect_repository': False}) \
if state['main'] else Task.get_task(task_id=state['id'])
self.__dict__ = task.__dict__
-
- def __getattr__(self, name):
- try:
- self.__getattribute__(name)
- except AttributeError as e:
- if self.__class__ is Task:
- getLogger().warning(
- "'clearml.Task' object has no attribute '{}'. Did you mean to import 'Task' from 'allegroai'?".format(name)
- )
- raise e
-
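A self-contained sketch of the master-address resolution order documented for the `addr` parameter above; `_private_ip()` is a stand-in for clearml's internal `get_private_ip()` helper:

```python
import os
import socket

def _private_ip():
    # Stand-in for clearml's internal get_private_ip() helper
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect(("8.8.8.8", 80))  # UDP "connect" sends no packets
        return s.getsockname()[0]
    except OSError:
        return "127.0.0.1"
    finally:
        s.close()

def resolve_master_addr(addr=None):
    # Precedence: CLEARML_MULTI_NODE_MASTER_DEF_ADDR > MASTER_ADDR > addr argument > private IP
    return os.environ.get(
        "CLEARML_MULTI_NODE_MASTER_DEF_ADDR",
        os.environ.get("MASTER_ADDR", addr or _private_ip()),
    )

print(resolve_master_addr())            # env vars win if set, otherwise the private IP
print(resolve_master_addr("10.0.0.5"))  # explicit addr is used only when neither env var is set
```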
From 813777a4cc82947ea8dbe749df2aa5745d36cea9 Mon Sep 17 00:00:00 2001
From: Alex Burlacu
Date: Thu, 25 May 2023 17:41:35 +0300
Subject: [PATCH 09/15] Fix package identification for scikit-image
---
clearml/backend_api/session/session.py | 22 ++++++++++++-------
.../backend_interface/task/repo/scriptinfo.py | 6 +++++
2 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/clearml/backend_api/session/session.py b/clearml/backend_api/session/session.py
index 2b559de4..898dc82b 100644
--- a/clearml/backend_api/session/session.py
+++ b/clearml/backend_api/session/session.py
@@ -277,15 +277,11 @@ class Session(TokenManager):
return list(retry_codes)
- def _load_vaults(self):
- # () -> Optional[bool]
+ def _read_vaults(self):
+ # () -> Optional[dict]
if not self.check_min_api_version("2.15") or self.feature_set == "basic":
return
- if ENV_DISABLE_VAULT_SUPPORT.get():
- # (self._logger or get_logger()).debug("Vault support is disabled")
- return
-
def parse(vault):
# noinspection PyBroadException
try:
@@ -306,13 +302,23 @@ class Session(TokenManager):
vaults = res.json().get("data", {}).get("vaults", [])
data = list(filter(None, map(parse, vaults)))
if data:
- self.config.set_overrides(*data)
- return True
+ return data
elif res.status_code != 404:
raise Exception(res.json().get("meta", {}).get("result_msg", res.text))
except Exception as ex:
(self._logger or get_logger()).warning("Failed getting vaults: {}".format(ex))
+ def _load_vaults(self):
+ # () -> Optional[bool]
+ if ENV_DISABLE_VAULT_SUPPORT.get():
+ # (self._logger or get_logger()).debug("Vault support is disabled")
+ return
+
+ data = self._read_vaults()
+ if data:
+ self.config.set_overrides(*data)
+ return True
+
def _apply_config_sections(self, local_logger):
# type: (_LocalLogger) -> None # noqa: F821
default = self.config.get("sdk.apply_environment", False)
diff --git a/clearml/backend_interface/task/repo/scriptinfo.py b/clearml/backend_interface/task/repo/scriptinfo.py
index 4856165c..78bef15e 100644
--- a/clearml/backend_interface/task/repo/scriptinfo.py
+++ b/clearml/backend_interface/task/repo/scriptinfo.py
@@ -91,6 +91,12 @@ class ScriptRequirements(object):
for fname, lines in sklearn.items():
modules.add('scikit_learn', fname, lines)
+ # bugfix, replace skimage with scikit-image name
+ if 'skimage' in modules:
+ skimage = modules.pop('skimage', {})
+ for fname, lines in skimage.items():
+ modules.add('scikit_image', fname, lines)
+
# if we have torch and it supports tensorboard, we should add that as well
# (because it will not be detected automatically)
if 'torch' in modules and 'tensorboard' not in modules and 'tensorboardX' not in modules:
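The scikit-image fix mirrors the scikit-learn remap right above it: detected import names are rewritten to their PyPI distribution names before requirements are recorded. A standalone sketch of the idea, using a plain dict as a simplified stand-in for clearml's internal modules structure:

```python
# {import name: {file name: [line numbers]}} - simplified stand-in for the structure
# clearml's requirements analysis builds while scanning the script's sources
RENAMES = {"sklearn": "scikit_learn", "skimage": "scikit_image"}

def remap_requirement_names(modules):
    for import_name, dist_name in RENAMES.items():
        if import_name in modules:
            usages = modules.pop(import_name)
            for fname, lines in usages.items():
                modules.setdefault(dist_name, {}).setdefault(fname, []).extend(lines)
    return modules

print(remap_requirement_names({"skimage": {"train.py": [3]}, "numpy": {"train.py": [1]}}))
# -> {'numpy': {'train.py': [1]}, 'scikit_image': {'train.py': [3]}}
```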
From a8746de9ebc266f86ccec201b04e0dfae1251d29 Mon Sep 17 00:00:00 2001
From: Alex Burlacu
Date: Thu, 25 May 2023 17:43:13 +0300
Subject: [PATCH 10/15] Adjust LightGBM example
---
.../frameworks/lightgbm/lightgbm_example.py | 155 +++++++++++-------
examples/frameworks/lightgbm/requirements.txt | 1 +
2 files changed, 93 insertions(+), 63 deletions(-)
diff --git a/examples/frameworks/lightgbm/lightgbm_example.py b/examples/frameworks/lightgbm/lightgbm_example.py
index 16034374..8e1614cb 100644
--- a/examples/frameworks/lightgbm/lightgbm_example.py
+++ b/examples/frameworks/lightgbm/lightgbm_example.py
@@ -1,75 +1,104 @@
# ClearML - Example of LightGBM integration
#
import lightgbm as lgb
+import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_squared_error
from clearml import Task
-# Connecting ClearML with the current process,
-# from here on everything is logged automatically
-task = Task.init(project_name="examples", task_name="LightGBM")
-print('Loading data...')
+def main():
+ # Connecting ClearML with the current process,
+ # from here on everything is logged automatically
+ task = Task.init(project_name="examples", task_name="LightGBM")
-# Load or create your dataset
+ print('Loading data...')
+
+ # Load or create your dataset
+
+ df_train = pd.read_csv(
+ 'https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.train',
+ header=None, sep='\t'
+ )
+ df_test = pd.read_csv(
+ 'https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.test',
+ header=None, sep='\t'
+ )
+
+ y_train = df_train[0]
+ y_test = df_test[0]
+ X_train = df_train.drop(0, axis=1)
+ X_test = df_test.drop(0, axis=1)
+
+ # Create dataset for lightgbm
+ lgb_train = lgb.Dataset(X_train, y_train)
+ lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
+
+ # Specify your configurations as a dict
+ params = {
+ 'boosting_type': 'gbdt',
+ 'objective': 'regression',
+ 'metric': {'l2', 'l1'},
+ 'num_leaves': 200,
+ 'max_depth': 0,
+ 'learning_rate': 0.05,
+ 'feature_fraction': 0.9,
+ 'bagging_fraction': 0.8,
+ 'bagging_freq': 5,
+ 'verbose': 0,
+ 'force_col_wise': True,
+ 'deterministic': True,
+ }
+
+ evals_result = {} # to record eval results for plotting
+
+ print('Starting training...')
+
+ # Train
+ gbm = lgb.train(
+ params,
+ lgb_train,
+ num_boost_round=500,
+ valid_sets=[lgb_train, lgb_eval],
+ feature_name=[f'f{i + 1}' for i in range(X_train.shape[-1])],
+ categorical_feature=[21],
+ callbacks=[
+ lgb.record_evaluation(evals_result),
+ ],
+ )
+
+ print('Saving model...')
+
+ # Save model to file
+ gbm.save_model('model.txt')
+
+ print('Plotting metrics recorded during training...')
+
+ ax = lgb.plot_metric(evals_result, metric='l1')
+ plt.show()
+
+ print('Plotting feature importances...')
+
+ ax = lgb.plot_importance(gbm, max_num_features=10)
+ plt.show()
+
+ print('Plotting split value histogram...')
+
+ ax = lgb.plot_split_value_histogram(gbm, feature='f26', bins='auto')
+ plt.show()
+
+ print('Loading model to predict...')
+
+ # Load model to predict
+ bst = lgb.Booster(model_file='model.txt')
+
+ # Can only predict with the best iteration (or the saving iteration)
+ y_pred = bst.predict(X_test)
+
+ # Eval with loaded model
+ print("The rmse of loaded model's prediction is:", mean_squared_error(y_test, y_pred) ** 0.5)
-df_train = pd.read_csv(
- 'https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.train',
- header=None, sep='\t'
-)
-df_test = pd.read_csv(
- 'https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.test',
- header=None, sep='\t'
-)
-
-y_train = df_train[0]
-y_test = df_test[0]
-X_train = df_train.drop(0, axis=1)
-X_test = df_test.drop(0, axis=1)
-
-# Create dataset for lightgbm
-lgb_train = lgb.Dataset(X_train, y_train)
-lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
-
-# Specify your configurations as a dict
-params = {
- 'boosting_type': 'gbdt',
- 'objective': 'regression',
- 'metric': {'l2', 'l1'},
- 'num_leaves': 31,
- 'learning_rate': 0.05,
- 'feature_fraction': 0.9,
- 'bagging_fraction': 0.8,
- 'bagging_freq': 5,
- 'verbose': 0,
- 'force_col_wise': True,
-}
-
-print('Starting training...')
-
-# Train
-gbm = lgb.train(
- params,
- lgb_train,
- num_boost_round=20,
- valid_sets=[lgb_eval],
- callbacks=[lgb.early_stopping(stopping_rounds=5)],
-)
-
-print('Saving model...')
-
-# Save model to file
-gbm.save_model('model.txt')
-
-print('Loading model to predict...')
-
-# Load model to predict
-bst = lgb.Booster(model_file='model.txt')
-
-# Can only predict with the best iteration (or the saving iteration)
-y_pred = bst.predict(X_test)
-
-# Eval with loaded model
-print("The rmse of loaded model's prediction is:", mean_squared_error(y_test, y_pred) ** 0.5)
+if __name__ == '__main__':
+ main()
diff --git a/examples/frameworks/lightgbm/requirements.txt b/examples/frameworks/lightgbm/requirements.txt
index ac7ee02e..8508c428 100644
--- a/examples/frameworks/lightgbm/requirements.txt
+++ b/examples/frameworks/lightgbm/requirements.txt
@@ -1,4 +1,5 @@
lightgbm
scikit-learn
pandas
+matplotlib
clearml
\ No newline at end of file
From 60c3a5ef98d86dc51036759c3db6d4f06197d799 Mon Sep 17 00:00:00 2001
From: Alex Burlacu
Date: Thu, 25 May 2023 18:14:17 +0300
Subject: [PATCH 11/15] Fix downloading artifacts from GCP when the download
URI includes unsafe characters
---
clearml/storage/helper.py | 2358 +++++++++++++++++++------------------
clearml/storage/util.py | 2 +-
2 files changed, 1181 insertions(+), 1179 deletions(-)
diff --git a/clearml/storage/helper.py b/clearml/storage/helper.py
index ff785925..3c48275e 100644
--- a/clearml/storage/helper.py
+++ b/clearml/storage/helper.py
@@ -116,1184 +116,6 @@ class _Driver(object):
cls._file_server_hosts = hosts
return cls._file_server_hosts
-
-class StorageHelper(object):
- """ Storage helper.
- Used by the entire system to download/upload files.
- Supports both local and remote files (currently local files, network-mapped files, HTTP/S and Amazon S3)
- """
- _temp_download_suffix = '.partially'
-
- @classmethod
- def _get_logger(cls):
- return get_logger('storage')
-
- @attrs
- class _PathSubstitutionRule(object):
- registered_prefix = attrib(type=str)
- local_prefix = attrib(type=str)
- replace_windows_sep = attrib(type=bool)
- replace_linux_sep = attrib(type=bool)
-
- path_substitution_config = 'storage.path_substitution'
-
- @classmethod
- def load_list_from_config(cls):
- rules_list = []
- for index, sub_config in enumerate(config.get(cls.path_substitution_config, list())):
- rule = cls(
- registered_prefix=sub_config.get('registered_prefix', None),
- local_prefix=sub_config.get('local_prefix', None),
- replace_windows_sep=sub_config.get('replace_windows_sep', False),
- replace_linux_sep=sub_config.get('replace_linux_sep', False),
- )
-
- if any(prefix is None for prefix in (rule.registered_prefix, rule.local_prefix)):
- StorageHelper._get_logger().warning(
- "Illegal substitution rule configuration '{}[{}]': {}".format(
- cls.path_substitution_config,
- index,
- asdict(rule),
- ))
-
- continue
-
- if all((rule.replace_windows_sep, rule.replace_linux_sep)):
- StorageHelper._get_logger().warning(
- "Only one of replace_windows_sep and replace_linux_sep flags may be set."
- "'{}[{}]': {}".format(
- cls.path_substitution_config,
- index,
- asdict(rule),
- ))
- continue
-
- rules_list.append(rule)
-
- return rules_list
-
- class _UploadData(object):
- @property
- def src_path(self):
- return self._src_path
-
- @property
- def dest_path(self):
- return self._dest_path
-
- @property
- def canonized_dest_path(self):
- return self._canonized_dest_path
-
- @property
- def extra(self):
- return self._extra
-
- @property
- def callback(self):
- return self._callback
-
- @property
- def retries(self):
- return self._retries
-
- @property
- def return_canonized(self):
- return self._return_canonized
-
- def __init__(self, src_path, dest_path, canonized_dest_path, extra, callback, retries, return_canonized):
- self._src_path = src_path
- self._dest_path = dest_path
- self._canonized_dest_path = canonized_dest_path
- self._extra = extra
- self._callback = callback
- self._retries = retries
- self._return_canonized = return_canonized
-
- def __str__(self):
- return "src=%s" % self.src_path
-
- _helpers = {} # cache of helper instances
-
- # global terminate event for async upload threads
- # _terminate = threading.Event()
- _async_upload_threads = set()
- _upload_pool = None
- _upload_pool_pid = None
-
- # collect all bucket credentials that aren't empty (ignore entries with an empty key or secret)
- _s3_configurations = deferred_config('aws.s3', {}, transform=S3BucketConfigurations.from_config)
- _gs_configurations = deferred_config('google.storage', {}, transform=GSBucketConfigurations.from_config)
- _azure_configurations = deferred_config('azure.storage', {}, transform=AzureContainerConfigurations.from_config)
- _path_substitutions = deferred_config(transform=_PathSubstitutionRule.load_list_from_config)
-
- @property
- def log(self):
- return self._log
-
- @property
- def scheme(self):
- return self._scheme
-
- @property
- def secure(self):
- return self._secure
-
- @property
- def base_url(self):
- return self._base_url
-
- @classmethod
- def get(cls, url, logger=None, **kwargs):
- """
- Get a storage helper instance for the given URL
-
- :return: A StorageHelper instance.
- """
- # Handle URL substitution etc before locating the correct storage driver
- url = cls._canonize_url(url)
-
- # Get the credentials we should use for this url
- base_url = cls._resolve_base_url(url)
-
- instance_key = '%s_%s' % (base_url, threading.current_thread().ident or 0)
- # noinspection PyBroadException
- try:
- configs = kwargs.get("configs")
- if configs:
- instance_key += "_{}".format(configs.cache_name)
- except Exception:
- pass
-
- force_create = kwargs.pop('__force_create', False)
- if (instance_key in cls._helpers) and (not force_create) and base_url != "file://":
- return cls._helpers[instance_key]
-
- # Don't canonize URL since we already did it
- try:
- instance = cls(base_url=base_url, url=url, logger=logger, canonize_url=False, **kwargs)
- except (StorageError, UsageError) as ex:
- cls._get_logger().error(str(ex))
- return None
- except Exception as ex:
- cls._get_logger().error("Failed creating storage object {} Reason: {}".format(
- base_url or url, ex))
- return None
-
- cls._helpers[instance_key] = instance
- return instance
-
- @classmethod
- def get_local_copy(cls, remote_url, skip_zero_size_check=False):
- """
- Download a file from remote URL to a local storage, and return path to local copy,
-
- :param remote_url: Remote URL. Example: https://example.com/file.jpg s3://bucket/folder/file.mp4 etc.
- :param skip_zero_size_check: If True, no error will be raised for files with zero bytes size.
- :return: Path to local copy of the downloaded file. None if error occurred.
- """
- helper = cls.get(remote_url)
- if not helper:
- return None
- # create temp file with the requested file name
- file_name = '.' + remote_url.split('/')[-1].split(os.path.sep)[-1]
- local_path = mktemp(suffix=file_name)
- return helper.download_to_file(remote_url, local_path, skip_zero_size_check=skip_zero_size_check)
-
- def __init__(
- self,
- base_url,
- url,
- key=None,
- secret=None,
- region=None,
- verbose=False,
- logger=None,
- retries=5,
- token=None,
- **kwargs
- ):
- level = config.get("storage.log.level", None)
-
- if level:
- try:
- self._get_logger().setLevel(level)
- except (TypeError, ValueError):
- self._get_logger().error('invalid storage log level in configuration: %s' % level)
-
- self._log = logger or self._get_logger()
- self._verbose = verbose
- self._retries = retries
- self._extra = {}
- self._base_url = base_url
- self._secure = True
- self._driver = None
- self._container = None
- self._conf = None
-
- if kwargs.get('canonize_url', True):
- url = self._canonize_url(url)
-
- parsed = urlparse(url)
- self._scheme = parsed.scheme
-
- if self._scheme == _AzureBlobServiceStorageDriver.scheme:
- self._conf = copy(self._azure_configurations.get_config_by_uri(url))
- if self._conf is None:
- raise StorageError("Missing Azure Blob Storage configuration for {}".format(url))
-
- if not self._conf.account_name or not self._conf.account_key:
- raise StorageError(
- "Missing account name or key for Azure Blob Storage access for {}".format(base_url)
- )
-
- self._driver = _AzureBlobServiceStorageDriver()
- self._container = self._driver.get_container(config=self._conf, account_url=parsed.netloc)
-
- elif self._scheme == _Boto3Driver.scheme:
- self._conf = copy(self._s3_configurations.get_config_by_uri(url))
- self._secure = self._conf.secure
-
- final_region = region if region else self._conf.region
- if not final_region:
- final_region = None
-
- self._conf.update(
- key=key or self._conf.key,
- secret=secret or self._conf.secret,
- multipart=self._conf.multipart,
- region=final_region,
- use_credentials_chain=self._conf.use_credentials_chain,
- token=token or self._conf.token,
- extra_args=self._conf.extra_args,
- )
-
- if not self._conf.use_credentials_chain:
- if not self._conf.key or not self._conf.secret:
- raise ValueError(
- "Missing key and secret for S3 storage access (%s)" % base_url
- )
-
- self._driver = _Boto3Driver()
- self._container = self._driver.get_container(
- container_name=self._base_url, retries=retries, config=self._conf)
-
- elif self._scheme == _GoogleCloudStorageDriver.scheme:
- self._conf = copy(self._gs_configurations.get_config_by_uri(url))
- self._driver = _GoogleCloudStorageDriver()
- self._container = self._driver.get_container(
- container_name=self._base_url,
- config=self._conf
- )
-
- elif self._scheme in _HttpDriver.schemes:
- self._driver = _HttpDriver(retries=retries)
- self._container = self._driver.get_container(container_name=self._base_url)
- else: # elif self._scheme == 'file':
- # if this is not a known scheme assume local file
- # url2pathname is specifically intended to operate on (urlparse result).path
- # and returns a cross-platform compatible result
- new_url = normalize_local_path(url[len("file://"):] if url.startswith("file://") else url)
- self._driver = _FileStorageDriver(new_url)
- # noinspection PyBroadException
- try:
- self._container = self._driver.get_container("")
- except Exception:
- self._container = None
-
- @classmethod
- def terminate_uploads(cls, force=True, timeout=2.0):
- if force:
- # since async uploaders are daemon threads, we can just return and let them close by themselves
- return
- # signal all threads to terminate and give them a chance for 'timeout' seconds (total, not per-thread)
- # cls._terminate.set()
- remaining_timeout = timeout
- for thread in cls._async_upload_threads:
- t = time()
- # noinspection PyBroadException
- try:
- thread.join(timeout=remaining_timeout)
- except Exception:
- pass
- remaining_timeout -= (time() - t)
-
- @classmethod
- def get_aws_storage_uri_from_config(cls, bucket_config):
- uri = (
- "s3://{}/{}".format(bucket_config.host, bucket_config.bucket)
- if bucket_config.host
- else "s3://{}".format(bucket_config.bucket)
- )
- if bucket_config.subdir:
- uri += "/" + bucket_config.subdir
- return uri
-
- @classmethod
- def get_gcp_storage_uri_from_config(cls, bucket_config):
- return (
- "gs://{}/{}".format(bucket_config.bucket, bucket_config.subdir)
- if bucket_config.subdir
- else "gs://{}".format(bucket_config.bucket)
- )
-
- @classmethod
- def get_azure_storage_uri_from_config(cls, bucket_config):
- return "azure://{}.blob.core.windows.net/{}".format(bucket_config.account_name, bucket_config.container_name)
-
- @classmethod
- def get_configuration(cls, bucket_config):
- return cls.get_aws_configuration(bucket_config)
-
- @classmethod
- def get_aws_configuration(cls, bucket_config):
- return cls._s3_configurations.get_config_by_bucket(bucket_config.bucket, bucket_config.host)
-
- @classmethod
- def get_gcp_configuration(cls, bucket_config):
- return cls._gs_configurations.get_config_by_uri(
- cls.get_gcp_storage_uri_from_config(bucket_config),
- create_if_not_found=False
- )
-
- @classmethod
- def get_azure_configuration(cls, bucket_config):
- return cls._azure_configurations.get_config(bucket_config.account_name, bucket_config.container_name)
-
- @classmethod
- def add_configuration(cls, bucket_config, log=None, _test_config=True):
- return cls.add_aws_configuration(bucket_config, log=log, _test_config=_test_config)
-
- @classmethod
- def add_aws_configuration(cls, bucket_config, log=None, _test_config=True):
- # Try to use existing configuration if we have no key and secret
- use_existing = not bucket_config.is_valid()
- # Get existing config anyway (we'll either try to use it or alert we're replacing it
- existing = cls.get_aws_configuration(bucket_config)
- configs = cls._s3_configurations
- uri = cls.get_aws_storage_uri_from_config(bucket_config)
-
- if not use_existing:
- # Test bucket config, fails if unsuccessful
- if _test_config:
- _Boto3Driver._test_bucket_config(bucket_config, log) # noqa
- if existing:
- if log:
- log.warning("Overriding existing configuration for '{}'".format(uri))
- configs.remove_config(existing)
- configs.add_config(bucket_config)
- else:
- # Try to use existing configuration
- good_config = False
- if existing:
- if log:
- log.info("Using existing credentials for '{}'".format(uri))
- good_config = _Boto3Driver._test_bucket_config(existing, log, raise_on_error=False) # noqa
-
- if not good_config:
- # Try to use global key/secret
- configs.update_config_with_defaults(bucket_config)
-
- if log:
- log.info("Using global credentials for '{}'".format(uri))
- if _test_config:
- _Boto3Driver._test_bucket_config(bucket_config, log) # noqa
- configs.add_config(bucket_config)
-
- @classmethod
- def add_gcp_configuration(cls, bucket_config, log=None):
- use_existing = not bucket_config.is_valid()
- existing = cls.get_gcp_configuration(bucket_config)
- configs = cls._gs_configurations
- uri = cls.get_gcp_storage_uri_from_config(bucket_config)
-
- if not use_existing:
- if existing:
- if log:
- log.warning("Overriding existing configuration for '{}'".format(uri))
- configs.remove_config(existing)
- configs.add_config(bucket_config)
- else:
- good_config = False
- if existing:
- if log:
- log.info("Using existing config for '{}'".format(uri))
- good_config = _GoogleCloudStorageDriver.test_upload(None, bucket_config)
- if not good_config:
- configs.update_config_with_defaults(bucket_config)
- if log:
- log.info("Using global credentials for '{}'".format(uri))
- configs.add_config(bucket_config)
-
- @classmethod
- def add_azure_configuration(cls, bucket_config, log=None):
- use_existing = not bucket_config.is_valid()
- existing = cls.get_azure_configuration(bucket_config)
- configs = cls._azure_configurations
- uri = cls.get_azure_storage_uri_from_config(bucket_config)
-
- if not use_existing:
- if existing:
- if log:
- log.warning("Overriding existing configuration for '{}'".format(uri))
- configs.remove_config(existing)
- configs.add_config(bucket_config)
- else:
- good_config = False
- if existing:
- if log:
- log.info("Using existing config for '{}'".format(uri))
- good_config = _AzureBlobServiceStorageDriver.test_upload(None, bucket_config)
- if not good_config:
- configs.update_config_with_defaults(bucket_config)
- if log:
- log.info("Using global credentials for '{}'".format(uri))
- configs.add_config(bucket_config)
-
- @classmethod
- def add_path_substitution(
- cls,
- registered_prefix,
- local_prefix,
- replace_windows_sep=False,
- replace_linux_sep=False,
- ):
- """
- Add a path substitution rule for storage paths.
-
- Useful for case where the data was registered under some path, and that
- path was later renamed. This may happen with local storage paths where
- each machine is has different mounts or network drives configurations
-
- :param registered_prefix: The prefix to search for and replace. This is
- the prefix of the path the data is registered under. This should be the
- exact url prefix, case sensitive, as the data is registered.
- :param local_prefix: The prefix to replace 'registered_prefix' with. This
- is the prefix of the path the data is actually saved under. This should be the
- exact url prefix, case sensitive, as the data is saved under.
- :param replace_windows_sep: If set to True, and the prefix matches, the rest
- of the url has all of the windows path separators (backslash '\') replaced with
- the native os path separator.
- :param replace_linux_sep: If set to True, and the prefix matches, the rest
- of the url has all of the linux/unix path separators (slash '/') replaced with
- the native os path separator.
- """
-
- if not registered_prefix or not local_prefix:
- raise UsageError("Path substitution prefixes must be non empty strings")
-
- if replace_windows_sep and replace_linux_sep:
- raise UsageError("Only one of replace_windows_sep and replace_linux_sep may be set.")
-
- rule = cls._PathSubstitutionRule(
- registered_prefix=registered_prefix,
- local_prefix=local_prefix,
- replace_windows_sep=replace_windows_sep,
- replace_linux_sep=replace_linux_sep,
- )
-
- cls._path_substitutions.append(rule)
-
- @classmethod
- def clear_path_substitutions(cls):
- """
- Removes all path substitution rules, including ones from the configuration file.
- """
- cls._path_substitutions = list()
-
- def get_object_size_bytes(self, remote_url, silence_errors=False):
- # type: (str, bool) -> [int, None]
- """
- Get size of the remote file in bytes.
-
- :param str remote_url: The url where the file is stored.
- E.g. 's3://bucket/some_file.txt', 'file://local/file'
- :param bool silence_errors: Silence errors that might occur
- when fetching the size of the file. Default: False
-
- :return: The size of the file in bytes.
- None if the file could not be found or an error occurred.
- """
- obj = self.get_object(remote_url, silence_errors=silence_errors)
- return self._get_object_size_bytes(obj, silence_errors)
-
- def _get_object_size_bytes(self, obj, silence_errors=False):
- # type: (object) -> [int, None]
- """
- Auxiliary function for `get_object_size_bytes`.
- Get size of the remote object in bytes.
-
- :param object obj: The remote object
- :param bool silence_errors: Silence errors that might occur
- when fetching the size of the file. Default: False
-
- :return: The size of the object in bytes.
- None if an error occurred.
- """
- if not obj:
- return None
- size = None
- try:
- if isinstance(self._driver, _HttpDriver) and obj:
- obj = self._driver._get_download_object(obj) # noqa
- size = int(obj.headers.get("Content-Length", 0))
- elif hasattr(obj, "size"):
- size = obj.size
- # Google storage has the option to reload the object to get the size
- if size is None and hasattr(obj, "reload"):
- # noinspection PyBroadException
- try:
- # To catch google.api_core exceptions
- obj.reload()
- size = obj.size
- except Exception as e:
- if not silence_errors:
- self.log.warning("Failed obtaining object size on reload: {}('{}')".format(
- e.__class__.__name__, str(e)))
- elif hasattr(obj, "content_length"):
- # noinspection PyBroadException
- try:
- # To catch botocore exceptions
- size = obj.content_length # noqa
- except Exception as e:
- if not silence_errors:
- self.log.warning("Failed obtaining content_length while getting object size: {}('{}')".format(
- e.__class__.__name__, str(e)))
- except Exception as e:
- if not silence_errors:
- self.log.warning("Failed getting object size: {}('{}')".format(e.__class__.__name__, str(e)))
- return size
-
- def get_object_metadata(self, obj):
- # type: (object) -> dict
- """
- Get the metadata of the remote object.
- The metadata is a dict containing the following keys: `name`, `size`.
-
- :param object obj: The remote object
-
- :return: A dict containing the metadata of the remote object
- """
- name_fields = ("name", "url", "key", "blob_name")
- metadata = {
- "size": self._get_object_size_bytes(obj),
- "name": next(filter(None, (getattr(obj, f, None) for f in name_fields)), None),
- }
- return metadata
-
- def verify_upload(self, folder_uri='', raise_on_error=True, log_on_error=True):
- """
- Verify that this helper can upload files to a folder.
-
- An upload is possible iff:
- 1. the destination folder is under the base uri of the url used to create the helper
- 2. the helper has credentials to write to the destination folder
-
- :param folder_uri: The destination folder to test. Must be an absolute
- url that begins with the base uri of the url used to create the helper.
- :param raise_on_error: Raise an exception if an upload is not possible
- :param log_on_error: Log an error if an upload is not possible
- :return: True, if, and only if, an upload to folder_uri is possible.
- """
-
- folder_uri = self._canonize_url(folder_uri)
-
- folder_uri = self.conform_url(folder_uri, self._base_url)
-
- test_path = self._normalize_object_name(folder_uri)
-
- if self._scheme == _Boto3Driver.scheme:
- _Boto3Driver._test_bucket_config(
- self._conf,
- self._log,
- test_path=test_path,
- raise_on_error=raise_on_error,
- log_on_error=log_on_error,
- )
- elif self._scheme == _GoogleCloudStorageDriver.scheme:
- self._driver.test_upload(test_path, self._conf)
-
- elif self._scheme == 'file':
- # Check path exists
- Path(test_path).mkdir(parents=True, exist_ok=True)
- # check path permissions
- Path(test_path).touch(exist_ok=True)
-
- return folder_uri
-
- def upload_from_stream(self, stream, dest_path, extra=None, retries=1, return_canonized=True):
- canonized_dest_path = self._canonize_url(dest_path)
- object_name = self._normalize_object_name(canonized_dest_path)
- extra = extra.copy() if extra else {}
- extra.update(self._extra)
- last_ex = None
- cb = UploadProgressReport.from_stream(stream, object_name, self._verbose, self._log)
- for i in range(max(1, int(retries))):
- try:
- self._driver.upload_object_via_stream(
- iterator=stream,
- container=self._container,
- object_name=object_name,
- callback=cb,
- extra=extra)
- last_ex = None
- break
- except Exception as ex:
- last_ex = ex
- # seek to beginning if possible
- # noinspection PyBroadException
- try:
- stream.seek(0)
- except Exception:
- pass
- if last_ex:
- raise last_ex
-
- result_dest_path = canonized_dest_path if return_canonized else dest_path
-
- if self.scheme in _HttpDriver.schemes:
- # quote link
- result_dest_path = quote_url(result_dest_path)
-
- return result_dest_path
-
- def upload(
- self, src_path, dest_path=None, extra=None, async_enable=False, cb=None, retries=3, return_canonized=True
- ):
- if not dest_path:
- dest_path = os.path.basename(src_path)
-
- canonized_dest_path = self._canonize_url(dest_path)
- dest_path = dest_path.replace('\\', '/')
- canonized_dest_path = canonized_dest_path.replace('\\', '/')
-
- result_path = canonized_dest_path if return_canonized else dest_path
-
- if cb and self.scheme in _HttpDriver.schemes:
- # store original callback
- a_cb = cb
-
- # quote link
- def callback(result):
- return a_cb(quote_url(result_path) if result else result)
- # replace callback with wrapper
- cb = callback
-
- if async_enable:
- data = self._UploadData(
- src_path=src_path,
- dest_path=dest_path,
- canonized_dest_path=canonized_dest_path,
- extra=extra,
- callback=cb,
- retries=retries,
- return_canonized=return_canonized
- )
- StorageHelper._initialize_upload_pool()
- return StorageHelper._upload_pool.apply_async(self._do_async_upload, args=(data,))
- else:
- res = self._do_upload(
- src_path=src_path,
- dest_path=dest_path,
- canonized_dest_path=canonized_dest_path,
- extra=extra,
- cb=cb,
- verbose=False,
- retries=retries,
- return_canonized=return_canonized)
- if res:
- result_path = quote_url(result_path)
- return result_path
-
- def list(self, prefix=None, with_metadata=False):
- """
- List entries in the helper base path.
-
- Return a list of names inside this helper base path or a list of dictionaries containing
- the objects' metadata. The base path is determined at creation time and is specific
- for each storage medium.
- For Google Storage and S3 it is the bucket of the path.
- For local files it is the root directory.
-
- This operation is not supported for http and https protocols.
-
- :param prefix: If None, return the list as described above. If not, it
- must be a string - the path of a sub directory under the base path.
- the returned list will include only objects under that subdir.
-
- :param with_metadata: Instead of returning just the names of the objects, return a list of dictionaries
- containing the name and metadata of the remote file. Thus, each dictionary will contain the following
- keys: `name`, `size`.
-
- :return: The paths of all the objects in the storage base path under prefix or
- a list of dictionaries containing the objects' metadata.
- Listed relative to the base path.
- """
- if prefix:
- prefix = self._canonize_url(prefix)
- if prefix.startswith(self._base_url):
- prefix = prefix[len(self._base_url):]
- if self._base_url != "file://":
- prefix = prefix.lstrip("/")
- if self._base_url == "file://":
- prefix = prefix.rstrip("/")
- if prefix.startswith(str(self._driver.base_path)):
- prefix = prefix[len(str(self._driver.base_path)):]
- res = self._driver.list_container_objects(self._container, ex_prefix=prefix)
- result = [
- obj.name if not with_metadata else self.get_object_metadata(obj)
- for obj in res
- ]
-
- if self._base_url == "file://":
- if not with_metadata:
- result = [Path(f).as_posix() for f in result]
- else:
- for metadata_entry in result:
- metadata_entry["name"] = Path(metadata_entry["name"]).as_posix()
- return result
- else:
- return [
- obj.name if not with_metadata else self.get_object_metadata(obj)
- for obj in self._driver.list_container_objects(self._container)
- ]
-
- def download_to_file(
- self,
- remote_path,
- local_path,
- overwrite_existing=False,
- delete_on_failure=True,
- verbose=None,
- skip_zero_size_check=False,
- silence_errors=False,
- direct_access=True
- ):
- def next_chunk(astream):
- if isinstance(astream, binary_type):
- chunk = astream
- astream = None
- elif astream:
- try:
- chunk = next(astream)
- except StopIteration:
- chunk = None
- else:
- chunk = None
- return chunk, astream
-
- remote_path = self._canonize_url(remote_path)
- verbose = self._verbose if verbose is None else verbose
-
- tmp_remote_path = remote_path
- # noinspection PyBroadException
- try:
- tmp_remote_path = normalize_local_path(tmp_remote_path)
- if tmp_remote_path.exists():
- remote_path = "file://{}".format(str(tmp_remote_path))
- except Exception:
- pass
- # Check if driver type supports direct access:
- direct_access_path = self.get_driver_direct_access(remote_path)
- if direct_access_path and direct_access:
- return direct_access_path
-
- temp_local_path = None
- try:
- if verbose:
- self._log.info('Start downloading from %s' % remote_path)
- if not overwrite_existing and Path(local_path).is_file():
- self._log.debug(
- 'File {} already exists, no need to download, thread id = {}'.format(
- local_path,
- threading.current_thread().ident,
- ),
- )
-
- return local_path
- if remote_path.startswith("file://"):
- Path(local_path).parent.mkdir(parents=True, exist_ok=True)
- # use remote_path, because direct_access_path might be None, because of access_rules
- # len("file://") == 7
- shutil.copyfile(remote_path[7:], local_path)
- return local_path
- # we download into temp_local_path so that if we accidentally stop in the middle,
- # we won't think we have the entire file
- temp_local_path = '{}_{}{}'.format(local_path, time(), self._temp_download_suffix)
- obj = self.get_object(remote_path, silence_errors=silence_errors)
- if not obj:
- return None
-
- # object size in bytes
- total_size_mb = -1
- dl_total_mb = 0.
- download_reported = False
- # chunks size is ignored and always 5Mb
- chunk_size_mb = 5
-
- # make sure we have the destination folder
- # noinspection PyBroadException
- Path(temp_local_path).parent.mkdir(parents=True, exist_ok=True)
-
- total_size_bytes = self.get_object_size_bytes(remote_path, silence_errors=silence_errors)
- if total_size_bytes is not None:
- total_size_mb = float(total_size_bytes) / (1024 * 1024)
-
- # if driver supports download with callback, use it (it might be faster)
- if hasattr(self._driver, 'download_object'):
- # callback
- cb = DownloadProgressReport(total_size_mb, verbose, remote_path, self._log)
- self._driver.download_object(obj, temp_local_path, callback=cb)
- download_reported = bool(cb.last_reported)
- dl_total_mb = cb.current_status_mb
- else:
- stream = self._driver.download_object_as_stream(obj, chunk_size_mb * 1024 * 1024)
- if stream is None:
- raise ValueError('Could not download %s' % remote_path)
- with open(temp_local_path, 'wb') as fd:
- data, stream = next_chunk(stream)
- while data:
- fd.write(data)
- data, stream = next_chunk(stream)
-
- if not skip_zero_size_check and Path(temp_local_path).stat().st_size <= 0:
- raise Exception('downloaded a 0-sized file')
-
- # if we are on Windows, we need to remove the target file before renaming
- # otherwise posix rename will overwrite the target
- if os.name != 'posix':
- # noinspection PyBroadException
- try:
- os.remove(local_path)
- except Exception:
- pass
-
- # rename temp file to local_file
- # noinspection PyBroadException
- try:
- os.rename(temp_local_path, local_path)
- except Exception:
- # noinspection PyBroadException
- try:
- os.unlink(temp_local_path)
- except Exception:
- pass
- # file was downloaded by a parallel process, check we have the final output and delete the partial copy
- path_local_path = Path(local_path)
- if not path_local_path.is_file() or (not skip_zero_size_check and path_local_path.stat().st_size <= 0):
- raise Exception('Failed renaming partial file, downloaded file exists and a 0-sized file')
-
- # report download if we are on the second chunk
- if verbose or download_reported:
- self._log.info(
- 'Downloaded %.2f MB successfully from %s , saved to %s' % (dl_total_mb, remote_path, local_path))
- return local_path
- except DownloadError:
- raise
- except Exception as e:
- self._log.error("Could not download {} , err: {} ".format(remote_path, e))
- if delete_on_failure:
- # noinspection PyBroadException
- try:
- os.remove(temp_local_path)
- except Exception:
- pass
- return None
-
- def download_as_stream(self, remote_path, chunk_size=None):
- remote_path = self._canonize_url(remote_path)
- try:
- obj = self.get_object(remote_path)
- return self._driver.download_object_as_stream(
- obj, chunk_size=chunk_size, verbose=self._verbose, log=self.log
- )
- except DownloadError:
- raise
- except Exception as e:
- self._log.error("Could not download file : %s, err:%s " % (remote_path, str(e)))
- return None
-
- def download_as_nparray(self, remote_path, chunk_size=None):
- try:
- stream = self.download_as_stream(remote_path, chunk_size)
- if stream is None:
- return
-
- # TODO: ugly py3 hack, please remove ASAP
- if six.PY3 and not isinstance(stream, GeneratorType):
- import numpy as np
- return np.frombuffer(stream, dtype=np.uint8)
- else:
- import numpy as np
- return np.asarray(bytearray(b''.join(stream)), dtype=np.uint8)
-
- except Exception as e:
- self._log.error("Could not download file : %s, err:%s " % (remote_path, str(e)))
-
- def delete(self, path):
- path = self._canonize_url(path)
- return self._driver.delete_object(self.get_object(path))
-
- def check_write_permissions(self, dest_path=None):
- # create a temporary file, then delete it
- base_url = dest_path or self._base_url
- dest_path = base_url + "/.clearml.{}.test".format(str(uuid.uuid4()))
- # do not check http/s connection permissions
- if dest_path.startswith("http"):
- return True
-
- try:
- self.upload_from_stream(stream=six.BytesIO(b"clearml"), dest_path=dest_path)
- except Exception:
- raise ValueError("Insufficient permissions (write failed) for {}".format(base_url))
- try:
- self.delete(path=dest_path)
- except Exception:
- raise ValueError("Insufficient permissions (delete failed) for {}".format(base_url))
- return True
-
- @classmethod
- def download_from_url(cls, remote_url, local_path, overwrite_existing=False, skip_zero_size_check=False):
- """
- Download a file from remote URL to a local storage
-
- :param remote_url: Remote URL. Example: https://example.com/image.jpg or s3://bucket/folder/file.mp4 etc.
- :param local_path: target location for downloaded file. Example: /tmp/image.jpg
- :param overwrite_existing: If True, and local_path exists, it will overwrite it, otherwise print warning
- :param skip_zero_size_check: If True, no error will be raised for files with zero bytes size.
- :return: local_path if download was successful.
- """
- helper = cls.get(remote_url)
- if not helper:
- return None
- return helper.download_to_file(
- remote_url, local_path, overwrite_existing=overwrite_existing, skip_zero_size_check=skip_zero_size_check
- )
-
- def get_driver_direct_access(self, path):
- """
- Check if the helper's driver has a direct access to the file
-
- :param str path: file path to check access to
- :return: Return the string representation of the file as path if have access to it, else None
- """
-
- return self._driver.get_direct_access(path)
-
- @classmethod
- def _canonize_url(cls, url):
- return cls._apply_url_substitutions(url)
-
- @classmethod
- def _apply_url_substitutions(cls, url):
- def replace_separator(_url, where, sep):
- return _url[:where] + _url[where:].replace(sep, os.sep)
-
- for index, rule in enumerate(cls._path_substitutions):
- if url.startswith(rule.registered_prefix):
- url = url.replace(
- rule.registered_prefix,
- rule.local_prefix,
- 1, # count. str.replace() does not support keyword arguments
- )
-
- if rule.replace_windows_sep:
- url = replace_separator(url, len(rule.local_prefix), '\\')
-
- if rule.replace_linux_sep:
- url = replace_separator(url, len(rule.local_prefix), '/')
-
- break
-
- return url
-
- @classmethod
- def _resolve_base_url(cls, base_url):
- parsed = urlparse(base_url)
- if parsed.scheme == _Boto3Driver.scheme:
- conf = cls._s3_configurations.get_config_by_uri(base_url)
- bucket = conf.bucket
- if not bucket:
- parts = Path(parsed.path.strip('/')).parts
- if parts:
- bucket = parts[0]
- return '/'.join(x for x in ('s3:/', conf.host, bucket) if x)
- elif parsed.scheme == _AzureBlobServiceStorageDriver.scheme:
- conf = cls._azure_configurations.get_config_by_uri(base_url)
- if not conf:
- raise StorageError("Can't find azure configuration for {}".format(base_url))
- return str(furl(base_url).set(path=conf.container_name))
- elif parsed.scheme == _GoogleCloudStorageDriver.scheme:
- conf = cls._gs_configurations.get_config_by_uri(base_url)
- return str(furl(scheme=parsed.scheme, netloc=conf.bucket))
- elif parsed.scheme in _HttpDriver.schemes:
- for files_server in _Driver.get_file_server_hosts():
- if base_url.startswith(files_server):
- return files_server
- return parsed.scheme + "://"
- else: # if parsed.scheme == 'file':
- # if we do not know what it is, we assume file
- return 'file://'
-
- @classmethod
- def conform_url(cls, folder_uri, base_url=None):
- if not folder_uri:
- return folder_uri
- _base_url = cls._resolve_base_url(folder_uri) if not base_url else base_url
-
- if not folder_uri.startswith(_base_url):
- prev_folder_uri = folder_uri
- if _base_url == 'file://':
- folder_uri = str(Path(folder_uri).absolute())
- if folder_uri.startswith('/'):
- folder_uri = _base_url + folder_uri
- elif platform.system() == "Windows":
- folder_uri = ''.join((_base_url, folder_uri))
- else:
- folder_uri = '/'.join((_base_url, folder_uri))
-
- cls._get_logger().debug('Upload destination {} amended to {} for registration purposes'.format(
- prev_folder_uri, folder_uri))
- else:
- raise ValueError('folder_uri: {} does not start with base url: {}'.format(folder_uri, _base_url))
-
- return folder_uri
-
- def _absolute_object_name(self, path):
- """ Returns absolute remote path, including any prefix that is handled by the container """
- if not path.startswith(self.base_url):
- return self.base_url.rstrip('/') + '///' + path.lstrip('/')
- return path
-
- def _normalize_object_name(self, path):
- """ Normalize remote path. Remove any prefix that is already handled by the container """
- if path.startswith(self.base_url):
- path = path[len(self.base_url):]
- if path.startswith('/') and os.name == 'nt':
- path = path[1:]
- if self.scheme in (_Boto3Driver.scheme, _GoogleCloudStorageDriver.scheme,
- _AzureBlobServiceStorageDriver.scheme):
- path = path.lstrip('/')
- return path
-
- def _do_async_upload(self, data):
- assert isinstance(data, self._UploadData)
- return self._do_upload(data.src_path, data.dest_path, data.canonized_dest_path, extra=data.extra, cb=data.callback, verbose=True, retries=data.retries, return_canonized=data.return_canonized)
-
- def _upload_from_file(self, local_path, dest_path, extra=None):
- if not hasattr(self._driver, 'upload_object'):
- with open(local_path, 'rb') as stream:
- res = self.upload_from_stream(stream=stream, dest_path=dest_path, extra=extra)
- else:
- object_name = self._normalize_object_name(dest_path)
- extra = extra.copy() if extra else {}
- extra.update(self._extra)
- cb = UploadProgressReport.from_file(local_path, self._verbose, self._log)
- res = self._driver.upload_object(
- file_path=local_path,
- container=self._container,
- object_name=object_name,
- callback=cb,
- extra=extra)
- return res
-
- def _do_upload(self, src_path, dest_path, canonized_dest_path, extra=None, cb=None, verbose=False, retries=1, return_canonized=False):
- object_name = self._normalize_object_name(canonized_dest_path)
- if cb:
- try:
- cb(None)
- except Exception as e:
- self._log.error("Calling upload callback when starting upload: %s" % str(e))
- if verbose:
- msg = 'Starting upload: {} => {}{}'.format(
- src_path,
- (self._container.name if self._container.name.endswith('/') else self._container.name + '/')
- if self._container and self._container.name else '', object_name)
- if object_name.startswith('file://') or object_name.startswith('/'):
- self._log.debug(msg)
- else:
- self._log.info(msg)
- last_ex = None
- for i in range(max(1, int(retries))):
- try:
- if not self._upload_from_file(local_path=src_path, dest_path=canonized_dest_path, extra=extra):
- # retry if failed
- last_ex = ValueError("Upload failed")
- continue
- last_ex = None
- break
- except Exception as e:
- last_ex = e
-
- if last_ex:
- self._log.error("Exception encountered while uploading %s" % str(last_ex))
- if cb:
- try:
- cb(False)
- except Exception as e:
- self._log.warning("Exception on upload callback: %s" % str(e))
- raise last_ex
-
- if verbose:
- self._log.debug("Finished upload: %s => %s" % (src_path, object_name))
- if cb:
- try:
- cb(canonized_dest_path if return_canonized else dest_path)
- except Exception as e:
- self._log.warning("Exception on upload callback: %s" % str(e))
-
- return canonized_dest_path if return_canonized else dest_path
-
- def get_object(self, path, silence_errors=False):
- # type: (str, bool) -> object
- """
- Gets the remote object stored at path. The data held by the object
- differs depending on where it is stored.
-
- :param str path: the path where the remote object is stored
- :param bool silence_errors: Silence errors that might occur
- when fetching the remote object
-
- :return: The remote object
- """
- path = self._canonize_url(path)
- object_name = self._normalize_object_name(path)
- try:
- return self._driver.get_object(
- container_name=self._container.name if self._container else '', object_name=object_name)
- except ConnectionError:
- raise DownloadError
- except Exception as e:
- if not silence_errors:
- self.log.warning("Storage helper problem for {}: {}".format(str(object_name), str(e)))
- return None
-
- @staticmethod
- def _initialize_upload_pool():
- if not StorageHelper._upload_pool or StorageHelper._upload_pool_pid != os.getpid():
- StorageHelper._upload_pool_pid = os.getpid()
- StorageHelper._upload_pool = ThreadPool(processes=1)
-
- @staticmethod
- def close_async_threads():
- if StorageHelper._upload_pool:
- pool = StorageHelper._upload_pool
- StorageHelper._upload_pool = None
- # noinspection PyBroadException
- try:
- pool.terminate()
- pool.join()
- except Exception:
- pass
-
- def exists_file(self, remote_url):
- remote_url = self._canonize_url(remote_url)
- object_name = self._normalize_object_name(remote_url)
- return self._driver.exists_file(
- container_name=self._container.name if self._container else "", object_name=object_name
- )
-
-
class _HttpDriver(_Driver):
""" LibCloud http/https adapter (simple, enough for now) """
@@ -2954,6 +1776,1186 @@ class _FileStorageDriver(_Driver):
return os.path.isfile(object_name)
+
+class StorageHelper(object):
+ """ Storage helper.
+ Used by the entire system to download/upload files.
+ Supports both local and remote files (local files, network-mapped files, HTTP/S, Amazon S3, Google Cloud Storage and Azure Blob Storage)
+ """
+ _temp_download_suffix = '.partially'
+ _quotable_uri_schemes = set(_HttpDriver.schemes) | set([_GoogleCloudStorageDriver.scheme])
+
+ @classmethod
+ def _get_logger(cls):
+ return get_logger('storage')
+
+ @attrs
+ class _PathSubstitutionRule(object):
+ registered_prefix = attrib(type=str)
+ local_prefix = attrib(type=str)
+ replace_windows_sep = attrib(type=bool)
+ replace_linux_sep = attrib(type=bool)
+
+ path_substitution_config = 'storage.path_substitution'
+
+ @classmethod
+ def load_list_from_config(cls):
+ rules_list = []
+ for index, sub_config in enumerate(config.get(cls.path_substitution_config, list())):
+ rule = cls(
+ registered_prefix=sub_config.get('registered_prefix', None),
+ local_prefix=sub_config.get('local_prefix', None),
+ replace_windows_sep=sub_config.get('replace_windows_sep', False),
+ replace_linux_sep=sub_config.get('replace_linux_sep', False),
+ )
+
+ if any(prefix is None for prefix in (rule.registered_prefix, rule.local_prefix)):
+ StorageHelper._get_logger().warning(
+ "Illegal substitution rule configuration '{}[{}]': {}".format(
+ cls.path_substitution_config,
+ index,
+ asdict(rule),
+ ))
+
+ continue
+
+ if all((rule.replace_windows_sep, rule.replace_linux_sep)):
+ StorageHelper._get_logger().warning(
+ "Only one of replace_windows_sep and replace_linux_sep flags may be set. "
+ "'{}[{}]': {}".format(
+ cls.path_substitution_config,
+ index,
+ asdict(rule),
+ ))
+ continue
+
+ rules_list.append(rule)
+
+ return rules_list
+
+ class _UploadData(object):
+ @property
+ def src_path(self):
+ return self._src_path
+
+ @property
+ def dest_path(self):
+ return self._dest_path
+
+ @property
+ def canonized_dest_path(self):
+ return self._canonized_dest_path
+
+ @property
+ def extra(self):
+ return self._extra
+
+ @property
+ def callback(self):
+ return self._callback
+
+ @property
+ def retries(self):
+ return self._retries
+
+ @property
+ def return_canonized(self):
+ return self._return_canonized
+
+ def __init__(self, src_path, dest_path, canonized_dest_path, extra, callback, retries, return_canonized):
+ self._src_path = src_path
+ self._dest_path = dest_path
+ self._canonized_dest_path = canonized_dest_path
+ self._extra = extra
+ self._callback = callback
+ self._retries = retries
+ self._return_canonized = return_canonized
+
+ def __str__(self):
+ return "src=%s" % self.src_path
+
+ _helpers = {} # cache of helper instances
+
+ # global terminate event for async upload threads
+ # _terminate = threading.Event()
+ _async_upload_threads = set()
+ _upload_pool = None
+ _upload_pool_pid = None
+
+ # collect all bucket credentials that aren't empty (ignore entries with an empty key or secret)
+ _s3_configurations = deferred_config('aws.s3', {}, transform=S3BucketConfigurations.from_config)
+ _gs_configurations = deferred_config('google.storage', {}, transform=GSBucketConfigurations.from_config)
+ _azure_configurations = deferred_config('azure.storage', {}, transform=AzureContainerConfigurations.from_config)
+ _path_substitutions = deferred_config(transform=_PathSubstitutionRule.load_list_from_config)
+
+ @property
+ def log(self):
+ return self._log
+
+ @property
+ def scheme(self):
+ return self._scheme
+
+ @property
+ def secure(self):
+ return self._secure
+
+ @property
+ def base_url(self):
+ return self._base_url
+
+ @classmethod
+ def get(cls, url, logger=None, **kwargs):
+ """
+ Get a storage helper instance for the given URL
+
+ :return: A StorageHelper instance.
+ """
+ # Handle URL substitution etc before locating the correct storage driver
+ url = cls._canonize_url(url)
+
+ # Get the credentials we should use for this url
+ base_url = cls._resolve_base_url(url)
+
+ instance_key = '%s_%s' % (base_url, threading.current_thread().ident or 0)
+ # noinspection PyBroadException
+ try:
+ configs = kwargs.get("configs")
+ if configs:
+ instance_key += "_{}".format(configs.cache_name)
+ except Exception:
+ pass
+
+ force_create = kwargs.pop('__force_create', False)
+ if (instance_key in cls._helpers) and (not force_create) and base_url != "file://":
+ return cls._helpers[instance_key]
+
+ # Don't canonize URL since we already did it
+ try:
+ instance = cls(base_url=base_url, url=url, logger=logger, canonize_url=False, **kwargs)
+ except (StorageError, UsageError) as ex:
+ cls._get_logger().error(str(ex))
+ return None
+ except Exception as ex:
+ cls._get_logger().error("Failed creating storage object {} Reason: {}".format(
+ base_url or url, ex))
+ return None
+
+ cls._helpers[instance_key] = instance
+ return instance
+
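
A minimal usage sketch of StorageHelper.get(), assuming the class lives in clearml.storage.helper and that S3 credentials are configured under the aws.s3 section; the bucket URL below is hypothetical:

from clearml.storage.helper import StorageHelper

# resolve a helper for an S3 URL (credentials are read from the aws.s3 configuration)
helper = StorageHelper.get("s3://my-bucket/datasets/train.zip")
if helper is None:
    raise RuntimeError("could not create a storage helper for this URL")

# a second call with the same bucket on the same thread normally returns the cached instance
helper_again = StorageHelper.get("s3://my-bucket/datasets/labels.json")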
+ @classmethod
+ def get_local_copy(cls, remote_url, skip_zero_size_check=False):
+ """
+ Download a file from a remote URL to local storage, and return the path to the local copy.
+
+ :param remote_url: Remote URL. Example: https://example.com/file.jpg or s3://bucket/folder/file.mp4 etc.
+ :param skip_zero_size_check: If True, no error will be raised for files with zero bytes size.
+ :return: Path to the local copy of the downloaded file, or None if an error occurred.
+ """
+ helper = cls.get(remote_url)
+ if not helper:
+ return None
+ # create temp file with the requested file name
+ file_name = '.' + remote_url.split('/')[-1].split(os.path.sep)[-1]
+ local_path = mktemp(suffix=file_name)
+ return helper.download_to_file(remote_url, local_path, skip_zero_size_check=skip_zero_size_check)
+
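
A short sketch of get_local_copy() with a hypothetical URL; the file is downloaded into a temporary path derived from the remote file name:

from clearml.storage.helper import StorageHelper

local_file = StorageHelper.get_local_copy("https://example.com/file.jpg")
if local_file is None:
    print("download failed")
else:
    print("downloaded to", local_file)  # a temp file such as /tmp/xxxx.file.jpg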
+ def __init__(
+ self,
+ base_url,
+ url,
+ key=None,
+ secret=None,
+ region=None,
+ verbose=False,
+ logger=None,
+ retries=5,
+ token=None,
+ **kwargs
+ ):
+ level = config.get("storage.log.level", None)
+
+ if level:
+ try:
+ self._get_logger().setLevel(level)
+ except (TypeError, ValueError):
+ self._get_logger().error('invalid storage log level in configuration: %s' % level)
+
+ self._log = logger or self._get_logger()
+ self._verbose = verbose
+ self._retries = retries
+ self._extra = {}
+ self._base_url = base_url
+ self._secure = True
+ self._driver = None
+ self._container = None
+ self._conf = None
+
+ if kwargs.get('canonize_url', True):
+ url = self._canonize_url(url)
+
+ parsed = urlparse(url)
+ self._scheme = parsed.scheme
+
+ if self._scheme == _AzureBlobServiceStorageDriver.scheme:
+ self._conf = copy(self._azure_configurations.get_config_by_uri(url))
+ if self._conf is None:
+ raise StorageError("Missing Azure Blob Storage configuration for {}".format(url))
+
+ if not self._conf.account_name or not self._conf.account_key:
+ raise StorageError(
+ "Missing account name or key for Azure Blob Storage access for {}".format(base_url)
+ )
+
+ self._driver = _AzureBlobServiceStorageDriver()
+ self._container = self._driver.get_container(config=self._conf, account_url=parsed.netloc)
+
+ elif self._scheme == _Boto3Driver.scheme:
+ self._conf = copy(self._s3_configurations.get_config_by_uri(url))
+ self._secure = self._conf.secure
+
+ final_region = region if region else self._conf.region
+ if not final_region:
+ final_region = None
+
+ self._conf.update(
+ key=key or self._conf.key,
+ secret=secret or self._conf.secret,
+ multipart=self._conf.multipart,
+ region=final_region,
+ use_credentials_chain=self._conf.use_credentials_chain,
+ token=token or self._conf.token,
+ extra_args=self._conf.extra_args,
+ )
+
+ if not self._conf.use_credentials_chain:
+ if not self._conf.key or not self._conf.secret:
+ raise ValueError(
+ "Missing key and secret for S3 storage access (%s)" % base_url
+ )
+
+ self._driver = _Boto3Driver()
+ self._container = self._driver.get_container(
+ container_name=self._base_url, retries=retries, config=self._conf)
+
+ elif self._scheme == _GoogleCloudStorageDriver.scheme:
+ self._conf = copy(self._gs_configurations.get_config_by_uri(url))
+ self._driver = _GoogleCloudStorageDriver()
+ self._container = self._driver.get_container(
+ container_name=self._base_url,
+ config=self._conf
+ )
+
+ elif self._scheme in _HttpDriver.schemes:
+ self._driver = _HttpDriver(retries=retries)
+ self._container = self._driver.get_container(container_name=self._base_url)
+ else: # elif self._scheme == 'file':
+ # if this is not a known scheme assume local file
+ # url2pathname is specifically intended to operate on (urlparse result).path
+ # and returns a cross-platform compatible result
+ new_url = normalize_local_path(url[len("file://"):] if url.startswith("file://") else url)
+ self._driver = _FileStorageDriver(new_url)
+ # noinspection PyBroadException
+ try:
+ self._container = self._driver.get_container("")
+ except Exception:
+ self._container = None
+
+ @classmethod
+ def terminate_uploads(cls, force=True, timeout=2.0):
+ if force:
+ # since async uploaders are daemon threads, we can just return and let them close by themselves
+ return
+ # signal all threads to terminate and give them a chance for 'timeout' seconds (total, not per-thread)
+ # cls._terminate.set()
+ remaining_timeout = timeout
+ for thread in cls._async_upload_threads:
+ t = time()
+ # noinspection PyBroadException
+ try:
+ thread.join(timeout=remaining_timeout)
+ except Exception:
+ pass
+ remaining_timeout -= (time() - t)
+
+ @classmethod
+ def get_aws_storage_uri_from_config(cls, bucket_config):
+ uri = (
+ "s3://{}/{}".format(bucket_config.host, bucket_config.bucket)
+ if bucket_config.host
+ else "s3://{}".format(bucket_config.bucket)
+ )
+ if bucket_config.subdir:
+ uri += "/" + bucket_config.subdir
+ return uri
+
+ @classmethod
+ def get_gcp_storage_uri_from_config(cls, bucket_config):
+ return (
+ "gs://{}/{}".format(bucket_config.bucket, bucket_config.subdir)
+ if bucket_config.subdir
+ else "gs://{}".format(bucket_config.bucket)
+ )
+
+ @classmethod
+ def get_azure_storage_uri_from_config(cls, bucket_config):
+ return "azure://{}.blob.core.windows.net/{}".format(bucket_config.account_name, bucket_config.container_name)
+
+ @classmethod
+ def get_configuration(cls, bucket_config):
+ return cls.get_aws_configuration(bucket_config)
+
+ @classmethod
+ def get_aws_configuration(cls, bucket_config):
+ return cls._s3_configurations.get_config_by_bucket(bucket_config.bucket, bucket_config.host)
+
+ @classmethod
+ def get_gcp_configuration(cls, bucket_config):
+ return cls._gs_configurations.get_config_by_uri(
+ cls.get_gcp_storage_uri_from_config(bucket_config),
+ create_if_not_found=False
+ )
+
+ @classmethod
+ def get_azure_configuration(cls, bucket_config):
+ return cls._azure_configurations.get_config(bucket_config.account_name, bucket_config.container_name)
+
+ @classmethod
+ def add_configuration(cls, bucket_config, log=None, _test_config=True):
+ return cls.add_aws_configuration(bucket_config, log=log, _test_config=_test_config)
+
+ @classmethod
+ def add_aws_configuration(cls, bucket_config, log=None, _test_config=True):
+ # Try to use existing configuration if we have no key and secret
+ use_existing = not bucket_config.is_valid()
+ # Get existing config anyway (we'll either try to use it or alert we're replacing it)
+ existing = cls.get_aws_configuration(bucket_config)
+ configs = cls._s3_configurations
+ uri = cls.get_aws_storage_uri_from_config(bucket_config)
+
+ if not use_existing:
+ # Test bucket config, fails if unsuccessful
+ if _test_config:
+ _Boto3Driver._test_bucket_config(bucket_config, log) # noqa
+ if existing:
+ if log:
+ log.warning("Overriding existing configuration for '{}'".format(uri))
+ configs.remove_config(existing)
+ configs.add_config(bucket_config)
+ else:
+ # Try to use existing configuration
+ good_config = False
+ if existing:
+ if log:
+ log.info("Using existing credentials for '{}'".format(uri))
+ good_config = _Boto3Driver._test_bucket_config(existing, log, raise_on_error=False) # noqa
+
+ if not good_config:
+ # Try to use global key/secret
+ configs.update_config_with_defaults(bucket_config)
+
+ if log:
+ log.info("Using global credentials for '{}'".format(uri))
+ if _test_config:
+ _Boto3Driver._test_bucket_config(bucket_config, log) # noqa
+ configs.add_config(bucket_config)
+
+ @classmethod
+ def add_gcp_configuration(cls, bucket_config, log=None):
+ use_existing = not bucket_config.is_valid()
+ existing = cls.get_gcp_configuration(bucket_config)
+ configs = cls._gs_configurations
+ uri = cls.get_gcp_storage_uri_from_config(bucket_config)
+
+ if not use_existing:
+ if existing:
+ if log:
+ log.warning("Overriding existing configuration for '{}'".format(uri))
+ configs.remove_config(existing)
+ configs.add_config(bucket_config)
+ else:
+ good_config = False
+ if existing:
+ if log:
+ log.info("Using existing config for '{}'".format(uri))
+ good_config = _GoogleCloudStorageDriver.test_upload(None, bucket_config)
+ if not good_config:
+ configs.update_config_with_defaults(bucket_config)
+ if log:
+ log.info("Using global credentials for '{}'".format(uri))
+ configs.add_config(bucket_config)
+
+ @classmethod
+ def add_azure_configuration(cls, bucket_config, log=None):
+ use_existing = not bucket_config.is_valid()
+ existing = cls.get_azure_configuration(bucket_config)
+ configs = cls._azure_configurations
+ uri = cls.get_azure_storage_uri_from_config(bucket_config)
+
+ if not use_existing:
+ if existing:
+ if log:
+ log.warning("Overriding existing configuration for '{}'".format(uri))
+ configs.remove_config(existing)
+ configs.add_config(bucket_config)
+ else:
+ good_config = False
+ if existing:
+ if log:
+ log.info("Using existing config for '{}'".format(uri))
+ good_config = _AzureBlobServiceStorageDriver.test_upload(None, bucket_config)
+ if not good_config:
+ configs.update_config_with_defaults(bucket_config)
+ if log:
+ log.info("Using global credentials for '{}'".format(uri))
+ configs.add_config(bucket_config)
+
+ @classmethod
+ def add_path_substitution(
+ cls,
+ registered_prefix,
+ local_prefix,
+ replace_windows_sep=False,
+ replace_linux_sep=False,
+ ):
+ """
+ Add a path substitution rule for storage paths.
+
+ Useful for cases where the data was registered under some path, and that
+ path was later renamed. This may happen with local storage paths where
+ each machine has different mounts or network drive configurations.
+
+ :param registered_prefix: The prefix to search for and replace. This is
+ the prefix of the path the data is registered under. This should be the
+ exact url prefix, case sensitive, as the data is registered.
+ :param local_prefix: The prefix to replace 'registered_prefix' with. This
+ is the prefix of the path the data is actually saved under. This should be the
+ exact url prefix, case sensitive, as the data is saved under.
+ :param replace_windows_sep: If set to True, and the prefix matches, the rest
+ of the url has all of the windows path separators (backslash '\') replaced with
+ the native os path separator.
+ :param replace_linux_sep: If set to True, and the prefix matches, the rest
+ of the url has all of the linux/unix path separators (slash '/') replaced with
+ the native os path separator.
+ """
+
+ if not registered_prefix or not local_prefix:
+ raise UsageError("Path substitution prefixes must be non-empty strings")
+
+ if replace_windows_sep and replace_linux_sep:
+ raise UsageError("Only one of replace_windows_sep and replace_linux_sep may be set.")
+
+ rule = cls._PathSubstitutionRule(
+ registered_prefix=registered_prefix,
+ local_prefix=local_prefix,
+ replace_windows_sep=replace_windows_sep,
+ replace_linux_sep=replace_linux_sep,
+ )
+
+ cls._path_substitutions.append(rule)
+
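
A sketch of registering a substitution rule at runtime; the prefixes are hypothetical, and the same rule could come from the storage.path_substitution configuration list parsed by load_list_from_config above:

from clearml.storage.helper import StorageHelper

# data was registered under a Windows share, but this machine mounts it under /mnt/shared
StorageHelper.add_path_substitution(
    registered_prefix="file://Z:/datasets/",
    local_prefix="file:///mnt/shared/datasets/",
    replace_windows_sep=True,  # rewrite the remaining backslashes to the local separator
)
# any URL passed to the helper is now rewritten before a storage driver is selected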
+ @classmethod
+ def clear_path_substitutions(cls):
+ """
+ Removes all path substitution rules, including ones from the configuration file.
+ """
+ cls._path_substitutions = list()
+
+ def get_object_size_bytes(self, remote_url, silence_errors=False):
+ # type: (str, bool) -> [int, None]
+ """
+ Get size of the remote file in bytes.
+
+ :param str remote_url: The url where the file is stored.
+ E.g. 's3://bucket/some_file.txt', 'file://local/file'
+ :param bool silence_errors: Silence errors that might occur
+ when fetching the size of the file. Default: False
+
+ :return: The size of the file in bytes.
+ None if the file could not be found or an error occurred.
+ """
+ obj = self.get_object(remote_url, silence_errors=silence_errors)
+ return self._get_object_size_bytes(obj, silence_errors)
+
+ def _get_object_size_bytes(self, obj, silence_errors=False):
+ # type: (object, bool) -> [int, None]
+ """
+ Auxiliary function for `get_object_size_bytes`.
+ Get size of the remote object in bytes.
+
+ :param object obj: The remote object
+ :param bool silence_errors: Silence errors that might occur
+ when fetching the size of the file. Default: False
+
+ :return: The size of the object in bytes.
+ None if an error occurred.
+ """
+ if not obj:
+ return None
+ size = None
+ try:
+ if isinstance(self._driver, _HttpDriver) and obj:
+ obj = self._driver._get_download_object(obj) # noqa
+ size = int(obj.headers.get("Content-Length", 0))
+ elif hasattr(obj, "size"):
+ size = obj.size
+ # Google storage has the option to reload the object to get the size
+ if size is None and hasattr(obj, "reload"):
+ # noinspection PyBroadException
+ try:
+ # To catch google.api_core exceptions
+ obj.reload()
+ size = obj.size
+ except Exception as e:
+ if not silence_errors:
+ self.log.warning("Failed obtaining object size on reload: {}('{}')".format(
+ e.__class__.__name__, str(e)))
+ elif hasattr(obj, "content_length"):
+ # noinspection PyBroadException
+ try:
+ # To catch botocore exceptions
+ size = obj.content_length # noqa
+ except Exception as e:
+ if not silence_errors:
+ self.log.warning("Failed obtaining content_length while getting object size: {}('{}')".format(
+ e.__class__.__name__, str(e)))
+ except Exception as e:
+ if not silence_errors:
+ self.log.warning("Failed getting object size: {}('{}')".format(e.__class__.__name__, str(e)))
+ return size
+
+ def get_object_metadata(self, obj):
+ # type: (object) -> dict
+ """
+ Get the metadata of the remote object.
+ The metadata is a dict containing the following keys: `name`, `size`.
+
+ :param object obj: The remote object
+
+ :return: A dict containing the metadata of the remote object
+ """
+ name_fields = ("name", "url", "key", "blob_name")
+ metadata = {
+ "size": self._get_object_size_bytes(obj),
+ "name": next(filter(None, (getattr(obj, f, None) for f in name_fields)), None),
+ }
+ return metadata
+
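
For example (bucket and key are hypothetical), the metadata helper can be combined with get_object() roughly like this:

helper = StorageHelper.get("s3://my-bucket/")
obj = helper.get_object("s3://my-bucket/data/train.csv")
if obj is not None:
    print(helper.get_object_metadata(obj))  # e.g. {'size': 1048576, 'name': 'data/train.csv'}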
+ def verify_upload(self, folder_uri='', raise_on_error=True, log_on_error=True):
+ """
+ Verify that this helper can upload files to a folder.
+
+ An upload is possible iff:
+ 1. the destination folder is under the base uri of the url used to create the helper
+ 2. the helper has credentials to write to the destination folder
+
+ :param folder_uri: The destination folder to test. Must be an absolute
+ url that begins with the base uri of the url used to create the helper.
+ :param raise_on_error: Raise an exception if an upload is not possible
+ :param log_on_error: Log an error if an upload is not possible
+ :return: The conformed folder_uri if an upload to it is possible. Depending on raise_on_error
+ and log_on_error, an error is raised or logged otherwise.
+ """
+
+ folder_uri = self._canonize_url(folder_uri)
+
+ folder_uri = self.conform_url(folder_uri, self._base_url)
+
+ test_path = self._normalize_object_name(folder_uri)
+
+ if self._scheme == _Boto3Driver.scheme:
+ _Boto3Driver._test_bucket_config(
+ self._conf,
+ self._log,
+ test_path=test_path,
+ raise_on_error=raise_on_error,
+ log_on_error=log_on_error,
+ )
+ elif self._scheme == _GoogleCloudStorageDriver.scheme:
+ self._driver.test_upload(test_path, self._conf)
+
+ elif self._scheme == 'file':
+ # Check path exists
+ Path(test_path).mkdir(parents=True, exist_ok=True)
+ # check path permissions
+ Path(test_path).touch(exist_ok=True)
+
+ return folder_uri
+
+ def upload_from_stream(self, stream, dest_path, extra=None, retries=1, return_canonized=True):
+ canonized_dest_path = self._canonize_url(dest_path)
+ object_name = self._normalize_object_name(canonized_dest_path)
+ extra = extra.copy() if extra else {}
+ extra.update(self._extra)
+ last_ex = None
+ cb = UploadProgressReport.from_stream(stream, object_name, self._verbose, self._log)
+ for i in range(max(1, int(retries))):
+ try:
+ self._driver.upload_object_via_stream(
+ iterator=stream,
+ container=self._container,
+ object_name=object_name,
+ callback=cb,
+ extra=extra)
+ last_ex = None
+ break
+ except Exception as ex:
+ last_ex = ex
+ # seek to beginning if possible
+ # noinspection PyBroadException
+ try:
+ stream.seek(0)
+ except Exception:
+ pass
+ if last_ex:
+ raise last_ex
+
+ result_dest_path = canonized_dest_path if return_canonized else dest_path
+
+ if self.scheme in StorageHelper._quotable_uri_schemes: # TODO: fix-driver-schema
+ # quote link
+ result_dest_path = quote_url(result_dest_path)
+
+ return result_dest_path
+
+ def upload(
+ self, src_path, dest_path=None, extra=None, async_enable=False, cb=None, retries=3, return_canonized=True
+ ):
+ if not dest_path:
+ dest_path = os.path.basename(src_path)
+
+ canonized_dest_path = self._canonize_url(dest_path)
+ dest_path = dest_path.replace('\\', '/')
+ canonized_dest_path = canonized_dest_path.replace('\\', '/')
+
+ result_path = canonized_dest_path if return_canonized else dest_path
+
+ if cb and self.scheme in StorageHelper._quotable_uri_schemes: # TODO: fix-driver-schema
+ # store original callback
+ a_cb = cb
+
+ # quote link
+ def callback(result):
+ return a_cb(quote_url(result_path) if result else result)
+ # replace callback with wrapper
+ cb = callback
+
+ if async_enable:
+ data = self._UploadData(
+ src_path=src_path,
+ dest_path=dest_path,
+ canonized_dest_path=canonized_dest_path,
+ extra=extra,
+ callback=cb,
+ retries=retries,
+ return_canonized=return_canonized
+ )
+ StorageHelper._initialize_upload_pool()
+ return StorageHelper._upload_pool.apply_async(self._do_async_upload, args=(data,))
+ else:
+ res = self._do_upload(
+ src_path=src_path,
+ dest_path=dest_path,
+ canonized_dest_path=canonized_dest_path,
+ extra=extra,
+ cb=cb,
+ verbose=False,
+ retries=retries,
+ return_canonized=return_canonized)
+ if res:
+ result_path = quote_url(result_path)
+ return result_path
+
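
A sketch of the two upload modes, with hypothetical paths; the asynchronous call returns a multiprocessing.pool.AsyncResult from the internal upload pool:

helper = StorageHelper.get("s3://my-bucket/artifacts/")

# synchronous upload - returns the destination path (percent-quoted for quotable schemes)
dest = helper.upload("/tmp/model.bin", dest_path="s3://my-bucket/artifacts/model.bin")

# asynchronous upload - the callback receives None on start, the destination on success, False on failure
def on_upload(result):
    print("upload callback:", result)

async_result = helper.upload(
    "/tmp/model.bin",
    dest_path="s3://my-bucket/artifacts/model.bin",
    async_enable=True,
    cb=on_upload,
)
async_result.wait()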
+ def list(self, prefix=None, with_metadata=False):
+ """
+ List entries in the helper base path.
+
+ Return a list of names inside this helper base path or a list of dictionaries containing
+ the objects' metadata. The base path is determined at creation time and is specific
+ to each storage medium.
+ For Google Storage and S3 it is the bucket the path refers to.
+ For local files it is the root directory.
+
+ This operation is not supported for http and https protocols.
+
+ :param prefix: If None, return the list as described above. If not, it
+ must be a string - the path of a subdirectory under the base path.
+ The returned list will include only objects under that subdirectory.
+
+ :param with_metadata: Instead of returning just the names of the objects, return a list of dictionaries
+ containing the name and metadata of the remote file. Thus, each dictionary will contain the following
+ keys: `name`, `size`.
+
+ :return: The paths of all the objects in the storage base path under prefix or
+ a list of dictionaries containing the objects' metadata.
+ Listed relative to the base path.
+ """
+ if prefix:
+ prefix = self._canonize_url(prefix)
+ if prefix.startswith(self._base_url):
+ prefix = prefix[len(self._base_url):]
+ if self._base_url != "file://":
+ prefix = prefix.lstrip("/")
+ if self._base_url == "file://":
+ prefix = prefix.rstrip("/")
+ if prefix.startswith(str(self._driver.base_path)):
+ prefix = prefix[len(str(self._driver.base_path)):]
+ res = self._driver.list_container_objects(self._container, ex_prefix=prefix)
+ result = [
+ obj.name if not with_metadata else self.get_object_metadata(obj)
+ for obj in res
+ ]
+
+ if self._base_url == "file://":
+ if not with_metadata:
+ result = [Path(f).as_posix() for f in result]
+ else:
+ for metadata_entry in result:
+ metadata_entry["name"] = Path(metadata_entry["name"]).as_posix()
+ return result
+ else:
+ return [
+ obj.name if not with_metadata else self.get_object_metadata(obj)
+ for obj in self._driver.list_container_objects(self._container)
+ ]
+
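
A sketch of list() with a hypothetical bucket; names are returned relative to the helper's base path:

helper = StorageHelper.get("gs://my-bucket/")

# names only
for name in helper.list(prefix="gs://my-bucket/datasets/v1"):
    print(name)

# names plus sizes
for entry in helper.list(prefix="gs://my-bucket/datasets/v1", with_metadata=True):
    print(entry["name"], entry["size"])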
+ def download_to_file(
+ self,
+ remote_path,
+ local_path,
+ overwrite_existing=False,
+ delete_on_failure=True,
+ verbose=None,
+ skip_zero_size_check=False,
+ silence_errors=False,
+ direct_access=True
+ ):
+ def next_chunk(astream):
+ if isinstance(astream, binary_type):
+ chunk = astream
+ astream = None
+ elif astream:
+ try:
+ chunk = next(astream)
+ except StopIteration:
+ chunk = None
+ else:
+ chunk = None
+ return chunk, astream
+
+ remote_path = self._canonize_url(remote_path)
+ verbose = self._verbose if verbose is None else verbose
+
+ tmp_remote_path = remote_path
+ # noinspection PyBroadException
+ try:
+ tmp_remote_path = normalize_local_path(tmp_remote_path)
+ if tmp_remote_path.exists():
+ remote_path = "file://{}".format(str(tmp_remote_path))
+ except Exception:
+ pass
+ # Check if driver type supports direct access:
+ direct_access_path = self.get_driver_direct_access(remote_path)
+ if direct_access_path and direct_access:
+ return direct_access_path
+
+ temp_local_path = None
+ try:
+ if verbose:
+ self._log.info('Start downloading from %s' % remote_path)
+ if not overwrite_existing and Path(local_path).is_file():
+ self._log.debug(
+ 'File {} already exists, no need to download, thread id = {}'.format(
+ local_path,
+ threading.current_thread().ident,
+ ),
+ )
+
+ return local_path
+ if remote_path.startswith("file://"):
+ Path(local_path).parent.mkdir(parents=True, exist_ok=True)
+ # use remote_path, since direct_access_path might be None due to access rules
+ # len("file://") == 7
+ shutil.copyfile(remote_path[7:], local_path)
+ return local_path
+ # we download into temp_local_path so that if we accidentally stop in the middle,
+ # we won't think we have the entire file
+ temp_local_path = '{}_{}{}'.format(local_path, time(), self._temp_download_suffix)
+ obj = self.get_object(remote_path, silence_errors=silence_errors)
+ if not obj:
+ return None
+
+ # object size in bytes
+ total_size_mb = -1
+ dl_total_mb = 0.
+ download_reported = False
+ # chunk size is ignored and always 5MB
+ chunk_size_mb = 5
+
+ # make sure we have the destination folder
+ # noinspection PyBroadException
+ Path(temp_local_path).parent.mkdir(parents=True, exist_ok=True)
+
+ total_size_bytes = self.get_object_size_bytes(remote_path, silence_errors=silence_errors)
+ if total_size_bytes is not None:
+ total_size_mb = float(total_size_bytes) / (1024 * 1024)
+
+ # if driver supports download with callback, use it (it might be faster)
+ if hasattr(self._driver, 'download_object'):
+ # callback
+ cb = DownloadProgressReport(total_size_mb, verbose, remote_path, self._log)
+ self._driver.download_object(obj, temp_local_path, callback=cb)
+ download_reported = bool(cb.last_reported)
+ dl_total_mb = cb.current_status_mb
+ else:
+ stream = self._driver.download_object_as_stream(obj, chunk_size_mb * 1024 * 1024)
+ if stream is None:
+ raise ValueError('Could not download %s' % remote_path)
+ with open(temp_local_path, 'wb') as fd:
+ data, stream = next_chunk(stream)
+ while data:
+ fd.write(data)
+ data, stream = next_chunk(stream)
+
+ if not skip_zero_size_check and Path(temp_local_path).stat().st_size <= 0:
+ raise Exception('downloaded a 0-sized file')
+
+ # if we are on Windows, we need to remove the target file before renaming
+ # otherwise posix rename will overwrite the target
+ if os.name != 'posix':
+ # noinspection PyBroadException
+ try:
+ os.remove(local_path)
+ except Exception:
+ pass
+
+ # rename temp file to local_file
+ # noinspection PyBroadException
+ try:
+ os.rename(temp_local_path, local_path)
+ except Exception:
+ # noinspection PyBroadException
+ try:
+ os.unlink(temp_local_path)
+ except Exception:
+ pass
+ # file was downloaded by a parallel process, check we have the final output and delete the partial copy
+ path_local_path = Path(local_path)
+ if not path_local_path.is_file() or (not skip_zero_size_check and path_local_path.stat().st_size <= 0):
+ raise Exception('Failed renaming partial file, and the downloaded file is missing or 0-sized')
+
+ # report download if we are on the second chunk
+ if verbose or download_reported:
+ self._log.info(
+ 'Downloaded %.2f MB successfully from %s , saved to %s' % (dl_total_mb, remote_path, local_path))
+ return local_path
+ except DownloadError:
+ raise
+ except Exception as e:
+ self._log.error("Could not download {} , err: {} ".format(remote_path, e))
+ if delete_on_failure:
+ # noinspection PyBroadException
+ try:
+ os.remove(temp_local_path)
+ except Exception:
+ pass
+ return None
+
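
A sketch of download_to_file() with hypothetical paths; the download goes through a temporary '.partially' file that is renamed into place on success:

helper = StorageHelper.get("s3://my-bucket/")
local = helper.download_to_file(
    "s3://my-bucket/datasets/train.zip",
    local_path="/tmp/train.zip",
    overwrite_existing=False,  # keep an existing /tmp/train.zip if it is already there
    delete_on_failure=True,    # remove the partial file if the download fails
)
if local is None:
    print("download failed")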
+ def download_as_stream(self, remote_path, chunk_size=None):
+ remote_path = self._canonize_url(remote_path)
+ try:
+ obj = self.get_object(remote_path)
+ return self._driver.download_object_as_stream(
+ obj, chunk_size=chunk_size, verbose=self._verbose, log=self.log
+ )
+ except DownloadError:
+ raise
+ except Exception as e:
+ self._log.error("Could not download file : %s, err:%s " % (remote_path, str(e)))
+ return None
+
+ def download_as_nparray(self, remote_path, chunk_size=None):
+ try:
+ stream = self.download_as_stream(remote_path, chunk_size)
+ if stream is None:
+ return
+
+ # TODO: ugly py3 hack, please remove ASAP
+ if six.PY3 and not isinstance(stream, GeneratorType):
+ import numpy as np
+ return np.frombuffer(stream, dtype=np.uint8)
+ else:
+ import numpy as np
+ return np.asarray(bytearray(b''.join(stream)), dtype=np.uint8)
+
+ except Exception as e:
+ self._log.error("Could not download file : %s, err:%s " % (remote_path, str(e)))
+
+ def delete(self, path):
+ path = self._canonize_url(path)
+ return self._driver.delete_object(self.get_object(path))
+
+ def check_write_permissions(self, dest_path=None):
+ # create a temporary file, then delete it
+ base_url = dest_path or self._base_url
+ dest_path = base_url + "/.clearml.{}.test".format(str(uuid.uuid4()))
+ # do not check http/s connection permissions
+ if dest_path.startswith("http"):
+ return True
+
+ try:
+ self.upload_from_stream(stream=six.BytesIO(b"clearml"), dest_path=dest_path)
+ except Exception:
+ raise ValueError("Insufficient permissions (write failed) for {}".format(base_url))
+ try:
+ self.delete(path=dest_path)
+ except Exception:
+ raise ValueError("Insufficient permissions (delete failed) for {}".format(base_url))
+ return True
+
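
A sketch of check_write_permissions(); the destination is hypothetical, and the check writes and then deletes a temporary '.clearml.<uuid>.test' object:

helper = StorageHelper.get("s3://my-bucket/outputs/")
try:
    helper.check_write_permissions("s3://my-bucket/outputs")
except ValueError as ex:
    print("no write access:", ex)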
+ @classmethod
+ def download_from_url(cls, remote_url, local_path, overwrite_existing=False, skip_zero_size_check=False):
+ """
+ Download a file from a remote URL to local storage
+
+ :param remote_url: Remote URL. Example: https://example.com/image.jpg or s3://bucket/folder/file.mp4 etc.
+ :param local_path: target location for downloaded file. Example: /tmp/image.jpg
+ :param overwrite_existing: If True and local_path exists, overwrite it; otherwise print a warning
+ :param skip_zero_size_check: If True, no error will be raised for files with zero bytes size.
+ :return: local_path if download was successful.
+ """
+ helper = cls.get(remote_url)
+ if not helper:
+ return None
+ return helper.download_to_file(
+ remote_url, local_path, overwrite_existing=overwrite_existing, skip_zero_size_check=skip_zero_size_check
+ )
+
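
A sketch of the download_from_url() classmethod with hypothetical values:

local = StorageHelper.download_from_url(
    "https://example.com/image.jpg",
    "/tmp/image.jpg",
    overwrite_existing=True,
)
print(local)  # "/tmp/image.jpg" on success, None if no helper could be created or the download failed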
+ def get_driver_direct_access(self, path):
+ """
+ Check if the helper's driver has direct access to the file
+
+ :param str path: file path to check access to
+ :return: The file's path as a string if the driver has direct access to it, else None
+ """
+
+ return self._driver.get_direct_access(path)
+
+ @classmethod
+ def _canonize_url(cls, url):
+ return cls._apply_url_substitutions(url)
+
+ @classmethod
+ def _apply_url_substitutions(cls, url):
+ def replace_separator(_url, where, sep):
+ return _url[:where] + _url[where:].replace(sep, os.sep)
+
+ for index, rule in enumerate(cls._path_substitutions):
+ if url.startswith(rule.registered_prefix):
+ url = url.replace(
+ rule.registered_prefix,
+ rule.local_prefix,
+ 1, # count. str.replace() does not support keyword arguments
+ )
+
+ if rule.replace_windows_sep:
+ url = replace_separator(url, len(rule.local_prefix), '\\')
+
+ if rule.replace_linux_sep:
+ url = replace_separator(url, len(rule.local_prefix), '/')
+
+ break
+
+ return url
+
+ @classmethod
+ def _resolve_base_url(cls, base_url):
+ parsed = urlparse(base_url)
+ if parsed.scheme == _Boto3Driver.scheme:
+ conf = cls._s3_configurations.get_config_by_uri(base_url)
+ bucket = conf.bucket
+ if not bucket:
+ parts = Path(parsed.path.strip('/')).parts
+ if parts:
+ bucket = parts[0]
+ return '/'.join(x for x in ('s3:/', conf.host, bucket) if x)
+ elif parsed.scheme == _AzureBlobServiceStorageDriver.scheme:
+ conf = cls._azure_configurations.get_config_by_uri(base_url)
+ if not conf:
+ raise StorageError("Can't find azure configuration for {}".format(base_url))
+ return str(furl(base_url).set(path=conf.container_name))
+ elif parsed.scheme == _GoogleCloudStorageDriver.scheme:
+ conf = cls._gs_configurations.get_config_by_uri(base_url)
+ return str(furl(scheme=parsed.scheme, netloc=conf.bucket))
+ elif parsed.scheme in _HttpDriver.schemes:
+ for files_server in _Driver.get_file_server_hosts():
+ if base_url.startswith(files_server):
+ return files_server
+ return parsed.scheme + "://"
+ else: # if parsed.scheme == 'file':
+ # if we do not know what it is, we assume file
+ return 'file://'
+
+ @classmethod
+ def conform_url(cls, folder_uri, base_url=None):
+ if not folder_uri:
+ return folder_uri
+ _base_url = cls._resolve_base_url(folder_uri) if not base_url else base_url
+
+ if not folder_uri.startswith(_base_url):
+ prev_folder_uri = folder_uri
+ if _base_url == 'file://':
+ folder_uri = str(Path(folder_uri).absolute())
+ if folder_uri.startswith('/'):
+ folder_uri = _base_url + folder_uri
+ elif platform.system() == "Windows":
+ folder_uri = ''.join((_base_url, folder_uri))
+ else:
+ folder_uri = '/'.join((_base_url, folder_uri))
+
+ cls._get_logger().debug('Upload destination {} amended to {} for registration purposes'.format(
+ prev_folder_uri, folder_uri))
+ else:
+ raise ValueError('folder_uri: {} does not start with base url: {}'.format(folder_uri, _base_url))
+
+ return folder_uri
+
+ def _absolute_object_name(self, path):
+ """ Returns absolute remote path, including any prefix that is handled by the container """
+ if not path.startswith(self.base_url):
+ return self.base_url.rstrip('/') + '///' + path.lstrip('/')
+ return path
+
+ def _normalize_object_name(self, path):
+ """ Normalize remote path. Remove any prefix that is already handled by the container """
+ if path.startswith(self.base_url):
+ path = path[len(self.base_url):]
+ if path.startswith('/') and os.name == 'nt':
+ path = path[1:]
+ if self.scheme in (_Boto3Driver.scheme, _GoogleCloudStorageDriver.scheme,
+ _AzureBlobServiceStorageDriver.scheme):
+ path = path.lstrip('/')
+ return path
+
+ def _do_async_upload(self, data):
+ assert isinstance(data, self._UploadData)
+ return self._do_upload(data.src_path, data.dest_path, data.canonized_dest_path, extra=data.extra, cb=data.callback, verbose=True, retries=data.retries, return_canonized=data.return_canonized)
+
+ def _upload_from_file(self, local_path, dest_path, extra=None):
+ if not hasattr(self._driver, 'upload_object'):
+ with open(local_path, 'rb') as stream:
+ res = self.upload_from_stream(stream=stream, dest_path=dest_path, extra=extra)
+ else:
+ object_name = self._normalize_object_name(dest_path)
+ extra = extra.copy() if extra else {}
+ extra.update(self._extra)
+ cb = UploadProgressReport.from_file(local_path, self._verbose, self._log)
+ res = self._driver.upload_object(
+ file_path=local_path,
+ container=self._container,
+ object_name=object_name,
+ callback=cb,
+ extra=extra)
+ return res
+
+ def _do_upload(self, src_path, dest_path, canonized_dest_path, extra=None, cb=None, verbose=False, retries=1, return_canonized=False):
+ object_name = self._normalize_object_name(canonized_dest_path)
+ if cb:
+ try:
+ cb(None)
+ except Exception as e:
+ self._log.error("Calling upload callback when starting upload: %s" % str(e))
+ if verbose:
+ msg = 'Starting upload: {} => {}{}'.format(
+ src_path,
+ (self._container.name if self._container.name.endswith('/') else self._container.name + '/')
+ if self._container and self._container.name else '', object_name)
+ if object_name.startswith('file://') or object_name.startswith('/'):
+ self._log.debug(msg)
+ else:
+ self._log.info(msg)
+ last_ex = None
+ for i in range(max(1, int(retries))):
+ try:
+ if not self._upload_from_file(local_path=src_path, dest_path=canonized_dest_path, extra=extra):
+ # retry if failed
+ last_ex = ValueError("Upload failed")
+ continue
+ last_ex = None
+ break
+ except Exception as e:
+ last_ex = e
+
+ if last_ex:
+ self._log.error("Exception encountered while uploading %s" % str(last_ex))
+ if cb:
+ try:
+ cb(False)
+ except Exception as e:
+ self._log.warning("Exception on upload callback: %s" % str(e))
+ raise last_ex
+
+ if verbose:
+ self._log.debug("Finished upload: %s => %s" % (src_path, object_name))
+ if cb:
+ try:
+ cb(canonized_dest_path if return_canonized else dest_path)
+ except Exception as e:
+ self._log.warning("Exception on upload callback: %s" % str(e))
+
+ return canonized_dest_path if return_canonized else dest_path
+
+ def get_object(self, path, silence_errors=False):
+ # type: (str, bool) -> object
+ """
+ Gets the remote object stored at path. The data held by the object
+ differs depending on where it is stored.
+
+ :param str path: the path where the remote object is stored
+ :param bool silence_errors: Silence errors that might occur
+ when fetching the remote object
+
+ :return: The remote object
+ """
+ path = self._canonize_url(path)
+ object_name = self._normalize_object_name(path)
+ try:
+ return self._driver.get_object(
+ container_name=self._container.name if self._container else '', object_name=object_name)
+ except ConnectionError:
+ raise DownloadError
+ except Exception as e:
+ if not silence_errors:
+ self.log.warning("Storage helper problem for {}: {}".format(str(object_name), str(e)))
+ return None
+
+ @staticmethod
+ def _initialize_upload_pool():
+ if not StorageHelper._upload_pool or StorageHelper._upload_pool_pid != os.getpid():
+ StorageHelper._upload_pool_pid = os.getpid()
+ StorageHelper._upload_pool = ThreadPool(processes=1)
+
+ @staticmethod
+ def close_async_threads():
+ if StorageHelper._upload_pool:
+ pool = StorageHelper._upload_pool
+ StorageHelper._upload_pool = None
+ # noinspection PyBroadException
+ try:
+ pool.terminate()
+ pool.join()
+ except Exception:
+ pass
+
+ def exists_file(self, remote_url):
+ remote_url = self._canonize_url(remote_url)
+ object_name = self._normalize_object_name(remote_url)
+ return self._driver.exists_file(
+ container_name=self._container.name if self._container else "", object_name=object_name
+ )
+
+
+
def normalize_local_path(local_path):
"""
Get a normalized local path
diff --git a/clearml/storage/util.py b/clearml/storage/util.py
index c1523fbd..0d3b17fa 100644
--- a/clearml/storage/util.py
+++ b/clearml/storage/util.py
@@ -44,7 +44,7 @@ def get_config_object_matcher(**patterns):
def quote_url(url):
parsed = urlparse(url)
- if parsed.scheme not in ("http", "https"):
+ if parsed.scheme not in ("http", "https", "gs"):
return url
parsed = parsed._replace(path=quote(parsed.path))
return urlunparse(parsed)
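
With "gs" added to the quotable schemes, Google Storage links are now percent-quoted as well; a small illustration with a hypothetical object name:

from clearml.storage.util import quote_url

print(quote_url("gs://my-bucket/some file.txt"))
# before this change the URL was returned unchanged; now only the path part is quoted:
# gs://my-bucket/some%20file.txt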
From 5772a1551ee3d06a27e1286b1feb6e1fa02b0210 Mon Sep 17 00:00:00 2001
From: Alex Burlacu
Date: Thu, 25 May 2023 18:15:33 +0300
Subject: [PATCH 12/15] Add support for offline datasets and JSON previews
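
A rough sketch of the offline dataset flow this enables (illustrative only; the local data folder is hypothetical):

from clearml import Task, Dataset

Task.set_offline(True)                      # everything is written to a local offline folder
ds = Dataset.create(dataset_project="demo", dataset_name="offline-ds")
ds.add_files("/tmp/data")                   # hypothetical local folder
ds.upload()                                 # in offline mode this only serializes the dataset state
# finalize() is not supported offline and only logs a warning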
---
clearml/backend_api/session/session.py | 2 +-
clearml/backend_interface/task/hyperparams.py | 7 +
clearml/backend_interface/task/task.py | 80 ++++++-
clearml/datasets/dataset.py | 216 ++++++++++++++----
4 files changed, 249 insertions(+), 56 deletions(-)
diff --git a/clearml/backend_api/session/session.py b/clearml/backend_api/session/session.py
index 898dc82b..54f2b88a 100644
--- a/clearml/backend_api/session/session.py
+++ b/clearml/backend_api/session/session.py
@@ -655,7 +655,7 @@ class Session(TokenManager):
if session:
active_sessions.append(session)
new_sessions_weakrefs.append(session_weakref)
- cls._sessions_weakrefs = session_weakref
+ cls._sessions_weakrefs = new_sessions_weakrefs
return active_sessions
@classmethod
diff --git a/clearml/backend_interface/task/hyperparams.py b/clearml/backend_interface/task/hyperparams.py
index 9d924a47..c34367a0 100644
--- a/clearml/backend_interface/task/hyperparams.py
+++ b/clearml/backend_interface/task/hyperparams.py
@@ -118,6 +118,13 @@ class HyperParams(object):
item = make_item(i)
props.update({item.name: item})
+ if self.task.is_offline():
+ hyperparams = self.task.data.hyperparams or {}
+ hyperparams.setdefault("properties", tasks.SectionParams())
+ hyperparams["properties"].update(props)
+ self.task._save_data_to_offline_dir(hyperparams=hyperparams)
+ return True
+
res = self.task.session.send(
tasks.EditHyperParamsRequest(
task=self.task.task_id,
diff --git a/clearml/backend_interface/task/task.py b/clearml/backend_interface/task/task.py
index 942bea9c..56c61e38 100644
--- a/clearml/backend_interface/task/task.py
+++ b/clearml/backend_interface/task/task.py
@@ -369,7 +369,13 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
)
res = self.send(req)
- return res.response.id if res else 'offline-{}'.format(str(uuid4()).replace("-", ""))
+ if res:
+ return res.response.id
+
+ id = "offline-{}".format(str(uuid4()).replace("-", ""))
+ self._edit(type=tasks.TaskTypeEnum(task_type))
+ return id
+
def _set_storage_uri(self, value):
value = value.rstrip('/') if value else None
@@ -1962,6 +1968,8 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
'iter': iteration (default), 'timestamp': timestamp as milliseconds since epoch, 'iso_time': absolute time
:return: dict: Nested scalar graphs: dict[title(str), dict[series(str), dict[axis(str), list(float)]]]
"""
+ scalar_metrics_iter_histogram_request_max_size = 4800
+
if x_axis not in ('iter', 'timestamp', 'iso_time'):
raise ValueError("Scalar x-axis supported values are: 'iter', 'timestamp', 'iso_time'")
@@ -1978,8 +1986,51 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
if not response.ok() or not response.response_data:
return {}
+ metrics_returned = 0
+ for metric in response.response_data.values():
+ for series in metric.values():
+ metrics_returned += len(series.get("x", []))
+ if metrics_returned >= scalar_metrics_iter_histogram_request_max_size:
+ return self._get_all_reported_scalars(x_axis)
+
return response.response_data
+ def _get_all_reported_scalars(self, x_axis):
+ reported_scalars = {}
+ batch_size = 1000
+ scroll_id = None
+ while True:
+ response = self.send(
+ events.GetTaskEventsRequest(
+ task=self.id, event_type="training_stats_scalar", scroll_id=scroll_id, batch_size=batch_size
+ )
+ )
+ if not response:
+ return reported_scalars
+ response = response.wait()
+ if not response.ok() or not response.response_data:
+ return reported_scalars
+ response = response.response_data
+ for event in response.get("events", []):
+ metric = event["metric"]
+ variant = event["variant"]
+ if x_axis in ["timestamp", "iter"]:
+ x_val = event[x_axis]
+ else:
+ x_val = datetime.utcfromtimestamp(event["timestamp"] / 1000).isoformat(timespec="milliseconds") + "Z"
+ y_val = event["value"]
+ reported_scalars.setdefault(metric, {})
+ reported_scalars[metric].setdefault(variant, {"name": variant, "x": [], "y": []})
+ if len(reported_scalars[metric][variant]["x"]) == 0 or reported_scalars[metric][variant]["x"][-1] != x_val:
+ reported_scalars[metric][variant]["x"].append(x_val)
+ reported_scalars[metric][variant]["y"].append(y_val)
+ else:
+ reported_scalars[metric][variant]["y"][-1] = y_val
+ if response.get("returned", 0) < batch_size or not response.get("scroll_id"):
+ break
+ scroll_id = response["scroll_id"]
+ return reported_scalars
+
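
Roughly, what a caller sees through get_reported_scalars() once the fallback kicks in for tasks with more than ~4800 sampled points in total (task id and shapes are illustrative):

from clearml import Task

task = Task.get_task(task_id="<task-id>")          # placeholder id
scalars = task.get_reported_scalars(x_axis="iter")
# nested dict: {title: {series: {"x": [...], "y": [...], ...}}}
for title, series_dict in scalars.items():
    for series, values in series_dict.items():
        print(title, series, len(values["x"]), "points")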
def get_reported_plots(
self,
max_iterations=None
@@ -2459,19 +2510,26 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
"""
return running_remotely() and get_remote_task_id() == self.id
+ def _save_data_to_offline_dir(self, **kwargs):
+ # type: (**Any) -> ()
+ for k, v in kwargs.items():
+ setattr(self.data, k, v)
+ offline_mode_folder = self.get_offline_mode_folder()
+ if not offline_mode_folder:
+ return
+ Path(offline_mode_folder).mkdir(parents=True, exist_ok=True)
+ with open((offline_mode_folder / self._offline_filename).as_posix(), "wt") as f:
+ export_data = self.data.to_dict()
+ export_data["project_name"] = self.get_project_name()
+ export_data["offline_folder"] = self.get_offline_mode_folder().as_posix()
+ export_data["offline_output_models"] = self._offline_output_models
+ json.dump(export_data, f, ensure_ascii=True, sort_keys=True)
+
def _edit(self, **kwargs):
# type: (**Any) -> Any
with self._edit_lock:
if self._offline_mode:
- for k, v in kwargs.items():
- setattr(self.data, k, v)
- Path(self.get_offline_mode_folder()).mkdir(parents=True, exist_ok=True)
- with open((self.get_offline_mode_folder() / self._offline_filename).as_posix(), "wt") as f:
- export_data = self.data.to_dict()
- export_data["project_name"] = self.get_project_name()
- export_data["offline_folder"] = self.get_offline_mode_folder().as_posix()
- export_data["offline_output_models"] = self._offline_output_models
- json.dump(export_data, f, ensure_ascii=True, sort_keys=True)
+ self._save_data_to_offline_dir(**kwargs)
return None
# Since we are using forced update, make sure the task status is valid
@@ -2593,6 +2651,8 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
Return the folder where all the task outputs and logs are stored in the offline session.
:return: Path object, local folder, later to be used with `report_offline_session()`
"""
+ if not self.task_id:
+ return None
if self._offline_dir:
return self._offline_dir
if not self._offline_mode:
diff --git a/clearml/datasets/dataset.py b/clearml/datasets/dataset.py
index 93f29e16..fa010d29 100644
--- a/clearml/datasets/dataset.py
+++ b/clearml/datasets/dataset.py
@@ -122,12 +122,14 @@ class Dataset(object):
__hyperparams_section = "Datasets"
__datasets_runtime_prop = "datasets"
__orig_datasets_runtime_prop_prefix = "orig_datasets"
+ __preview_media_max_file_size = deferred_config("dataset.preview.media.max_file_size", 5 * 1024 * 1024, transform=int)
__preview_tabular_table_count = deferred_config("dataset.preview.tabular.table_count", 10, transform=int)
__preview_tabular_row_count = deferred_config("dataset.preview.tabular.row_count", 10, transform=int)
__preview_media_image_count = deferred_config("dataset.preview.media.image_count", 10, transform=int)
__preview_media_video_count = deferred_config("dataset.preview.media.video_count", 10, transform=int)
__preview_media_audio_count = deferred_config("dataset.preview.media.audio_count", 10, transform=int)
__preview_media_html_count = deferred_config("dataset.preview.media.html_count", 10, transform=int)
+ __preview_media_json_count = deferred_config("dataset.preview.media.json_count", 10, transform=int)
_dataset_chunk_size_mb = deferred_config("storage.dataset_chunk_size_mb", 512, transform=int)
def __init__(
@@ -191,7 +193,7 @@ class Dataset(object):
if "/.datasets/" not in task.get_project_name() or "":
dataset_project, parent_project = self._build_hidden_project_name(task.get_project_name(), task.name)
task.move_to_project(new_project_name=dataset_project)
- if bool(Session.check_min_api_server_version(Dataset.__min_api_version)):
+ if Dataset.is_offline() or bool(Session.check_min_api_server_version(Dataset.__min_api_version)):
get_or_create_project(task.session, project_name=parent_project, system_tags=[self.__hidden_tag])
get_or_create_project(
task.session,
@@ -202,9 +204,21 @@ class Dataset(object):
else:
self._created_task = True
dataset_project, parent_project = self._build_hidden_project_name(dataset_project, dataset_name)
- task = Task.create(
- project_name=dataset_project, task_name=dataset_name, task_type=Task.TaskTypes.data_processing)
- if bool(Session.check_min_api_server_version(Dataset.__min_api_version)):
+ if not Dataset.is_offline():
+ task = Task.create(
+ project_name=dataset_project, task_name=dataset_name, task_type=Task.TaskTypes.data_processing)
+ else:
+ task = Task.init(
+ project_name=dataset_project,
+ task_name=dataset_name,
+ task_type=Task.TaskTypes.data_processing,
+ reuse_last_task_id=False,
+ auto_connect_frameworks=False,
+ auto_connect_arg_parser=False,
+ auto_resource_monitoring=False,
+ auto_connect_streams=False
+ )
+ if Dataset.is_offline() or bool(Session.check_min_api_server_version(Dataset.__min_api_version)):
get_or_create_project(task.session, project_name=parent_project, system_tags=[self.__hidden_tag])
get_or_create_project(
task.session,
@@ -218,25 +232,25 @@ class Dataset(object):
if dataset_tags:
task.set_tags((task.get_tags() or []) + list(dataset_tags))
task.mark_started()
- # generate the script section
- script = (
- "from clearml import Dataset\n\n"
- "ds = Dataset.create(dataset_project='{dataset_project}', dataset_name='{dataset_name}', "
- "dataset_version='{dataset_version}')\n".format(
- dataset_project=dataset_project, dataset_name=dataset_name, dataset_version=dataset_version
+ if not Dataset.is_offline():
+ # generate the script section
+ script = (
+ "from clearml import Dataset\n\n"
+ "ds = Dataset.create(dataset_project='{dataset_project}', dataset_name='{dataset_name}', "
+ "dataset_version='{dataset_version}')\n".format(
+ dataset_project=dataset_project, dataset_name=dataset_name, dataset_version=dataset_version
+ )
)
- )
- task.data.script.diff = script
- task.data.script.working_dir = '.'
- task.data.script.entry_point = 'register_dataset.py'
- from clearml import __version__
- task.data.script.requirements = {'pip': 'clearml == {}\n'.format(__version__)}
- # noinspection PyProtectedMember
- task._edit(script=task.data.script)
-
- # if the task is running make sure we ping to the server so it will not be aborted by a watchdog
- self._task_pinger = DevWorker()
- self._task_pinger.register(task, stop_signal_support=False)
+ task.data.script.diff = script
+ task.data.script.working_dir = '.'
+ task.data.script.entry_point = 'register_dataset.py'
+ from clearml import __version__
+ task.data.script.requirements = {'pip': 'clearml == {}\n'.format(__version__)}
+ # noinspection PyProtectedMember
+ task._edit(script=task.data.script)
+ # if the task is running, make sure we ping the server so it will not be aborted by a watchdog
+ self._task_pinger = DevWorker()
+ self._task_pinger.register(task, stop_signal_support=False)
# set the newly created Dataset parent to the current Task, so we know who created it.
if Task.current_task() and Task.current_task().id != task.id:
task.set_parent(Task.current_task())
@@ -279,6 +293,7 @@ class Dataset(object):
self.__preview_video_count = 0
self.__preview_audio_count = 0
self.__preview_html_count = 0
+ self.__preview_json_count = 0
@property
def id(self):
@@ -321,7 +336,7 @@ class Dataset(object):
@property
def name(self):
# type: () -> str
- if bool(Session.check_min_api_server_version(Dataset.__min_api_version)):
+ if Dataset.is_offline() or bool(Session.check_min_api_server_version(Dataset.__min_api_version)):
return self._task.get_project_name().partition("/.datasets/")[-1]
return self._task.name
@@ -464,8 +479,8 @@ class Dataset(object):
else:
if len(dataset_path) != len(source_url):
raise ValueError(
- f"dataset_path must be a string or a list of strings with the same length as source_url"
- f" (received {len(dataset_path)} paths for {len(source_url)} source urls))"
+ "dataset_path must be a string or a list of strings with the same length as source_url"
+ " (received {} paths for {} source urls))".format(len(dataset_path), len(source_url))
)
dataset_paths = dataset_path
with ThreadPoolExecutor(max_workers=max_workers) as tp:
@@ -635,6 +650,9 @@ class Dataset(object):
:raise: If the upload failed (i.e. at least one zip failed to upload), raise a `ValueError`
"""
self._report_dataset_preview()
+ if Dataset.is_offline():
+ self._serialize()
+ return
# set output_url
if output_url:
@@ -642,7 +660,11 @@ class Dataset(object):
self._task.get_logger().set_default_upload_destination(output_url)
if not max_workers:
- max_workers = 1 if self._task.output_uri.startswith(tuple(cloud_driver_schemes)) else psutil.cpu_count()
+ max_workers = (
+ 1
+ if self._task.output_uri and self._task.output_uri.startswith(tuple(cloud_driver_schemes))
+ else psutil.cpu_count()
+ )
self._task.get_logger().report_text(
"Uploading dataset files: {}".format(
@@ -774,6 +796,9 @@ class Dataset(object):
:param raise_on_error: If True, raise exception if dataset finalizing failed
:param auto_upload: Automatically upload dataset if not called yet, will upload to default location.
"""
+ if Dataset.is_offline():
+ LoggerRoot.get_base_logger().warning("Cannot finalize dataset in offline mode.")
+ return
# check we do not have files waiting for upload.
if self._dirty:
if auto_upload:
@@ -905,6 +930,8 @@ class Dataset(object):
:return: A base folder for the entire dataset
"""
assert self._id
+ if Dataset.is_offline():
+ raise ValueError("Cannot get dataset local copy in offline mode.")
if not self._task:
self._task = Task.get_task(task_id=self._id)
if not self.is_final():
@@ -950,6 +977,8 @@ class Dataset(object):
:return: The target folder containing the entire dataset
"""
assert self._id
+ if Dataset.is_offline():
+ raise ValueError("Cannot get dataset local copy in offline mode.")
max_workers = max_workers or psutil.cpu_count()
target_folder = Path(target_folder).absolute()
target_folder.mkdir(parents=True, exist_ok=True)
@@ -1204,7 +1233,7 @@ class Dataset(object):
:return: Newly created Dataset object
"""
- if not Session.check_min_api_server_version("2.13"):
+ if not Dataset.is_offline() and not Session.check_min_api_server_version("2.13"):
raise NotImplementedError("Datasets are not supported with your current ClearML server version. Please update your server.")
parent_datasets = [cls.get(dataset_id=p) if not isinstance(p, Dataset) else p for p in (parent_datasets or [])]
@@ -1264,7 +1293,7 @@ class Dataset(object):
if description:
instance.set_description(description)
# noinspection PyProtectedMember
- if output_uri and not Task._offline_mode:
+ if output_uri and not Dataset.is_offline():
# noinspection PyProtectedMember
instance._task.output_uri = output_uri
# noinspection PyProtectedMember
@@ -1283,20 +1312,13 @@ class Dataset(object):
instance._serialize()
# noinspection PyProtectedMember
instance._report_dataset_struct()
- # noinspection PyProtectedMember
- instance._task.get_logger().report_text(
- "ClearML results page: {}".format(instance._task.get_output_log_web_page())
- )
- if bool(Session.check_min_api_server_version(cls.__min_api_version)):
- instance._task.get_logger().report_text( # noqa
- "ClearML dataset page: {}".format(
- "{}/datasets/simple/{}/experiments/{}".format(
- instance._task._get_app_server(), # noqa
- instance._task.project if instance._task.project is not None else "*", # noqa
- instance._task.id, # noqa
- )
- )
+ if not Dataset.is_offline():
+ # noinspection PyProtectedMember
+ instance._task.get_logger().report_text(
+ "ClearML results page: {}".format(instance._task.get_output_log_web_page())
)
+ # noinspection PyProtectedMember
+ instance._log_dataset_page()
# noinspection PyProtectedMember
instance._task.flush(wait_for_uploads=True)
# noinspection PyProtectedMember
@@ -1499,6 +1521,8 @@ class Dataset(object):
:param dataset_project: The project the datasets to be renamed belongs to
:param dataset_name: The name of the datasets (before renaming)
"""
+ if Dataset.is_offline():
+ raise ValueError("Cannot rename dataset in offline mode")
if not bool(Session.check_min_api_server_version(cls.__min_api_version)):
LoggerRoot.get_base_logger().warning(
"Could not rename dataset because API version < {}".format(cls.__min_api_version)
@@ -1544,6 +1568,8 @@ class Dataset(object):
:param dataset_project: Project of the dataset(s) to move to new project
:param dataset_name: Name of the dataset(s) to move to new project
"""
+ if cls.is_offline():
+ raise ValueError("Cannot move dataset project in offlime mode")
if not bool(Session.check_min_api_server_version(cls.__min_api_version)):
LoggerRoot.get_base_logger().warning(
"Could not move dataset to another project because API version < {}".format(cls.__min_api_version)
@@ -1618,6 +1644,9 @@ class Dataset(object):
:return: Dataset object
"""
+ if Dataset.is_offline():
+ raise ValueError("Cannot get dataset in offline mode.")
+
system_tags = ["__$all", cls.__tag]
if not include_archived:
system_tags = ["__$all", cls.__tag, "__$not", "archived"]
@@ -1801,6 +1830,9 @@ class Dataset(object):
Examples: `s3://bucket/data`, `gs://bucket/data` , `azure://bucket/data` , `/mnt/share/data`
:return: Newly created dataset object.
"""
+ if Dataset.is_offline():
+ raise ValueError("Cannot squash datasets in offline mode")
+
mutually_exclusive(dataset_ids=dataset_ids, dataset_project_name_pairs=dataset_project_name_pairs)
datasets = [cls.get(dataset_id=d) for d in dataset_ids] if dataset_ids else \
[cls.get(dataset_project=pair[0], dataset_name=pair[1]) for pair in dataset_project_name_pairs]
@@ -1877,7 +1909,7 @@ class Dataset(object):
type=[str(Task.TaskTypes.data_processing)],
tags=tags or None,
status=["stopped", "published", "completed", "closed"] if only_completed else None,
- only_fields=["created", "id", "name", "project", "tags"],
+ only_fields=["created", "id", "name", "project", "tags", "runtime"],
search_hidden=True,
exact_match_regex_flag=False,
_allow_extra_fields_=True,
@@ -1892,6 +1924,7 @@ class Dataset(object):
"project": cls._remove_hidden_part_from_dataset_project(project_id_lookup[d.project]),
"id": d.id,
"tags": d.tags,
+ "version": d.runtime.get("version")
}
for d in datasets
]
@@ -2028,6 +2061,10 @@ class Dataset(object):
for k, parents in self._dependency_graph.items() if k in used_dataset_versions}
# make sure we do not remove our parents, for geology sake
self._dependency_graph[self._id] = current_parents
+ if not Dataset.is_offline():
+ to_delete = [k for k in self._dependency_graph.keys() if k.startswith("offline-")]
+ for k in to_delete:
+ del self._dependency_graph[k]
def _serialize(self, update_dependency_chunk_lookup=False):
# type: (bool) -> ()
@@ -2609,6 +2646,89 @@ class Dataset(object):
"""
return 'dsh{}'.format(md5text(dataset_id))
+ @classmethod
+ def is_offline(cls):
+ # type: () -> bool
+ """
+ Return the offline-mode state. If in offline-mode, no communication to the backend is enabled.
+
+ :return: boolean offline-mode state
+ """
+ return Task.is_offline()
+
+ @classmethod
+ def set_offline(cls, offline_mode=False):
+ # type: (bool) -> None
+ """
+ Set offline mode, where all data and logs are stored in a local folder for later transmission
+
+ :param offline_mode: If True, offline-mode is turned on, and no communication to the backend is enabled.
+ """
+ Task.set_offline(offline_mode=offline_mode)
+
+ def get_offline_mode_folder(self):
+ # type: () -> Optional[Path]
+ """
+ Return the folder where all the dataset data is stored in the offline session.
+
+ :return: Path object, local folder
+ """
+ return self._task.get_offline_mode_folder()
+
+ @classmethod
+ def import_offline_session(cls, session_folder_zip, upload=True, finalize=False):
+ # type: (str, bool, bool) -> str
+ """
+ Import an offline session of a dataset.
+ Includes repository details, installed packages, artifacts, logs, metrics, and debug samples.
+
+ :param session_folder_zip: Path to a folder containing the session, or zip-file of the session folder.
+ :param upload: If True, upload the dataset's data
+ :param finalize: If True, finalize the dataset
+
+ :return: The ID of the imported dataset
+ """
+ id = Task.import_offline_session(session_folder_zip)
+ dataset = Dataset.get(dataset_id=id)
+ # note that there can only be one offline session in the dependency graph: our session
+ # noinspection PyProtectedMember
+ dataset._dependency_graph = {
+ (id if k.startswith("offline-") else k): [(id if sub_v.startswith("offline-") else sub_v) for sub_v in v]
+ for k, v in dataset._dependency_graph.items() # noqa
+ }
+ # noinspection PyProtectedMember
+ dataset._update_dependency_graph()
+ # noinspection PyProtectedMember
+ dataset._log_dataset_page()
+
+ started = False
+ if upload or finalize:
+ started = True
+ # noinspection PyProtectedMember
+ dataset._task.mark_started(force=True)
+
+ if upload:
+ dataset.upload()
+ if finalize:
+ dataset.finalize()
+
+ if started:
+ # noinspection PyProtectedMember
+ dataset._task.mark_completed()
+
+ return id
+
+ def _log_dataset_page(self):
+ if bool(Session.check_min_api_server_version(self.__min_api_version)):
+ self._task.get_logger().report_text(
+ "ClearML dataset page: {}".format(
+ "{}/datasets/simple/{}/experiments/{}".format(
+ self._task._get_app_server(),
+ self._task.project if self._task.project is not None else "*",
+ self._task.id,
+ )
+ )
+ )
def _build_dependency_chunk_lookup(self):
# type: () -> Dict[str, int]
"""
@@ -2850,7 +2970,10 @@ class Dataset(object):
dependency_graph_ex[id_] = parents
task = Task.get_task(task_id=id_)
- dataset_struct_entry = {"job_id": id_, "status": task.status}
+ dataset_struct_entry = {
+ "job_id": id_[len("offline-"):] if id_.startswith("offline-") else id_, # .removeprefix not supported < Python 3.9
+ "status": task.status
+ }
# noinspection PyProtectedMember
last_update = task._get_last_update()
if last_update:
@@ -2964,7 +3087,7 @@ class Dataset(object):
except Exception:
pass
continue
- if compression:
+ if compression or os.path.getsize(file_path) > self.__preview_media_max_file_size:
continue
guessed_type = mimetypes.guess_type(file_path)
if not guessed_type or not guessed_type[0]:
@@ -2982,6 +3105,9 @@ class Dataset(object):
elif guessed_type == "text/html" and self.__preview_html_count < self.__preview_media_html_count:
self._task.get_logger().report_media("HTML", file_name, local_path=file_path)
self.__preview_html_count += 1
+ elif guessed_type == "application/json" and self.__preview_json_count < self.__preview_media_json_count:
+ self._task.get_logger().report_media("JSON", file_name, local_path=file_path, file_extension=".txt")
+ self.__preview_json_count += 1
@classmethod
def _set_project_system_tags(cls, task):
@@ -3366,7 +3492,7 @@ class Dataset(object):
if not dataset_project:
return None, None
project_name = cls._remove_hidden_part_from_dataset_project(dataset_project)
- if bool(Session.check_min_api_server_version(cls.__min_api_version)):
+ if Dataset.is_offline() or bool(Session.check_min_api_server_version(cls.__min_api_version)):
parent_project = "{}.datasets".format(dataset_project + "/" if dataset_project else "")
if dataset_name:
project_name = "{}/{}".format(parent_project, dataset_name)
From 0b6cb2edb86649a6eb99c076a0015c46fe97d81f Mon Sep 17 00:00:00 2001
From: Alex Burlacu
Date: Thu, 25 May 2023 19:00:05 +0300
Subject: [PATCH 13/15] Add Task.get_all_reported_scalars
---
clearml/backend_interface/task/task.py | 32 ++++++++++++++++++--------
1 file changed, 22 insertions(+), 10 deletions(-)
diff --git a/clearml/backend_interface/task/task.py b/clearml/backend_interface/task/task.py
index 56c61e38..07fe4dcf 100644
--- a/clearml/backend_interface/task/task.py
+++ b/clearml/backend_interface/task/task.py
@@ -1953,6 +1953,11 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
This call is not cached, any call will retrieve all the scalar reports from the back-end.
If the Task has many scalars reported, it might take long for the call to return.
+ .. note::
+ Calling this method returns potentially downsampled scalars. The maximum number of returned samples is 5000,
+ even when `max_samples` is set to a value larger than 5000.
+ To fetch all scalar values, use :meth:`Task.get_all_reported_scalars` instead.
+
Example:
.. code-block:: py
@@ -1962,13 +1967,12 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
"y": [10, 11 ,12]
}}}
- :param int max_samples: Maximum samples per series to return. Default is 0 returning all scalars.
+ :param int max_samples: Maximum samples per series to return. Default is 0, returning up to 5000 samples.
With sample limit, average scalar values inside sampling window.
:param str x_axis: scalar x_axis, possible values:
'iter': iteration (default), 'timestamp': timestamp as milliseconds since epoch, 'iso_time': absolute time
:return: dict: Nested scalar graphs: dict[title(str), dict[series(str), dict[axis(str), list(float)]]]
"""
- scalar_metrics_iter_histogram_request_max_size = 4800
if x_axis not in ('iter', 'timestamp', 'iso_time'):
raise ValueError("Scalar x-axis supported values are: 'iter', 'timestamp', 'iso_time'")
@@ -1986,16 +1990,24 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
if not response.ok() or not response.response_data:
return {}
- metrics_returned = 0
- for metric in response.response_data.values():
- for series in metric.values():
- metrics_returned += len(series.get("x", []))
- if metrics_returned >= scalar_metrics_iter_histogram_request_max_size:
- return self._get_all_reported_scalars(x_axis)
-
return response.response_data
- def _get_all_reported_scalars(self, x_axis):
+ def get_all_reported_scalars(self, x_axis='iter'):
+ # type: (str) -> Mapping[str, Mapping[str, Mapping[str, Sequence[float]]]]
+ """
+ Return a nested dictionary of all the scalar graphs, containing all the registered samples,
+ where the first key is the graph title and the second is the series name.
+ Value is a dict with 'x': values and 'y': values.
+ To fetch downsampled scalar values, see :meth:`Task.get_reported_scalars`.
+
+ .. note::
+ This call is not cached; any call will retrieve all the scalar reports from the back-end.
+ If the Task has many scalars reported, it might take a long time for the call to return.
+
+ :param str x_axis: scalar x_axis, possible values:
+ 'iter': iteration (default), 'timestamp': timestamp as milliseconds since epoch, 'iso_time': absolute time
+ :return: dict: Nested scalar graphs: dict[title(str), dict[series(str), dict[axis(str), list(float)]]]
+ """
reported_scalars = {}
batch_size = 1000
scroll_id = None
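
To illustrate the new method, the sketch below fetches the full (non-downsampled) scalar history of a task and walks the nested title/series structure described in the docstring; the task ID is a placeholder.

.. code-block:: py

    from clearml import Task

    task = Task.get_task(task_id="<task-id>")        # placeholder ID
    scalars = task.get_all_reported_scalars(x_axis="iter")

    # nested dict: {title: {series: {"x": [...], "y": [...]}}}
    for title, series_dict in scalars.items():
        for series, points in series_dict.items():
            print(title, series, len(points["x"]), "samples")
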
From 449a4cc42dcc92b7d5840955711a9a60f145b550 Mon Sep 17 00:00:00 2001
From: Alex Burlacu
Date: Thu, 25 May 2023 19:00:20 +0300
Subject: [PATCH 14/15] Bump version to v1.11.0
---
clearml/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clearml/version.py b/clearml/version.py
index b436016b..da77e85c 100644
--- a/clearml/version.py
+++ b/clearml/version.py
@@ -1 +1 @@
-__version__ = '1.10.4'
+__version__ = '1.11.0'
From db2f899d95db0cf1b26a698b4afb31043a80519e Mon Sep 17 00:00:00 2001
From: pollfly <75068813+pollfly@users.noreply.github.com>
Date: Sun, 28 May 2023 08:48:49 +0300
Subject: [PATCH 15/15] Edit docstrings (#1013)
---
clearml/automation/controller.py | 14 +-
clearml/automation/hpbandster/bandster.py | 7 +-
clearml/automation/optimization.py | 218 +++++++++++-----------
clearml/automation/parameters.py | 18 +-
clearml/datasets/dataset.py | 25 +--
5 files changed, 145 insertions(+), 137 deletions(-)
diff --git a/clearml/automation/controller.py b/clearml/automation/controller.py
index 6b2c9713..a5b3c05c 100644
--- a/clearml/automation/controller.py
+++ b/clearml/automation/controller.py
@@ -470,7 +470,7 @@ class PipelineController(object):
pass
:param post_execute_callback: Callback function, called when a step (Task) is completed
- and it other jobs are executed. Allows a user to modify the Task status after completion.
+ and other jobs are executed. Allows a user to modify the Task status after completion.
.. code-block:: py
@@ -738,7 +738,7 @@ class PipelineController(object):
pass
:param post_execute_callback: Callback function, called when a step (Task) is completed
- and it other jobs are executed. Allows a user to modify the Task status after completion.
+ and other jobs are executed. Allows a user to modify the Task status after completion.
.. code-block:: py
@@ -862,7 +862,7 @@ class PipelineController(object):
pass
:param Callable step_task_completed_callback: Callback function, called when a step (Task) is completed
- and it other jobs are executed. Allows a user to modify the Task status after completion.
+ and other jobs are executed. Allows a user to modify the Task status after completion.
.. code-block:: py
@@ -951,7 +951,7 @@ class PipelineController(object):
def connect_configuration(self, configuration, name=None, description=None):
# type: (Union[Mapping, list, Path, str], Optional[str], Optional[str]) -> Union[dict, Path, str]
"""
- Connect a configuration dictionary or configuration file (pathlib.Path / str) to a the PipelineController object.
+ Connect a configuration dictionary or configuration file (pathlib.Path / str) to the PipelineController object.
This method should be called before reading the configuration file.
For example, a local file:
@@ -1373,7 +1373,7 @@ class PipelineController(object):
pass
:param Callable step_task_completed_callback: Callback function, called when a step (Task) is completed
- and it other jobs are executed. Allows a user to modify the Task status after completion.
+ and other jobs are executed. Allows a user to modify the Task status after completion.
.. code-block:: py
@@ -1895,7 +1895,7 @@ class PipelineController(object):
pass
:param post_execute_callback: Callback function, called when a step (Task) is completed
- and it other jobs are executed. Allows a user to modify the Task status after completion.
+ and other jobs are executed. Allows a user to modify the Task status after completion.
.. code-block:: py
@@ -3644,7 +3644,7 @@ class PipelineDecorator(PipelineController):
pass
:param post_execute_callback: Callback function, called when a step (Task) is completed
- and it other jobs are executed. Allows a user to modify the Task status after completion.
+ and other jobs are executed. Allows a user to modify the Task status after completion.
.. code-block:: py
diff --git a/clearml/automation/hpbandster/bandster.py b/clearml/automation/hpbandster/bandster.py
index a787c2f6..1b0595e1 100644
--- a/clearml/automation/hpbandster/bandster.py
+++ b/clearml/automation/hpbandster/bandster.py
@@ -229,14 +229,15 @@ class OptimizerBOHB(SearchStrategy, RandomSeed):
year = {2018},
}
- :param eta : float (3)
+
+ :param eta: float (3)
In each iteration, a complete run of sequential halving is executed. In it,
after evaluating each configuration on the same subset size, only a fraction of
1/eta of them 'advances' to the next round.
Must be greater or equal to 2.
- :param min_budget : float (0.01)
+ :param min_budget: float (0.01)
The smallest budget to consider. Needs to be positive!
- :param max_budget : float (1)
+ :param max_budget: float (1)
The largest budget to consider. Needs to be larger than min_budget!
The budgets will be geometrically distributed
:math:`\sim \eta^k` for :math:`k \in [0, 1, ... , num\_subsets - 1]`.
diff --git a/clearml/automation/optimization.py b/clearml/automation/optimization.py
index 8773d4c4..75f14628 100644
--- a/clearml/automation/optimization.py
+++ b/clearml/automation/optimization.py
@@ -432,7 +432,7 @@ class SearchStrategy(object):
Helper function, Implementation is not required. Default use in process_step default implementation.
Check if the job needs to be aborted or already completed.
- If returns ``False``, the job was aborted / completed, and should be taken off the current job list
+ If this returns ``False``, the job was aborted / completed and should be taken off the current job list.
If there is a budget limitation, this call should update
``self.budget.compute_time.update`` / ``self.budget.iterations.update``
@@ -534,6 +534,8 @@ class SearchStrategy(object):
where index 0 is the best performing Task.
Example w/ all_metrics=False:
+ .. code-block:: py
+
[
('0593b76dc7234c65a13a301f731958fa',
{
@@ -550,6 +552,8 @@ class SearchStrategy(object):
Example w/ all_metrics=True:
+ .. code-block:: py
+
[
('0593b76dc7234c65a13a301f731958fa',
{
@@ -599,9 +603,8 @@ class SearchStrategy(object):
# type: (int, bool, bool, bool) -> Sequence[(str, dict)]
"""
Return a list of dictionaries of the top performing experiments.
- Example: [
- {'task_id': Task-ID, 'metrics': scalar-metric-dict, 'hyper_parameters': Hyper-Parameters},
- ]
+ Example: ``[{'task_id': Task-ID, 'metrics': scalar-metric-dict, 'hyper_parameters': Hyper-Parameters},]``
+
Order is based on the controller ``Objective`` object.
:param int top_k: The number of Tasks (experiments) to return.
@@ -614,46 +617,50 @@ class SearchStrategy(object):
where index 0 is the best performing Task.
Example w/ all_metrics=False:
- [
- {
- task_id: '0593b76dc7234c65a13a301f731958fa',
- hyper_parameters: {'General/lr': '0.03', 'General/batch_size': '32'},
- metrics: {
- 'accuracy per class/cat': {
- 'metric': 'accuracy per class',
- 'variant': 'cat',
- 'value': 0.119,
- 'min_value': 0.119,
- 'max_value': 0.782
- },
- }
- },
- ]
+ .. code-block:: py
- Example w/ all_metrics=True:
+ [
+ {
+ task_id: '0593b76dc7234c65a13a301f731958fa',
+ hyper_parameters: {'General/lr': '0.03', 'General/batch_size': '32'},
+ metrics: {
+ 'accuracy per class/cat': {
+ 'metric': 'accuracy per class',
+ 'variant': 'cat',
+ 'value': 0.119,
+ 'min_value': 0.119,
+ 'max_value': 0.782
+ },
+ }
+ },
+ ]
- [
- {
- task_id: '0593b76dc7234c65a13a301f731958fa',
- hyper_parameters: {'General/lr': '0.03', 'General/batch_size': '32'},
- metrics: {
- 'accuracy per class/cat': {
- 'metric': 'accuracy per class',
- 'variant': 'cat',
- 'value': 0.119,
- 'min_value': 0.119,
- 'max_value': 0.782
- },
- 'accuracy per class/deer': {
- 'metric': 'accuracy per class',
- 'variant': 'deer',
- 'value': 0.219,
- 'min_value': 0.219,
- 'max_value': 0.282
- },
- }
- },
- ]
+ Example w/ all_metrics=True:
+
+ .. code-block:: py
+
+ [
+ {
+ task_id: '0593b76dc7234c65a13a301f731958fa',
+ hyper_parameters: {'General/lr': '0.03', 'General/batch_size': '32'},
+ metrics: {
+ 'accuracy per class/cat': {
+ 'metric': 'accuracy per class',
+ 'variant': 'cat',
+ 'value': 0.119,
+ 'min_value': 0.119,
+ 'max_value': 0.782
+ },
+ 'accuracy per class/deer': {
+ 'metric': 'accuracy per class',
+ 'variant': 'deer',
+ 'value': 0.219,
+ 'min_value': 0.219,
+ 'max_value': 0.282
+ },
+ }
+ },
+ ]
"""
additional_filters = dict(page_size=int(top_k), page=0)
if only_completed:
@@ -761,7 +768,8 @@ class SearchStrategy(object):
"""
Set the function used to name a newly created job.
- :param callable naming_function:
+ :param callable naming_function: Callable function for naming a newly created job.
+ Use the following format:
.. code-block:: py
@@ -1072,7 +1080,7 @@ class RandomSearch(SearchStrategy):
class HyperParameterOptimizer(object):
"""
- Hyper-parameter search controller. Clones the base experiment, changes arguments and tries to maximize/minimize
+ Hyperparameter search controller. Clones the base experiment, changes arguments and tries to maximize/minimize
the defined objective.
"""
_tag = 'optimization'
@@ -1105,13 +1113,12 @@ class HyperParameterOptimizer(object):
``validation``).
:param str objective_metric_series: The Objective metric series to maximize / minimize (for example, ``loss``).
:param str objective_metric_sign: The objective to maximize / minimize.
-
The values are:
- - ``min`` - Minimize the last reported value for the specified title/series scalar.
- - ``max`` - Maximize the last reported value for the specified title/series scalar.
- - ``min_global`` - Minimize the min value of *all* reported values for the specific title/series scalar.
- - ``max_global`` - Maximize the max value of *all* reported values for the specific title/series scalar.
+ - ``min`` - Minimize the last reported value for the specified title/series scalar.
+ - ``max`` - Maximize the last reported value for the specified title/series scalar.
+ - ``min_global`` - Minimize the min value of *all* reported values for the specific title/series scalar.
+ - ``max_global`` - Maximize the max value of *all* reported values for the specific title/series scalar.
:param class.SearchStrategy optimizer_class: The SearchStrategy optimizer to use for the hyper-parameter search
:param int max_number_of_concurrent_tasks: The maximum number of concurrent Tasks (experiments) running at the
@@ -1121,24 +1128,21 @@ class HyperParameterOptimizer(object):
default is ``None``, indicating no time limit.
:param float compute_time_limit: The maximum compute time in minutes. When time limit is exceeded,
all jobs aborted. (Optional)
- :param bool auto_connect_task: Store optimization arguments and configuration in the Task
-
+ :param bool auto_connect_task: Store optimization arguments and configuration in the Task.
The values are:
- - ``True`` - The optimization argument and configuration will be stored in the Task. All arguments will
- be under the hyper-parameter section ``opt``, and the optimization hyper_parameters space will
+ - ``True`` - The optimization argument and configuration will be stored in the Task. All arguments will
+ be under the hyperparameter section ``opt``, and the optimization hyper_parameters space will be
stored in the Task configuration object section.
+ - ``False`` - Do not store with Task.
+ - ``Task`` - A specific Task object to connect the optimization process with.
- - ``False`` - Do not store with Task.
- - ``Task`` - A specific Task object to connect the optimization process with.
- :param bool always_create_task: Always create a new Task
-
+ :param bool always_create_task: Always create a new Task.
The values are:
- - ``True`` - No current Task initialized. Create a new task named ``optimization`` in the ``base_task_id``
+ - ``True`` - No current Task initialized. Create a new task named ``optimization`` in the ``base_task_id``
project.
-
- - ``False`` - Use the :py:meth:`task.Task.current_task` (if exists) to report statistics.
+ - ``False`` - Use the :py:meth:`task.Task.current_task` (if exists) to report statistics.
:param str spawn_project: If project name is specified, create all optimization Jobs (Tasks) in the
specified project instead of the original base_task_id project.
@@ -1505,9 +1509,8 @@ class HyperParameterOptimizer(object):
# type: (int, bool, bool, bool) -> Sequence[(str, dict)]
"""
Return a list of dictionaries of the top performing experiments.
- Example: [
- {'task_id': Task-ID, 'metrics': scalar-metric-dict, 'hyper_parameters': Hyper-Parameters},
- ]
+ Example: ``[{'task_id': Task-ID, 'metrics': scalar-metric-dict, 'hyper_parameters': Hyper-Parameters},]``
+
Order is based on the controller ``Objective`` object.
:param int top_k: The number of Tasks (experiments) to return.
@@ -1520,46 +1523,50 @@ class HyperParameterOptimizer(object):
where index 0 is the best performing Task.
Example w/ all_metrics=False:
- [
- {
- task_id: '0593b76dc7234c65a13a301f731958fa',
- hyper_parameters: {'General/lr': '0.03', 'General/batch_size': '32'},
- metrics: {
- 'accuracy per class/cat': {
- 'metric': 'accuracy per class',
- 'variant': 'cat',
- 'value': 0.119,
- 'min_value': 0.119,
- 'max_value': 0.782
- },
- }
- },
- ]
+ .. code-block:: py
- Example w/ all_metrics=True:
+ [
+ {
+ task_id: '0593b76dc7234c65a13a301f731958fa',
+ hyper_parameters: {'General/lr': '0.03', 'General/batch_size': '32'},
+ metrics: {
+ 'accuracy per class/cat': {
+ 'metric': 'accuracy per class',
+ 'variant': 'cat',
+ 'value': 0.119,
+ 'min_value': 0.119,
+ 'max_value': 0.782
+ },
+ }
+ },
+ ]
- [
- {
- task_id: '0593b76dc7234c65a13a301f731958fa',
- hyper_parameters: {'General/lr': '0.03', 'General/batch_size': '32'},
- metrics: {
- 'accuracy per class/cat': {
- 'metric': 'accuracy per class',
- 'variant': 'cat',
- 'value': 0.119,
- 'min_value': 0.119,
- 'max_value': 0.782
- },
- 'accuracy per class/deer': {
- 'metric': 'accuracy per class',
- 'variant': 'deer',
- 'value': 0.219,
- 'min_value': 0.219,
- 'max_value': 0.282
- },
- }
- },
- ]
+ Example w/ all_metrics=True:
+
+ .. code-block:: py
+
+ [
+ {
+ task_id: '0593b76dc7234c65a13a301f731958fa',
+ hyper_parameters: {'General/lr': '0.03', 'General/batch_size': '32'},
+ metrics: {
+ 'accuracy per class/cat': {
+ 'metric': 'accuracy per class',
+ 'variant': 'cat',
+ 'value': 0.119,
+ 'min_value': 0.119,
+ 'max_value': 0.782
+ },
+ 'accuracy per class/deer': {
+ 'metric': 'accuracy per class',
+ 'variant': 'deer',
+ 'value': 0.219,
+ 'min_value': 0.219,
+ 'max_value': 0.282
+ },
+ }
+ },
+ ]
"""
if not self.optimizer:
return []
@@ -1615,13 +1622,12 @@ class HyperParameterOptimizer(object):
``validation``).
:param str objective_metric_series: The Objective metric series to maximize / minimize (for example, ``loss``).
:param str objective_metric_sign: The objective to maximize / minimize.
-
The values are:
- - ``min`` - Minimize the last reported value for the specified title/series scalar.
- - ``max`` - Maximize the last reported value for the specified title/series scalar.
- - ``min_global`` - Minimize the min value of *all* reported values for the specific title/series scalar.
- - ``max_global`` - Maximize the max value of *all* reported values for the specific title/series scalar.
+ - ``min`` - Minimize the last reported value for the specified title/series scalar.
+ - ``max`` - Maximize the last reported value for the specified title/series scalar.
+ - ``min_global`` - Minimize the min value of *all* reported values for the specific title/series scalar.
+ - ``max_global`` - Maximize the max value of *all* reported values for the specific title/series scalar.
:param str optimizer_task_id: Parent optimizer Task ID
:param top_k: The number of Tasks (experiments) to return.
:return: A list of Task objects, ordered by performance, where index 0 is the best performing Task.
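
As context for the reworked ``HyperParameterOptimizer`` docstrings above, a minimal setup might look as follows. This is only a sketch: the base task ID, parameter name, and metric title/series are hypothetical, and ``RandomSearch`` is just one of the available search strategies.

.. code-block:: py

    from clearml.automation import HyperParameterOptimizer, RandomSearch, UniformParameterRange

    optimizer = HyperParameterOptimizer(
        base_task_id="<base-task-id>",            # hypothetical task to clone
        hyper_parameters=[
            UniformParameterRange("General/lr", min_value=0.001, max_value=0.1, step_size=0.001),
        ],
        objective_metric_title="validation",       # hypothetical scalar title
        objective_metric_series="loss",
        objective_metric_sign="min",
        optimizer_class=RandomSearch,
        max_number_of_concurrent_tasks=2,
    )
    optimizer.start()
    optimizer.wait()                               # block until the optimization finishes
    top_experiments = optimizer.get_top_experiments(top_k=3)
    optimizer.stop()
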
diff --git a/clearml/automation/parameters.py b/clearml/automation/parameters.py
index 6d06773f..a32cee74 100644
--- a/clearml/automation/parameters.py
+++ b/clearml/automation/parameters.py
@@ -110,7 +110,7 @@ class Parameter(RandomSeed):
class UniformParameterRange(Parameter):
"""
- Uniform randomly sampled hyper-parameter object.
+ Uniform randomly sampled hyperparameter object.
"""
def __init__(
@@ -129,12 +129,11 @@ class UniformParameterRange(Parameter):
:param float min_value: The minimum sample to use for uniform random sampling.
:param float max_value: The maximum sample to use for uniform random sampling.
:param float step_size: If not ``None``, set step size (quantization) for value sampling.
- :param bool include_max_value: Range includes the ``max_value``
-
+ :param bool include_max_value: Range includes the ``max_value``.
The values are:
- - ``True`` - The range includes the ``max_value`` (Default)
- - ``False`` - Does not include.
+ - ``True`` - The range includes the ``max_value`` (Default)
+ - ``False`` - Does not include.
"""
super(UniformParameterRange, self).__init__(name=name)
@@ -221,7 +220,7 @@ class LogUniformParameterRange(UniformParameterRange):
class UniformIntegerParameterRange(Parameter):
"""
- Uniform randomly sampled integer Hyper-Parameter object.
+ Uniform randomly sampled integer Hyperparameter object.
"""
def __init__(self, name, min_value, max_value, step_size=1, include_max_value=True):
@@ -233,12 +232,11 @@ class UniformIntegerParameterRange(Parameter):
:param int min_value: The minimum sample to use for uniform random sampling.
:param int max_value: The maximum sample to use for uniform random sampling.
:param int step_size: The default step size is ``1``.
- :param bool include_max_value: Range includes the ``max_value``
-
+ :param bool include_max_value: Range includes the ``max_value``.
The values are:
- - ``True`` - Includes the ``max_value`` (Default)
- - ``False`` - Does not include.
+ - ``True`` - Includes the ``max_value`` (Default)
+ - ``False`` - Does not include.
"""
super(UniformIntegerParameterRange, self).__init__(name=name)
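
To make the ``include_max_value`` behaviour documented above concrete, here is a small sketch; the parameter names are arbitrary, and it assumes the standard ``to_list()`` / ``get_value()`` helpers on these classes.

.. code-block:: py

    from clearml.automation import UniformIntegerParameterRange, UniformParameterRange

    # integer grid 16..128 in steps of 16; include_max_value=True (default) keeps 128
    batch_size = UniformIntegerParameterRange(
        "General/batch_size", min_value=16, max_value=128, step_size=16
    )
    # float grid 0.01..0.1 in steps of 0.01; the upper bound is excluded here
    lr = UniformParameterRange(
        "General/lr", min_value=0.01, max_value=0.1, step_size=0.01, include_max_value=False
    )

    print(batch_size.to_list())   # enumerate the discrete values
    print(lr.get_value())         # draw a single random sample
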
diff --git a/clearml/datasets/dataset.py b/clearml/datasets/dataset.py
index fa010d29..0d929fef 100644
--- a/clearml/datasets/dataset.py
+++ b/clearml/datasets/dataset.py
@@ -324,6 +324,7 @@ class Dataset(object):
# type: () -> Mapping[str, LinkEntry]
"""
Notice this call returns an internal representation, do not modify!
+
:return: dict with relative file path as key, and LinkEntry as value
"""
return self._dataset_link_entries
@@ -643,8 +644,9 @@ class Dataset(object):
If -1 is provided, use a single zip artifact for the entire dataset change-set (old behaviour)
:param max_workers: Numbers of threads to be spawned when zipping and uploading the files.
If None (default) it will be set to:
- - 1: if the upload destination is a cloud provider ('s3', 'gs', 'azure')
- - number of logical cores: otherwise
+
+ - 1: if the upload destination is a cloud provider ('s3', 'gs', 'azure')
+ - number of logical cores: otherwise
:param int retries: Number of retries before failing to upload each zip. If 0, the upload is not retried.
:raise: If the upload failed (i.e. at least one zip failed to upload), raise a `ValueError`
@@ -839,7 +841,7 @@ class Dataset(object):
# type: (Union[numpy.array, pd.DataFrame, Dict[str, Any]], str, bool) -> () # noqa: F821
"""
Attach a user-defined metadata to the dataset. Check `Task.upload_artifact` for supported types.
- If type is Optionally make it visible as a table in the UI.
+ If the type is a pandas DataFrame, optionally make it visible as a table in the UI.
"""
if metadata_name.startswith(self.__data_entry_name_prefix):
raise ValueError("metadata_name can not start with '{}'".format(self.__data_entry_name_prefix))
@@ -954,7 +956,7 @@ class Dataset(object):
# type: (Union[Path, _Path, str], bool, Optional[int], Optional[int], bool, Optional[int]) -> Optional[str]
"""
return a base folder with a writable (mutable) local copy of the entire dataset
- download and copy / soft-link, files from all the parent dataset versions
+ download and copy / soft-link, files from all the parent dataset versions
:param target_folder: Target folder for the writable copy
:param overwrite: If True, recursively delete the target folder before creating a copy.
@@ -1223,11 +1225,11 @@ class Dataset(object):
:param output_uri: Location to upload the datasets file to, including preview samples.
The following are examples of ``output_uri`` values for the supported locations:
- - A shared folder: ``/mnt/share/folder``
- - S3: ``s3://bucket/folder``
- - Google Cloud Storage: ``gs://bucket-name/folder``
- - Azure Storage: ``azure://company.blob.core.windows.net/folder/``
- - Default file server: None
+ - A shared folder: ``/mnt/share/folder``
+ - S3: ``s3://bucket/folder``
+ - Google Cloud Storage: ``gs://bucket-name/folder``
+ - Azure Storage: ``azure://company.blob.core.windows.net/folder/``
+ - Default file server: None
:param description: Description of the dataset
@@ -1786,6 +1788,7 @@ class Dataset(object):
"""
Return a Logger object for the Dataset, allowing users to report statistics metrics
and debug samples on the Dataset itself
+
:return: Logger object
"""
return self._task.get_logger()
@@ -1797,8 +1800,8 @@ class Dataset(object):
(it does not imply on the number of chunks parent versions store)
:param include_parents: If True (default),
- return the total number of chunks from this version and all parent versions.
- If False, only return the number of chunks we stored on this specific version.
+ return the total number of chunks from this version and all parent versions.
+ If False, only return the number of chunks we stored on this specific version.
:return: Number of chunks stored on the dataset.
"""