From c253af58c0d8781a113242c8511b6dae7fc7d5fb Mon Sep 17 00:00:00 2001
From: pollfly <75068813+pollfly@users.noreply.github.com>
Date: Sun, 28 Jan 2024 11:39:01 +0200
Subject: [PATCH] Add Hyper-Datasets intro (#766)

---
 docs/hyper_datasets.md | 46 ++++++++++++++++++++++++++++++++++++++++++
 sidebars.js            | 12 ++++++++++-
 2 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 docs/hyper_datasets.md
diff --git a/docs/hyper_datasets.md b/docs/hyper_datasets.md
new file mode 100644
index 00000000..d2ce9b9c
--- /dev/null
+++ b/docs/hyper_datasets.md
@@ -0,0 +1,46 @@
+---
+title: Hyper-Datasets
+---
+
+:::important Enterprise Feature
+Hyper-Datasets are available under the ClearML Enterprise plan.
+:::
+
+<div class="vid">
+<iframe style={{position: 'absolute', top: '0', left: '0', bottom: '0', right: '0', width: '100%', height: '100%'}} 
+        src="https://www.youtube.com/embed/1VliYRexeLU?si=WAXIdAwsja7D0lxH" 
+        title="YouTube video player" 
+        frameborder="0" 
+        allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; fullscreen" 
+        allowfullscreen>
+</iframe>
+</div>
+
+<br/>
+
+ClearML's **Hyper-Datasets** are an MLOps-oriented abstraction of your data, which facilitates traceable, reproducible model development
+through parameterized data access and metadata version control. 
+
+Hyper-Datasets is a data management system specifically tailored for handling unstructured data, like text, audio, or 
+visual data. You can create, manage, and version your datasets. Datasets can be set up to inherit from other datasets, so
+data lineages can be created, and users can track when and how their data changes. In the ClearML Enterprise WebApp, 
+you can view a dataset's version history, as well as its contents, including annotations, metadata, masks, and other 
+information.
+
+![Frame viewer](img/hyperdatasets/web-app/dataset_example_frame_editor.png)
+
+The basic premise of Hyper-Datasets is that a user-formed query is a full representation of the dataset used by the ML/DL 
+process. Hyper-Datasets decouple metadata from raw data files, allowing you to manipulate metadata through sophisticated
+queries and parameters that can be tracked through the experiment manager. You can clone experiments using different 
+data manipulations--or **DataViews**--without changing any of the hard-coded values, making these manipulations part of 
+the experiment. 
+
+ClearML **Enterprise**'s Hyper-Datasets supports rapid prototyping, creating new opportunities such as: 
+* Hyperparameter optimization of the data itself
+* QA/QC pipelining
+* CD/CT (continuous training) during deployment
+* Complex applications such as collaborative (federated) learning
+
+
+For more information, see [Hyper-Datasets](hyperdatasets/overview.md).
+
diff --git a/sidebars.js b/sidebars.js
index 66f31027..e2d2aa93 100644
--- a/sidebars.js
+++ b/sidebars.js
@@ -54,7 +54,17 @@ module.exports = {
                 ]
         },
         {'ClearML Data': ['clearml_data/clearml_data', 'clearml_data/clearml_data_cli', 'clearml_data/clearml_data_sdk', 'clearml_data/best_practices',
-                {'Workflows': ['clearml_data/data_management_examples/workflows', 'clearml_data/data_management_examples/data_man_simple', 'clearml_data/data_management_examples/data_man_folder_sync', 'clearml_data/data_management_examples/data_man_cifar_classification', 'clearml_data/data_management_examples/data_man_python']},]},
+                {'Workflows': [
+                    'clearml_data/data_management_examples/workflows',
+                    'clearml_data/data_management_examples/data_man_simple',
+                    'clearml_data/data_management_examples/data_man_folder_sync',
+                    'clearml_data/data_management_examples/data_man_cifar_classification',
+                    'clearml_data/data_management_examples/data_man_python'
+                    ]
+                },
+            ]
+        },
+        'hyper_datasets',
         'model_registry',
         'apps/clearml_session',
         {'ClearML Serving':['clearml_serving/clearml_serving', 'clearml_serving/clearml_serving_setup', 'clearml_serving/clearml_serving_cli', 'clearml_serving/clearml_serving_tutorial']},