From 038b95012eade557963c1610210c54db7520c2d9 Mon Sep 17 00:00:00 2001
From: pollfly <75068813+pollfly@users.noreply.github.com>
Date: Wed, 14 Sep 2022 12:30:19 +0300
Subject: [PATCH] Add clearml-data dataset version info (#328)

---
 docs/clearml_data/best_practices.md   | 2 +-
 docs/clearml_data/clearml_data_cli.md | 4 ++--
 docs/clearml_data/clearml_data_sdk.md | 6 ++++++
 3 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/docs/clearml_data/best_practices.md b/docs/clearml_data/best_practices.md
index fade03f4..ff7af939 100644
--- a/docs/clearml_data/best_practices.md
+++ b/docs/clearml_data/best_practices.md
@@ -47,5 +47,5 @@ serves as a dataset's single point of truth, you can schedule a script which use
 will update the dataset based on the modifications made to the folder. This way, there is no need to manually modify a dataset. 
 This functionality will also track the modifications made to a folder.
 
-See the sync function with the [CLI](clearml_data_cli.md#syncing-local-storage) or [SDK](clearml_data_sdk.md#syncing-local-storage)
+See the sync function with the [CLI](clearml_data_cli.md#sync) or [SDK](clearml_data_sdk.md#syncing-local-storage)
 interface. 
diff --git a/docs/clearml_data/clearml_data_cli.md b/docs/clearml_data/clearml_data_cli.md
index 27ad9437..1991d0fa 100644
--- a/docs/clearml_data/clearml_data_cli.md
+++ b/docs/clearml_data/clearml_data_cli.md
@@ -32,7 +32,7 @@ clearml-data create [-h] [--parents [PARENTS [PARENTS ...]]] [--project PROJECT]
 |---|---|---|
 |`--name` |Dataset's name| <img src="/docs/latest/icons/ico-optional-no.svg" alt="No" className="icon size-md center-md" /> |
 |`--project`|Dataset's project| <img src="/docs/latest/icons/ico-optional-no.svg" alt="No" className="icon size-md center-md" /> |
-|`--version` |Dataset version. If not specified a version will automatically be assigned | <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" /> |
+|`--version` |Dataset version. Use the [semantic versioning](https://semver.org) scheme. If not specified a version will automatically be assigned | <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" /> |
 |`--parents`|IDs of the dataset's parents. The dataset inherits all of its parents' content. Multiple parents can be entered, but they are merged in the order they were entered| <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" /> |
 |`--output-uri`| Sets where dataset and its previews are uploaded to| <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
 |`--tags` |Dataset user tags. The dataset can be labeled, which can be useful for organizing datasets| <img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
@@ -183,7 +183,7 @@ clearml-data sync [-h] [--id ID] [--dataset-folder DATASET_FOLDER] --folder FOLD
 |`--parents`|IDs of the dataset's parents (i.e. merge all parents). All modifications made to the folder since the parents were synced will be reflected in the dataset|<img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
 |`--project`|If creating a new dataset, specify the dataset's project name|<img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
 |`--name`|If creating a new dataset, specify the dataset's name|<img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
-|`--version`|Specify the dataset’s version. Default: `1.0.0`|<img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
+|`--version`|Specify the dataset’s version using the [semantic versioning](https://semver.org) scheme. Default: `1.0.0`|<img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
 |`--tags`|Dataset user tags|<img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
 |`--skip-close`|Do not auto close dataset after syncing folders|<img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
 |`--chunk-size`| Set dataset artifact upload chunk size in MB. Default 512, (pass -1 for a single chunk). Example: 512, dataset will be split and uploaded in 512 MB chunks. |<img src="/docs/latest/icons/ico-optional-yes.svg" alt="Yes" className="icon size-md center-md" />|
diff --git a/docs/clearml_data/clearml_data_sdk.md b/docs/clearml_data/clearml_data_sdk.md
index 49b29cb1..c01a1bd6 100644
--- a/docs/clearml_data/clearml_data_sdk.md
+++ b/docs/clearml_data/clearml_data_sdk.md
@@ -52,6 +52,12 @@ For datasets created with `clearml` v1.6 or newer on ClearML Server v1.6 or newe
 For datasets created with earlier versions of `clearml`, or if using an earlier version of ClearML Server, find the ID in the task header of the [dataset task's info panel](../webapp/webapp_exp_track_visual.md).  
 :::
 
+:::info Dataset Version
+Input the dataset's version using the [semantic versioning](https://semver.org) scheme (e.g. `1.0.1`, `2.0`). If a version 
+is not input, the method tries finding the latest dataset version with the specified `dataset_name` and `dataset_project` 
+and auto-increments the version number. 
+:::
+
 Use the `output_uri` parameter to specify a network storage target to upload the dataset files, and associated information 
 (such as previews) to (e.g. `s3://bucket/data`, `gs://bucket/data`, `azure://bucket/data`, `file:///mnt/share/data`). 
 By default, the dataset uploads to ClearML's file server. The `output_uri` parameter of the [`Dataset.upload`](#uploading-files)