From d2b7135074c9712f3110c6305c81a1673b7400f8 Mon Sep 17 00:00:00 2001
From: pollfly <75068813+pollfly@users.noreply.github.com>
Date: Sun, 20 Nov 2022 10:24:44 +0200
Subject: [PATCH] Update docs (#373)

---
 docs/hyperdatasets/dataset.md       |  40 +++++++-
 docs/hyperdatasets/dataviews.md     | 138 +++++++++++++++++++---------
 docs/hyperdatasets/frame_groups.md  |   6 +-
 docs/hyperdatasets/single_frames.md |  10 +-
 4 files changed, 147 insertions(+), 47 deletions(-)

diff --git a/docs/hyperdatasets/dataset.md b/docs/hyperdatasets/dataset.md
index df29a16f..a286bdfd 100644
--- a/docs/hyperdatasets/dataset.md
+++ b/docs/hyperdatasets/dataset.md
@@ -43,6 +43,18 @@ from allegroai import DatasetVersion
 myDataset = DatasetVersion.create_new_dataset(dataset_name='myDataset Two')
 ```
 
+When creating a dataset, you can put it into a project. In this case, the dataset will adhere to the [access rules](../webapp/webapp_profile.md#access-rules)
+specified for its containing project. Use the `dataset_project` parameter in `Dataset.create` or `DatasetVersion.create_new_dataset`
+to specify a project name.
+
+```python
+myDataset_1 = Dataset.create(dataset_name="myDataset", dataset_project="myDataset Project")
+
+myDataset_2 = DatasetVersion.create_new_dataset(
+    dataset_name="myDataset_2", dataset_project="myDatasetProject_2"
+)
+```
+
 To raise a `ValueError` exception if the Dataset exists, specify the `raise_if_exists` parameters as `True`.
 
 * With `Dataset.create`
@@ -102,7 +114,33 @@ Delete a Dataset even if it contains versions whose status is *Published*.
 ```python
 Dataset.delete(dataset_name='MyDataset', delete_all_versions=True, force=True)
 ```
- 
+
+Delete a Dataset and the sources associated with its deleted frames:
+
+```python
+Dataset.delete(
+    dataset_name='MyDataset', delete_all_versions=True, force=True, delete_sources=True
+)
+```
+
+Deleting sources is supported for sources located in AWS S3, GCP, and Azure Storage (not local storage). The `delete_sources`
+parameter is ignored if `delete_all_versions` is `False`. To view the progress of the deletion process, pass
+`show_progress=True` (requires the `tqdm` package).
+
+### Tagging Datasets
+
+Tags can be added to datasets, making it easy to identify and group them.
+
+Add tags to a dataset:
+```python
+MyDataset.add_tags(["coco", "dogs"])
+```
+
+Remove tags from a dataset:
+
+```python
+MyDataset.remove_tags(["dogs"])
+```
 
 ## Dataset Versioning
 
diff --git a/docs/hyperdatasets/dataviews.md b/docs/hyperdatasets/dataviews.md
index ff8bb291..9b4f4004 100644
--- a/docs/hyperdatasets/dataviews.md
+++ b/docs/hyperdatasets/dataviews.md
@@ -363,49 +363,6 @@ myDataView.add_query(
 )
 ```
 
-### Mapping ROI Labels
-
-ROI label translation (label mapping) enables combining labels for training, combining disparate datasets, and hiding
-certain labels for training.
-
-This example demonstrates consolidating two disparate Datasets. Two Dataset versions use `car` (lower case "c"), but the
-third uses `Car` (upper case "C").
-The example maps `Car` (upper case "C") to `car` (lower case "c").
-
-```python
-# Create a Dataview object for an iterator that randomly returns frames according to queries
-myDataView = DataView(iteration_order=IterationOrder.random, iteration_infinite=True)
-
-# The 1st Dataset (version) - "car" with lowercase "c"
-myDataView.add_query(
-    dataset_name='myDataset',
-    version_name='myVersion',
-    roi_query='car'
-)
-
-# The 2nd Dataset (version) - "car" with lowercase "c"
-myDataView.add_query(
-    dataset_name='dataset_2',
-    version_name='aVersion',
-    roi_query='car'
-)
-
-# A 3rd Dataset (version) - "Car" with uppercase "C"
-myDataView.add_query(
-    dataset_name='dataset_3',
-    version_name='training',
-    roi_query='Car'
-)
-
-# Use a mapping rule to translate "Car" (uppercase) to "car" (lowercase)
-myDataView.add_mapping_rule(
-    dataset_name='dataset_3',
-    version_name='training',
-    from_labels=['Car'],
-    to_label='car'
-)
-```
-
 ### Setting Label Enumeration Values
 
 Set label enumeration values to maintain data conformity across multiple codebases and datasets.
@@ -459,3 +416,98 @@ myDataView.set_labels(
     {"cat": 1, "dog": 2, "bird": 3, "sheep": 4, "cow": 5, "ignore": -1,}
 )
 ```
+
+### Mapping ROI Labels
+
+ROI label translation (label mapping) enables combining labels for training, combining disparate datasets, and hiding
+certain labels for training.
+
+This example demonstrates consolidating two disparate Datasets. Two Dataset versions use `car` (lower case "c"), but the
+third uses `Car` (upper case "C").
+The example maps `Car` (upper case "C") to `car` (lower case "c").
+
+```python
+# Create a Dataview object for an iterator that randomly returns frames according to queries
+myDataView = DataView(iteration_order=IterationOrder.random, iteration_infinite=True)
+
+# The 1st Dataset (version) - "car" with lowercase "c"
+myDataView.add_query(
+    dataset_name='myDataset',
+    version_name='myVersion',
+    roi_query='car'
+)
+
+# The 2nd Dataset (version) - "car" with lowercase "c"
+myDataView.add_query(
+    dataset_name='dataset_2',
+    version_name='aVersion',
+    roi_query='car'
+)
+
+# A 3rd Dataset (version) - "Car" with uppercase "C"
+myDataView.add_query(
+    dataset_name='dataset_3',
+    version_name='training',
+    roi_query='Car'
+)
+
+# Use a mapping rule to translate "Car" (uppercase) to "car" (lowercase)
+myDataView.add_mapping_rule(
+    dataset_name='dataset_3',
+    version_name='training',
+    from_labels=['Car'],
+    to_label='car'
+)
+```
+
+### Accessing Frames
+
+Dataview objects can be retrieved by their ID or name using the [`DataView.get`](../references/hyperdataset/dataview.md#dataviewget)
+class method.
+
+```python
+my_dataview = DataView.get(dataview_id='12344kg2p3hf8')
+```
+
+Access the Dataview's frames as a Python list or dictionary, or through a Pythonic iterator.
+
+The `DataView.to_dict` method returns a list of dictionaries, where each dictionary represents a frame. Use the
+`projection` parameter to specify a subset of the frame fields to be included in the result. Input a list of strings,
+where each string represents a frame field or subfield (using dot-separated notation).
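+
+Before narrowing the result with `projection`, the following is a minimal sketch of the three access patterns. It reuses
+the `my_dataview` object retrieved above and assumes, as described in this section, that `to_list`, `to_dict`, and
+`get_iterator` can be called without arguments to return or iterate over all the frames in the Dataview:
+
+```python
+# All frames as a list of frame objects (loads everything into memory)
+frames = my_dataview.to_list()
+
+# All frames as a list of dictionaries
+frame_dicts = my_dataview.to_dict()
+
+# Iterate over the frames one at a time (preferred for large Dataviews)
+for frame in my_dataview.get_iterator():
+    print(frame)
+```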
+
+For example, the `to_dict` call below specifies that the frame dictionaries should include only the `id` and `sources`
+fields and the `dataset.id` subfield:
+
+```python
+my_dataview = DataView.get(dataview_id='<dataview_id>')
+my_dataview.to_dict(projection=['id', 'dataset.id', 'sources'])
+```
+
+The method returns a list of dictionaries that looks something like this:
+
+```json
+[
+  {
+    "id": "<frame_id>",
+    "dataset": {
+      "id": "<dataset_id>"
+    },
+    "sources": [
+      {
+        "id": "<source_id>",
+        "uri": "<image_uri>",
+        "timestamp": <timestamp>,
+        "preview": {
+          "uri": "<preview_uri>",
+          "timestamp": <timestamp>
+        }
+      }
+    ]
+  },
+  # additional dictionaries with the same format here
+]
+```
+
+Since the `to_list`/`to_dict` methods return all the frames in the dataview, it is recommended to use the [`DataView.get_iterator`](../references/hyperdataset/dataview.md#get_iterator)
+method, which returns an iterator over the dataview's frames. You can also specify the desired frame fields in this method
+using the `projection` parameter, just like in the `DataView.to_dict` method, as described above.
diff --git a/docs/hyperdatasets/frame_groups.md b/docs/hyperdatasets/frame_groups.md
index a0e76826..c6cb1636 100644
--- a/docs/hyperdatasets/frame_groups.md
+++ b/docs/hyperdatasets/frame_groups.md
@@ -50,7 +50,11 @@ To add FrameGroups to a Dataset Version:
 
 1. Append the FrameGroup object to a list of frames
 
-1. Add that list to a DatasetVersion.
+1. Add that list to a DatasetVersion using the [`DatasetVersion.add_frames`](../references/hyperdataset/hyperdatasetversion.md#add_frames)
+method. Use the `upload_retries` parameter to set the number of times a frame upload is retried in case of failure before
+the frame is marked as failed and the upload continues with the next frames. If a single frame in the FrameGroup fails
+to upload, the entire FrameGroup is not registered. The method returns a list of the frames that were not successfully
+registered or uploaded.
 
 ```python
 # Create a FrameGroup object
diff --git a/docs/hyperdatasets/single_frames.md b/docs/hyperdatasets/single_frames.md
index 93127f76..03d835cc 100644
--- a/docs/hyperdatasets/single_frames.md
+++ b/docs/hyperdatasets/single_frames.md
@@ -222,8 +222,14 @@ For more information, see the `SingleFrame` class description.
 
 ### Adding SingleFrames to a Dataset Version
 
-Use the `DatasetVersion.add_frames` method to add SingleFrames to a [Dataset version](dataset.md#dataset-versioning)
-(see [Creating snapshots](dataset.md#creating-snapshots) or [Creating child versions](dataset.md#creating-child-versions)).
+Use the [`DatasetVersion.add_frames`](../references/hyperdataset/hyperdatasetversion.md#add_frames) method to add
+SingleFrames to a [Dataset version](dataset.md#dataset-versioning) (see [Creating snapshots](dataset.md#creating-snapshots)
+or [Creating child versions](dataset.md#creating-child-versions)). Frames that are already part of the dataset version
+are only updated.
+
+Use the `upload_retries` parameter to set the number of times a frame upload is retried in case of failure before the
+frame is marked as failed and the upload continues with the next frames. The method returns a list of the frames that
+were not successfully registered or uploaded.
 
 ```python
 from allegroai import DatasetVersion, SingleFrame