mirror of
https://github.com/clearml/clearml
synced 2025-06-26 18:16:07 +00:00
Add hyperdataset examples (#823)
* Add hyperdataset examples Co-authored-by: Erez Schnaider <erez@clear.ml>
This commit is contained in:
@@ -0,0 +1,29 @@
|
||||
from allegroai import Task, DataView
|
||||
|
||||
|
||||
task = Task.init(project_name="examples", task_name="dataview example with masks")

# Simple query: a DataView iterated in random order, with a fixed random seed
# passed to the iteration parameters.
dataview = DataView(iteration_order='random')
dataview.set_iteration_parameters(random_seed=123)

# Query the 'Current' version of the masks sample dataset.
dataview.add_query(dataset_name='sample-dataset-masks', version_name='Current')

# Print the number of frames the queries return.
frame_count = dataview.get_count()
print("count", frame_count)

# Generate a list of FrameGroups from the query.
# Note: the metadata is cached locally, so the next call to to_list() returns faster.
list_frame_groups = dataview.to_list()

# A FrameGroup is a dictionary of SingleFrames - each one is accessed with the
# key it was registered under ("000002" in this dataset).
local_sources = [
    frame_group["000002"].get_local_source() for frame_group in list_frame_groups
]
print(local_sources)

print("now in iterator form")

# Iterator version of the same code; notice that this time the metadata is not
# cached locally.
for frame_group in dataview:
    for key in frame_group.keys():
        frame = frame_group[key]
        print(frame.get_local_source(), frame.get_local_mask_source())

print("done")
|
||||
@@ -0,0 +1,38 @@
|
||||
"""
|
||||
How to access and go over data
|
||||
The general flow:
|
||||
- Create new dataview.
|
||||
- Query your dataview.
|
||||
- Two ways to go over the frames:
|
||||
- dataview.get_iterator()
|
||||
- dataview.to_list()
|
||||
"""
|
||||
from allegroai import Task, DataView
|
||||
|
||||
|
||||
task = Task.init(project_name="examples", task_name="dataview example")

# Simple query: a DataView iterated in random order, with a fixed random seed
# passed to the iteration parameters.
dataview = DataView(iteration_order='random')
dataview.set_iteration_parameters(random_seed=123)

# Datasets are queried with the `add_query` function. To get all the data,
# use roi_query="*" or pass only the dataset and version.
# This is a general example; change the `add_query` parameters as needed.
dataview.add_query(
    dataset_name='sample-dataset',
    version_name='Current',
    roi_query=["aeroplane"],
)

# Print the number of frames the queries return.
frame_count = dataview.get_count()
print("count", frame_count)

# Generate a list of frames from the query.
# Note: the metadata is cached locally, so the next call to to_list() returns faster.
list_single_frames = dataview.to_list()
local_sources = [f.get_local_source() for f in list_single_frames]
print(local_sources)

print("now in iterator form")

# Iterator version of the same code; notice that this time the metadata is not
# cached locally.
for f in dataview:
    local_source = f.get_local_source()
    print(local_source)

print("done")
|
||||
@@ -0,0 +1,51 @@
|
||||
import numpy as np
|
||||
import torch.utils.data
|
||||
from allegroai import DataView, SingleFrame, Task
|
||||
from PIL import Image
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
|
||||
class ExampleDataset(torch.utils.data.Dataset):
    """Map-style PyTorch dataset over the frames returned by a DataView.

    Each item is the frame's local image, converted to RGB, resized to
    256x256 and returned as a NumPy array.
    """

    def __init__(self, dv):
        """Keep the list returned by ``dv.to_list()`` so items can be indexed.

        :param dv: object exposing ``to_list()`` (a DataView in this example).
        """
        self.frames = dv.to_list()

    def __getitem__(self, idx):
        """Load the image of frame ``idx`` and return it as a NumPy array.

        :param idx: index into the stored frame list.
        :return: ``np.ndarray`` built from the 256x256 RGB image.
        """
        frame = self.frames[idx]  # type: SingleFrame
        img_path = frame.get_local_source()

        # Open inside a context manager so the file handle is released as soon
        # as the pixel data has been converted, instead of relying on garbage
        # collection inside a long-running DataLoader worker.
        with Image.open(img_path) as source:
            img = source.convert("RGB").resize((256, 256))

        return np.array(img)

    def __len__(self):
        """Return the number of frames held by the dataset."""
        return len(self.frames)
|
||||
|
||||
|
||||
# The DataLoader below starts a worker process. On platforms that use the
# "spawn" start method (Windows, macOS) the worker re-imports this module, so
# the driver code must be guarded to avoid re-running the whole script there.
if __name__ == "__main__":
    task = Task.init(project_name='examples', task_name='PyTorch Sample Dataset')

    # Create a DataView with an example query.
    dataview = DataView()
    dataview.add_query(dataset_name='sample-dataset', version_name='Current')

    # If we want all files to be downloaded in the background, we can call prefetch:
    # dataview.prefetch_files()

    # Create the PyTorch Dataset.
    dataset = ExampleDataset(dataview)

    # Do your thing here :)
    print('Fake PyTorch stuff below:')
    print('Dataset length', len(dataset))

    torch.manual_seed(0)
    data_loader = DataLoader(
        dataset,
        batch_size=2,
        num_workers=1,
        pin_memory=True,
        prefetch_factor=2,
    )
    for i, data in enumerate(data_loader):
        print('{}] {}'.format(i, data))

    print('done')
|
||||
@@ -0,0 +1,55 @@
|
||||
import numpy as np
|
||||
import torch.utils.data
|
||||
from allegroai import DataView, FrameGroup, Task
|
||||
from PIL import Image
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
|
||||
class ExampleDataset(torch.utils.data.Dataset):
    """Map-style PyTorch dataset yielding (image, mask) pairs from a DataView.

    Each item is read from the ``"000002"`` entry of a frame group: the image
    is converted to RGB and resized to 256x256, the mask is resized to
    256x256, and both are returned as NumPy arrays.
    """

    def __init__(self, dv):
        """Keep the list returned by ``dv.to_list()`` so items can be indexed.

        :param dv: object exposing ``to_list()`` (a DataView in this example).
        """
        self.frames = dv.to_list()

    def __getitem__(self, idx):
        """Load the image and mask of frame group ``idx``.

        :param idx: index into the stored frame-group list.
        :return: tuple ``(image_array, mask_array)`` of NumPy arrays.
        """
        frame_group = self.frames[idx]  # type: FrameGroup
        frame = frame_group["000002"]

        # Open inside context managers so the file handles are released as
        # soon as the pixel data is loaded, instead of relying on garbage
        # collection inside a long-running DataLoader worker.
        img_path = frame.get_local_source()
        with Image.open(img_path) as source:
            img = source.convert("RGB").resize((256, 256))

        mask_path = frame.get_local_mask_source()
        with Image.open(mask_path) as source:
            # NOTE(review): resize() uses its default resampling filter here;
            # if the mask stores class labels, NEAREST is probably wanted to
            # avoid blended label values - confirm against the dataset.
            mask = source.resize((256, 256))

        return np.array(img), np.array(mask)

    def __len__(self):
        """Return the number of frame groups held by the dataset."""
        return len(self.frames)
|
||||
|
||||
|
||||
# The DataLoader below starts a worker process. On platforms that use the
# "spawn" start method (Windows, macOS) the worker re-imports this module, so
# the driver code must be guarded to avoid re-running the whole script there.
if __name__ == "__main__":
    task = Task.init(project_name='examples', task_name='PyTorch Sample Dataset with Masks')

    # Create a DataView with an example query.
    dataview = DataView()
    dataview.add_query(dataset_name='sample-dataset-masks', version_name='Current')
    # Alternative query, restricted by ROI label:
    # dataview.add_query(dataset_name='sample-dataset', version_name='Current', roi_query=["aeroplane"])

    # If we want all files to be downloaded in the background, we can call prefetch:
    # dataview.prefetch_files()

    # Create the PyTorch Dataset.
    dataset = ExampleDataset(dataview)

    # Do your thing here :)
    print('Fake PyTorch stuff below:')
    print('Dataset length', len(dataset))

    torch.manual_seed(0)
    data_loader = DataLoader(
        dataset,
        batch_size=2,
        num_workers=1,
        pin_memory=True,
        prefetch_factor=2,
    )
    for i, data in enumerate(data_loader):
        print('{}] {}'.format(i, data))

    print('done')
|
||||
Reference in New Issue
Block a user