clearml/examples/hyperdatasets/data-registration/register_dataset_with_roi.py

"""
How to register data with ROIs and metadata from a json file.
Create a list of ROI's for each image in the metadata format required by a frame.
Notice: This is a custom parser for a specific dataset. Each dataset requires a different parser.
You can run this example from this dir with:
python register_dataset_with_roi.py
--ext jpg --ds_name my_uploaded_dataset --version_name my_version
"""

import glob
import json
import os
from argparse import ArgumentParser

from allegroai import DatasetVersion, SingleFrame, Task
from clearml import StorageManager


def get_json_file(filename):
"""
Get the data from the json file
:param filename: Full file path
:type filename: str
:return: json data parse as python dictionary
"""
json_file_path = filename.replace('.jpg', '.json')
json_file = open(json_file_path, "r")
data = json.load(json_file)
json_file.close()
return data
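
# Usage sketch (the path is hypothetical):
#   get_json_file("/data/ds_with_rois/img_001.jpg")
# loads and returns the contents of "/data/ds_with_rois/img_001.json"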


def get_frames_with_roi_meta(data_path, ext):
    """
    Create a ready-to-register list of SingleFrame(s)

    :param data_path: Path to the folder you'd like to register
    :type data_path: str
    :param ext: File extension to upload from the dir
    :type ext: str
    :return: List[SingleFrame] containing all the SingleFrames that should be registered
    """
    frames_to_reg = []
    # Go over each file with the requested extension in the base path
    for file in glob.glob(os.path.join(data_path, "*.{}".format(ext))):
        full_path = os.path.abspath(file)
        print("Getting files from: " + full_path)
        # Read the json file sitting next to the image
        data = get_json_file(full_path)
        # Create the SingleFrame object
        a_frame = SingleFrame(source=full_path)
        # Iterate over the rois in the json and add each one as an annotation
        for roi in data['rois']:
            a_frame.add_annotation(
                poly2d_xy=roi["poly"],
                labels=roi['labels'],
                metadata={'alive': roi['meta']['alive']},
                confidence=roi['confidence']
            )
        # Add generic metadata to the frame
        a_frame.width = data['size']['x']
        a_frame.height = data['size']['y']
        a_frame.metadata['dangerous'] = data['meta']['dangerous']
        # Add to our SingleFrame collection
        frames_to_reg.append(a_frame)
    return frames_to_reg
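
# Usage sketch (the directory path is hypothetical):
#   frames = get_frames_with_roi_meta("/data/ds_with_rois", "jpg")
# yields one SingleFrame per image, each carrying its polygon ROIs,
# its dimensions, and the per-frame "dangerous" metadata entry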


def create_version_with_frames(new_frames, ds_name, ver_name, local_dataset_path):
    """
    Create a DatasetVersion with new_frames as its frames

    :param new_frames: List with all the frames to be registered
    :type new_frames: List[SingleFrame]
    :param ds_name: The dataset name
    :type ds_name: str
    :param ver_name: The version name
    :type ver_name: str
    :param local_dataset_path: Path to the folder you are registering
    :type local_dataset_path: str
    """
    # Get the dataset (it will create a new one if we don't have it)
    ds = DatasetVersion.create_new_dataset(dataset_name=ds_name)
    # Create a specific dataset version, or just use the latest version
    dv = ds.create_version(version_name=ver_name) if ver_name else \
        DatasetVersion.get_current(dataset_name=ds_name)
    # Add and upload the frames to the created version
    dv.add_frames(
        new_frames,
        # Where to upload the files; here we use the task's default output
        # destination, but any storage URI (e.g. s3://bucket/folder) works as well
        auto_upload_destination=Task.current_task().get_output_destination(),
        # The local root; this makes sure we keep the same file structure
        # in the upload destination as we have on the local machine
        local_dataset_root_path=local_dataset_path
    )
    dv.commit_version()


if __name__ == '__main__':
    parser = ArgumentParser(description='Register allegro dataset with rois and meta')
    parser.add_argument(
        '--ext', type=str, help='Files extension to upload from the dir. Default: "jpg"',
        default="jpg")
    parser.add_argument(
        '--ds_name', type=str, help='Dataset name for the data. Default: "sample-dataset"',
        default="sample-dataset")
    parser.add_argument(
        '--version_name', type=str, help='Version name for the data (default is the current version)',
        default="initial")
    args = parser.parse_args()

    # Download the example data; this folder contains the images and json files to register
    example_dataset_path = 's3://clearml-public/datasets/hyperdataset_example/ds_with_rois'
    local_img_path = StorageManager.download_folder(example_dataset_path)
    base_path = os.path.abspath('{}/datasets/hyperdataset_example/ds_with_rois'.format(local_img_path))

    dataset_name = args.ds_name
    version_name = args.version_name

    task = Task.init(
        project_name="uploading_datasets", task_name="upload_sample",
        task_type=Task.TaskTypes.data_processing,
        # This makes sure we have a valid output destination for our local files
        # to be uploaded to. Other storage types are supported as well:
        # - A shared folder: ``/mnt/share/folder``
        # - S3: ``s3://bucket/folder``
        # - Google Cloud Storage: ``gs://bucket-name/folder``
        # - Azure Storage: ``azure://company.blob.core.windows.net/folder/``
        output_uri=True
    )

    frames = get_frames_with_roi_meta(base_path, args.ext)
    create_version_with_frames(frames, dataset_name, version_name, base_path)
    print("We are done :)")