"""Example: build MNIST train/test ClearML datasets and a combined child dataset."""
from pathlib2 import Path

from clearml import Dataset, StorageManager

MNIST_URL = "https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip"
DATASET_PROJECT = "dataset_examples/MNIST"


def _create_dataset(dataset_name, source_dir, dataset_path):
    """Create, populate, upload and finalize a single dataset.

    Args:
        dataset_name: Name given to the new dataset inside DATASET_PROJECT.
        source_dir: Local directory whose files are added to the dataset.
        dataset_path: Target path passed to ``add_files`` for the added files.

    Returns:
        The finalized ``Dataset`` object.
    """
    dataset = Dataset.create(
        dataset_project=DATASET_PROJECT,
        dataset_name=dataset_name,
    )
    dataset.add_files(path=source_dir, dataset_path=dataset_path)
    dataset.upload()
    dataset.finalize()
    return dataset


def main():
    """Download MNIST, then publish train, test and combined datasets.

    Raises:
        RuntimeError: If the MNIST archive could not be fetched locally.
    """
    print("STEP1 : Downloading mnist dataset")
    local_copy = StorageManager.get_local_copy(remote_url=MNIST_URL, name="MNIST")
    # get_local_copy is documented to return None when the remote object
    # cannot be retrieved; fail here with a clear message instead of letting
    # Path(None) raise an unrelated TypeError.
    if local_copy is None:
        raise RuntimeError(
            "Failed to download the MNIST dataset from {}".format(MNIST_URL)
        )
    mnist_dataset = Path(local_copy)
    mnist_dataset_train = mnist_dataset / "TRAIN"
    mnist_dataset_test = mnist_dataset / "TEST"

    print("STEP2 : Creating the training dataset")
    train_dataset = _create_dataset(
        "MNIST Training Dataset", mnist_dataset_train, "TRAIN"
    )

    print("STEP3 : Creating the testing dataset")
    test_dataset = _create_dataset(
        "MNIST Testing Dataset", mnist_dataset_test, "TEST"
    )

    print("STEP4 : Create a child dataset with both mnist train and test data")
    # The child adds no files of its own; it only references both parents.
    child_dataset = Dataset.create(
        dataset_project=DATASET_PROJECT,
        dataset_name="MNIST Complete Dataset",
        parent_datasets=[train_dataset.id, test_dataset.id],
    )
    child_dataset.upload()
    child_dataset.finalize()

    print("We are done, have a great day :)")


if __name__ == "__main__":
    main()